In [None]:
!pip install ultralytics

In [None]:
!gdown "18alTGY9OgS4xxxAagCH0"
!unzip fullDataHalftexted.zip 
!gdown '1-5Q6qsLK_U6xxxxxgZim8HTbuDEX'

In [None]:
import cv2 
import numpy as np 
import matplotlib.pyplot as plt 
from google.colab.patches import cv2_imshow
from ultralytics import YOLO


In [None]:
# @title Preprocessing all images

def textDisappear(image, max_aspect_ratio=5, min_aspect_ratio=0.2,
                  intensity_threshold=200, block_size=11, threshold_c=2):
  """Paint white rectangles over dark, text-shaped regions of a BGR image.

  The image is binarised with an inverted Gaussian adaptive threshold, every
  contour's bounding box is examined, and boxes whose aspect ratio is within
  [min_aspect_ratio, max_aspect_ratio] and whose mean pixel value is below
  ``intensity_threshold`` are filled with white.

  NOTE: ``image`` is modified IN PLACE and the same array is returned. Pass
  ``image.copy()`` if the original pixels are still needed.

  Args:
    image: BGR image array (it is converted with ``cv2.COLOR_BGR2GRAY``).
    max_aspect_ratio: boxes with width/height above this are left untouched.
    min_aspect_ratio: boxes with width/height below this are left untouched.
    intensity_threshold: a box is filled only when the mean of its pixels
      (over all channels) is strictly below this value.
    block_size: neighbourhood size passed to ``cv2.adaptiveThreshold``.
    threshold_c: constant subtracted from the weighted mean in
      ``cv2.adaptiveThreshold``.

  Returns:
    The same ``image`` array, with the selected regions filled white.
  """
  # image = image[2*image.shape[0]//3:,:,:]    # uncomment to keep only the bottom third of the image
  # plt.imshow(image)
  # plt.show()

  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  # Inverted binary output: dark strokes become foreground for contour finding
  thresh = cv2.adaptiveThreshold(
      gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV,
      block_size, threshold_c)
  # The hierarchy is not needed, only the contours themselves
  contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

  for contour in contours:
    x, y, w, h = cv2.boundingRect(contour)
    aspect_ratio = w / float(h)
    # Skip boxes that are too wide or too tall to be text
    if aspect_ratio > max_aspect_ratio or aspect_ratio < min_aspect_ratio:
      continue
    # The mean is taken on the image as it currently is, i.e. it already
    # includes white fills made for earlier contours in this loop.
    roi = image[y:y+h, x:x+w]
    avg_intensity = np.mean(roi)
    # A dark region is assumed to contain readable text: blank it out
    if avg_intensity < intensity_threshold:
      cv2.rectangle(image, (x, y), (x+w, y+h), (255, 255, 255), -1)
  return image

def draw_bboxes(image, bboxes):
    """Outline every box in ``bboxes`` on ``image`` in green and return ``image``.

    Each box is a sequence of four numbers in (xmin, ymin, xmax, ymax) order;
    the values are truncated to integers before drawing. The rectangles are
    drawn directly onto the array that was passed in.
    """
    green = (0, 255, 0)
    for box in bboxes:
        # pixel coordinates must be ints for the drawing call
        left, top, right, bottom = (int(value) for value in box)
        cv2.rectangle(image, (left, top), (right, bottom), green, thickness=2)
    return image

In [None]:

# Load the custom-trained detector.
# The stock 'yolov8n.pt' model used to be loaded first and was overwritten on
# the very next line, so it only cost an extra weight fetch/load.
model = YOLO('/content/best.pt')  # load a custom model

In [None]:
# Path of the image that the cells below read with cv2.imread.
# The trailing "#@param" is a Colab form-field marker; keep it on this line.
impath = '/content/fullDataHalftexted/images/0_halftextedimg.jpg'  #@param

In [None]:
imageTexted = cv2.imread(impath)    # image with text in the background
if imageTexted is None:
    # cv2.imread reports an unreadable/missing file by returning None
    raise FileNotFoundError(f"Could not read image at {impath!r}")
# textDisappear paints in place, so hand it a copy and keep the original intact
image = textDisappear(imageTexted.copy())  # image with text disappeared

In [None]:
# Run the loaded YOLO model on `image`
result = model(image)
# Boxes of the first (only) result as plain Python lists; other cells unpack
# each entry as (xmin, ymin, xmax, ymax)
boxes = result[0].boxes.xyxy.tolist()


0: 288x640 3 signs, 357.7ms
Speed: 1.1ms preprocess, 357.7ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 640)


In [None]:
# draw_bboxes paints onto the array it receives; pass a copy so imageTexted
# stays free of annotations and re-running this cell starts from clean pixels
image_with_bboxes = draw_bboxes(imageTexted.copy(), boxes)

cv2_imshow(image_with_bboxes)

In [None]:
# @title Extract Images

# Reload the image from disk
image = cv2.imread(impath)
if image is None:
    # cv2.imread reports an unreadable/missing file by returning None
    raise FileNotFoundError(f"Could not read image at {impath!r}")

# Crop the image within each bounding box and blank out the text in the crop
cropped_images = []
for bbox in boxes:
    xmin, ymin, xmax, ymax = map(int, bbox)
    # .copy(): a plain slice is a view into `image`, and textDisappear paints
    # in place -- without the copy, overlapping boxes would see each other's
    # white fills and `image` itself would be altered
    cropped_image = textDisappear(image[ymin:ymax, xmin:xmax].copy())
    cropped_images.append(cropped_image)

# Show the cropped images
for cropped_image in cropped_images:
    # Portrait crops are rotated for display only; the entry stored in
    # cropped_images is not replaced
    if cropped_image.shape[0] > cropped_image.shape[1]:
      cropped_image = cv2.rotate(cropped_image, cv2.ROTATE_90_CLOCKWISE)

    cv2_imshow(cropped_image)
    print(cropped_image.shape)
    print("################################")

In [None]:

# Compare the first cropped detection against the next two with SIFT features.
if len(cropped_images) < 3:
    raise ValueError(
        f"Need at least 3 cropped detections to compare, got {len(cropped_images)}"
    )

img1, img2, img3 = cropped_images[:3]

# SIFT is part of the main cv2 module in current OpenCV builds; older builds
# only expose it through the contrib namespace cv2.xfeatures2d.
if hasattr(cv2, "SIFT_create"):
    sift = cv2.SIFT_create()
else:
    sift = cv2.xfeatures2d.SIFT_create()

# Brute-force matcher with default settings
bf = cv2.BFMatcher()


def _sift_descriptors(img):
    """Return the SIFT descriptors of a BGR image (None when no keypoints are found)."""
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, descriptors = sift.detectAndCompute(gray, None)
    return descriptors


def _similarity_score(des_ref, des_other):
    """Return 1 / (1 + mean match distance), or 0.0 when nothing can be matched."""
    if des_ref is None or des_other is None:
        return 0.0
    matches = bf.match(des_ref, des_other)
    if not matches:
        return 0.0
    avg_distance = sum(match.distance for match in matches) / len(matches)
    return 1 / (1 + avg_distance)


des1 = _sift_descriptors(img1)
des2 = _sift_descriptors(img2)
des3 = _sift_descriptors(img3)

# Use the average match distance as a score (smaller distance -> higher score)
score2 = _similarity_score(des1, des2)
score3 = _similarity_score(des1, des3)

# Print the scores
print("Score for image 2:", score2)
print("Score for image 3:", score3)

# Determine which image has the highest score (a tie reports image 3)
if score2 > score3:
    print("Image 2 is closest to image 1.")
else:
    print("Image 3 is closest to image 1.")


Score for image 2: 0.004201741221328321
Score for image 3: 0.003756617736895768
Image 2 is closest to image 1.
