In [1]:
from google.colab import drive
drive.mount('/content/drive')

!cp -r /content/drive/MyDrive/AssignmentsIPCV/dataset.zip ./
!unzip dataset.zip

Mounted at /content/drive
Archive:  dataset.zip
   creating: dataset/
  inflating: __MACOSX/._dataset      
   creating: dataset/scenes/
  inflating: __MACOSX/dataset/._scenes  
  inflating: dataset/.DS_Store       
  inflating: __MACOSX/dataset/._.DS_Store  
   creating: dataset/models/
  inflating: __MACOSX/dataset/._models  
  inflating: dataset/scenes/scene12.png  
  inflating: __MACOSX/dataset/scenes/._scene12.png  
  inflating: dataset/scenes/scene10.png  
  inflating: __MACOSX/dataset/scenes/._scene10.png  
  inflating: dataset/scenes/scene11.png  
  inflating: __MACOSX/dataset/scenes/._scene11.png  
  inflating: dataset/scenes/scene5.png  
  inflating: __MACOSX/dataset/scenes/._scene5.png  
  inflating: dataset/scenes/scene4.png  
  inflating: __MACOSX/dataset/scenes/._scene4.png  
  inflating: dataset/scenes/scene6.png  
  inflating: __MACOSX/dataset/scenes/._scene6.png  
  inflating: dataset/scenes/scene7.png  
  inflating: __MACOSX/dataset/scenes/._scene7.png  
  inflating: 

In [2]:
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np

In [3]:
def load_and_split_images(folder, split_point, color_mode=cv2.IMREAD_COLOR):
    """Load the numbered PNG images of `folder` and split them in two groups.

    Images are processed in ascending order of the number embedded in their
    file name, so position `i` of each returned list follows file numbering.

    Args:
        folder: Directory to scan (not recursive).
        split_point: Images whose number is <= this value go to the first
            list, all others to the second list.
        color_mode: Flag forwarded to `cv2.imread`.

    Returns:
        A tuple `(images_first_part, images_second_part)` of lists of images
        as returned by `cv2.imread`.
    """
    images_first_part = []
    images_second_part = []

    def extract_number(filename):
        # Extract the number from the file name by concatenating all of its
        # digits ("scene12.png" -> 12). Returns None when there is no digit.
        digits = ''.join(filter(str.isdigit, filename))
        return int(digits) if digits else None

    # Keep only numbered PNG files *before* sorting: directory entries without
    # any digit (e.g. ".DS_Store") would otherwise make the sort key fail.
    numbered_pngs = []
    for filename in os.listdir(folder):
        if not filename.endswith(".png"):
            continue
        number = extract_number(filename)
        if number is None:
            print(f"Skipping image without a number in its name: {filename}")
            continue
        numbered_pngs.append((number, filename))

    # Sort numerically (file name breaks ties) rather than lexicographically,
    # so that "scene10.png" comes after "scene9.png".
    numbered_pngs.sort()

    for number, filename in numbered_pngs:
        img_path = os.path.join(folder, filename)
        img = cv2.imread(img_path, color_mode)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print(f"Failed to load image at {img_path}")
            continue
        if number <= split_point:
            images_first_part.append(img)
        else:
            images_second_part.append(img)
    return images_first_part, images_second_part


In [None]:
def load_and_split_images(folder, split_point, color_mode=cv2.IMREAD_COLOR):
    """Load the "ref<N>.png" / "scene<N>.png" images of `folder`, split by N.

    NOTE: this cell redefines a function of the same name declared in an
    earlier cell; whichever cell runs last wins.

    Args:
        folder: Directory to scan (not recursive).
        split_point: Images whose number N is <= this value go to the first
            list, all others to the second list.
        color_mode: Flag forwarded to `cv2.imread`.

    Returns:
        A tuple `(images_first_part, images_second_part)` of image lists, each
        ordered by ascending N.
    """
    images_first_part = []
    images_second_part = []

    def image_number(filename):
        # "ref7.png" -> 7, "scene12.png" -> 12; None when the remainder of the
        # stem is not an integer.
        stem = filename.split('.')[0].replace('ref', '').replace('scene', '')
        try:
            return int(stem)
        except ValueError:
            return None

    numbered_pngs = []
    for filename in os.listdir(folder):
        if not filename.endswith(".png"):
            continue
        number = image_number(filename)
        if number is None:
            print(f"Skipping image with an unexpected name: {filename}")
            continue
        numbered_pngs.append((number, filename))

    # A plain sorted(os.listdir()) is lexicographic ("ref10" < "ref2"), which
    # would scramble the index -> file-number correspondence; sort on N instead.
    numbered_pngs.sort()

    for number, filename in numbered_pngs:
        img_path = os.path.join(folder, filename)
        img = cv2.imread(img_path, color_mode)
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            print(f"Failed to load image at {img_path}")
            continue
        if number <= split_point:
            images_first_part.append(img)
        else:
            images_second_part.append(img)
    return images_first_part, images_second_part

In [4]:
def apply_median_filter(images, kernel_size=5):
    """Median-blur every image and return the blurred results in grayscale.

    Args:
        images: Iterable of BGR images (each one is passed to
            `cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)` after blurring).
        kernel_size: Aperture size handed to `cv2.medianBlur`.

    Returns:
        A new list with one blurred, single-channel image per input image,
        in the same order. The inputs are not modified.
    """
    return [
        cv2.cvtColor(cv2.medianBlur(image, kernel_size), cv2.COLOR_BGR2GRAY)
        for image in images
    ]

In [5]:
def angle_between_vectors(v1, v2):
    """Return the unsigned angle between `v1` and `v2`, in degrees (0 to 180).

    Both vectors are normalised first, so only their directions matter.
    A zero-length vector leads to a division by zero and a NaN result.
    """
    direction_1 = v1 / np.linalg.norm(v1)
    direction_2 = v2 / np.linalg.norm(v2)
    cosine = np.dot(direction_1, direction_2)
    # Rounding can push the cosine marginally outside [-1, 1], where arccos
    # is undefined; clamp it back before inverting.
    cosine = np.clip(cosine, -1.0, 1.0)
    angle_rad = np.arccos(cosine)
    return np.degrees(angle_rad)

In [9]:
def detect_and_mask_products(scene_image, model_images, ratio_threshold=0.7, min_matches=25):
    """Iteratively detect model instances in a scene using SIFT + FLANN.

    On every pass the (progressively masked) scene is matched against every
    model. The model with the most ratio-test matches wins; its outline is
    projected into the scene through a RANSAC homography and that region is
    filled with 0 so the next pass has to find something else.

    The loop stops when
      * the scene has fewer than two descriptors left,
      * the best model has fewer than `min_matches` good matches,
      * no homography could be estimated for the best model, or
      * any corner angle of the projected outline is outside 60..120 degrees.
    Outlines whose corner angles are not all within 85..95 degrees are masked
    but not reported.

    Args:
        scene_image: Image to search. It is copied; only the copy is masked.
        model_images: Sequence of reference images. Detections refer to
            models by their index in this sequence.
        ratio_threshold: A match is kept when its distance is below
            `ratio_threshold` times the distance of the second-best match.
        min_matches: Minimum number of good matches the best model must have
            for the search to continue.

    Returns:
        `(masked_image, detections)`. Each detection is the tuple
        `(model_index, match_count, position, width, height, corners)` where
        `corners` is the (4, 1, 2) array of projected model corners in the
        order top-left, bottom-left, bottom-right, top-right, `position` is
        the projected top-left corner, and `width` / `height` are the lengths
        in pixels of the projected top and left edges.
    """
    sift = cv2.SIFT_create()
    index_params = dict(algorithm=1, trees=5)
    search_params = dict(checks=50)
    flann = cv2.FlannBasedMatcher(index_params, search_params)

    # The models never change between passes, so extract their features once
    # instead of once per pass.
    model_features = [sift.detectAndCompute(model, None) for model in model_images]

    detections = []
    masked_image = scene_image.copy()

    while True:
        scene_keypoints, scene_descriptors = sift.detectAndCompute(masked_image, None)
        # k=2 matching needs at least two scene descriptors; a heavily masked
        # scene may yield fewer (or None).
        if scene_descriptors is None or len(scene_descriptors) < 2:
            break

        max_matches = 0
        best_match = None
        best_homography = None

        for model_idx, (model_keypoints, model_descriptors) in enumerate(model_features):
            if model_descriptors is None:
                continue  # featureless model: nothing to match
            matches = flann.knnMatch(model_descriptors, scene_descriptors, k=2)
            # Ratio test: keep a match only if it is clearly better than the
            # runner-up. Guard against entries with fewer than two neighbours.
            good_matches = [
                pair[0] for pair in matches
                if len(pair) == 2 and pair[0].distance < ratio_threshold * pair[1].distance
            ]

            if len(good_matches) > max_matches:
                max_matches = len(good_matches)
                best_match = model_idx
                # Keep the homography tied to the current best model.
                best_homography = None

                if len(good_matches) > 10:
                    src_pts = np.float32([model_keypoints[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
                    dst_pts = np.float32([scene_keypoints[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)
                    # May be None when RANSAC cannot estimate a model.
                    best_homography, _ = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)

        if max_matches < min_matches:
            break  # best candidate is too weak to be trusted

        if best_homography is None:
            # Without a homography nothing can be masked, so another pass
            # would look at exactly the same scene: stop instead of spinning.
            break

        # Project the model's corners into the scene.
        h, w = model_images[best_match].shape[:2]
        pts = np.float32([[0, 0], [0, h - 1], [w - 1, h - 1], [w - 1, 0]]).reshape(-1, 1, 2)
        dst = cv2.perspectiveTransform(pts, best_homography)

        # Turning angle at every corner of the projected quadrilateral
        # (90 degrees everywhere for a perfect rectangle).
        angles = []
        num_corners = len(dst)
        for i in range(num_corners):
            v1 = dst[i][0] - dst[i - 1][0]
            v2 = dst[(i + 1) % num_corners][0] - dst[i][0]
            angles.append(angle_between_vectors(v1, v2))

        print(angles)
        print(max_matches)

        # A strongly distorted outline ends the whole search.
        if any(60 > angle or angle > 120 for angle in angles):
            break

        # Mask the matched area so the next pass cannot match it again.
        cv2.fillConvexPoly(masked_image, dst.astype(np.int32), 0)

        # Outlines that are not close to rectangular are masked but not reported.
        if not all(85 <= angle <= 95 for angle in angles):
            continue

        position = tuple(np.int32(dst[0, 0]))
        # dst[0] is top-left, dst[1] bottom-left, dst[3] top-right:
        # the top edge gives the width, the left edge gives the height.
        width = int(np.linalg.norm(dst[0][0] - dst[3][0]))
        height = int(np.linalg.norm(dst[0][0] - dst[1][0]))

        detections.append((best_match, max_matches, position, width, height, dst))

    return masked_image, detections

In [11]:
def draw_detections_on_image(image, detections, model_images):
    """Outline every detection on `image` in green and tag it "Product <n>".

    `image` is drawn on in place and also returned. `n` is the detection's
    model index plus one. `model_images` is accepted but not used.

    Each detection must be a 6-tuple
    `(model_index, match_count, position, width, height, corners)`; only the
    index, the position (text anchor) and the corners (polygon) are used.
    """
    green = (0, 255, 0)  # BGR
    for model_index, _match_count, anchor, _width, _height, corners in detections:
        outline = [np.int32(corners)]
        cv2.polylines(image, outline, True, green, 3)
        caption = f'Product {model_index+1}'
        cv2.putText(image, caption, anchor, cv2.FONT_HERSHEY_SIMPLEX, 1, green, 2, cv2.LINE_AA)
    return image

In [12]:
# Setup paths (these two names are reused by later cells)
scenes_folder = '/content/dataset/scenes'
models_folder = '/content/dataset/models'

# Load and separate images: keep only the first part of each split, i.e. the
# models numbered <= 14 and the scenes numbered <= 5; the second parts are discarded here.
model_images_first_part, _ = load_and_split_images(models_folder, 14)
scene_images_first_part, _ = load_and_split_images(scenes_folder, 5, cv2.IMREAD_COLOR)

# Apply median filter and convert to grayscale (detection runs on these copies)
filtered_scene_images = apply_median_filter(scene_images_first_part, kernel_size=5)

# Detect products and mask them iteratively.
# Each filtered scene is paired with its original colour image: detection uses
# the former, drawing and display use the latter.
for scene_image, original_image in zip(filtered_scene_images, scene_images_first_part):
    result_image, product_detections = detect_and_mask_products(scene_image, model_images_first_part)
    # Draws directly on original_image (final_image is the same object).
    final_image = draw_detections_on_image(original_image, product_detections, model_images_first_part)
    for detection in product_detections:
        best_match, max_matches, position, width, height, dst = detection
        # best_match is a 0-based index into model_images_first_part; +1 turns it into a 1-based product number.
        print(f"Product {best_match + 1} {{match: {max_matches}, position: {position}, width: {width}px, height: {height}px}}")

    plt.imshow(cv2.cvtColor(final_image, cv2.COLOR_BGR2RGB))  # Display the final image with detections (matplotlib expects RGB)
    plt.title('Final Image with Detections')
    plt.axis('off')
    plt.show()

Output hidden; open in https://colab.research.google.com to view.

In [20]:
def detect_and_mask_products_taskb(scene_image, model_images, ratio_threshold=0.75, min_matches=10):
    """Iteratively detect model instances in a scene using SIFT + FLANN.

    On every pass the (progressively masked) scene is matched against every
    model. The model with the most ratio-test matches wins; its outline is
    projected into the scene through a RANSAC homography, reported, and the
    region is filled with 0 so the next pass has to find something else.

    The loop stops when
      * the scene has fewer than two descriptors left,
      * the best model has fewer than `min_matches` good matches,
      * no homography is available for the best model (it is only estimated
        with more than 10 good matches, and estimation itself can fail), or
      * any corner angle of the projected outline is outside 60..120 degrees.
    No stricter "close to 90 degrees" filter is applied: every outline that
    passes the 60..120 degree check is reported.

    Args:
        scene_image: Image to search. It is copied; only the copy is masked.
        model_images: Sequence of reference images. Detections refer to
            models by their index in this sequence.
        ratio_threshold: A match is kept when its distance is below
            `ratio_threshold` times the distance of the second-best match
            (a larger value is more permissive).
        min_matches: Minimum number of good matches the best model must have
            for the search to continue.

    Returns:
        `(masked_image, detections)`. Each detection is the tuple
        `(model_index, match_count, position, width, height, corners)` where
        `corners` is the (4, 1, 2) array of projected model corners in the
        order top-left, bottom-left, bottom-right, top-right, `position` is
        the projected top-left corner, and `width` / `height` are the lengths
        in pixels of the projected top and left edges.
    """
    sift = cv2.SIFT_create()
    index_params = dict(algorithm=1, trees=5)
    search_params = dict(checks=50)
    flann = cv2.FlannBasedMatcher(index_params, search_params)

    # The models never change between passes, so extract their features once
    # instead of once per pass.
    model_features = [sift.detectAndCompute(model, None) for model in model_images]

    detections = []
    masked_image = scene_image.copy()

    while True:
        scene_keypoints, scene_descriptors = sift.detectAndCompute(masked_image, None)
        # k=2 matching needs at least two scene descriptors; a heavily masked
        # scene may yield fewer (or None).
        if scene_descriptors is None or len(scene_descriptors) < 2:
            break

        max_matches = 0
        best_match = None
        best_homography = None

        for model_idx, (model_keypoints, model_descriptors) in enumerate(model_features):
            if model_descriptors is None:
                continue  # featureless model: nothing to match
            matches = flann.knnMatch(model_descriptors, scene_descriptors, k=2)
            # Ratio test: keep a match only if it is clearly better than the
            # runner-up. Guard against entries with fewer than two neighbours.
            good_matches = [
                pair[0] for pair in matches
                if len(pair) == 2 and pair[0].distance < ratio_threshold * pair[1].distance
            ]

            if len(good_matches) > max_matches:
                max_matches = len(good_matches)
                best_match = model_idx
                # Keep the homography tied to the current best model.
                best_homography = None

                if len(good_matches) > 10:
                    src_pts = np.float32([model_keypoints[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
                    dst_pts = np.float32([scene_keypoints[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)
                    # May be None when RANSAC cannot estimate a model.
                    best_homography, _ = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)

        if max_matches < min_matches:
            break  # best candidate is too weak to be trusted

        if best_homography is None:
            # Reached e.g. with exactly 10 good matches (enough to pass the
            # check above, too few for a homography) or when estimation fails.
            # Nothing can be masked, so another pass would look at exactly
            # the same scene: stop instead of spinning.
            break

        # Project the model's corners into the scene.
        h, w = model_images[best_match].shape[:2]
        pts = np.float32([[0, 0], [0, h - 1], [w - 1, h - 1], [w - 1, 0]]).reshape(-1, 1, 2)
        dst = cv2.perspectiveTransform(pts, best_homography)

        # Turning angle at every corner of the projected quadrilateral
        # (90 degrees everywhere for a perfect rectangle).
        angles = []
        num_corners = len(dst)
        for i in range(num_corners):
            v1 = dst[i][0] - dst[i - 1][0]
            v2 = dst[(i + 1) % num_corners][0] - dst[i][0]
            angles.append(angle_between_vectors(v1, v2))

        print(angles)
        print(max_matches)

        # A strongly distorted outline ends the whole search.
        if any(60 > angle or angle > 120 for angle in angles):
            break

        position = tuple(np.int32(dst[0, 0]))
        # dst[0] is top-left, dst[1] bottom-left, dst[3] top-right:
        # the top edge gives the width, the left edge gives the height.
        width = int(np.linalg.norm(dst[0][0] - dst[3][0]))
        height = int(np.linalg.norm(dst[0][0] - dst[1][0]))

        detections.append((best_match, max_matches, position, width, height, dst))

        # Mask the matched area so the next pass cannot match it again.
        cv2.fillConvexPoly(masked_image, dst.astype(np.int32), 0)

    return masked_image, detections

def draw_detections_on_image(image, detections, model_images, label_offset=0):
    """Outline every detection on `image` in green and tag it "Product <n>".

    NOTE: this redefines a function of the same name from an earlier cell,
    which labels detections with `index + 1`. Here the label is
    `index + label_offset`, so the default shows the raw 0-based index.

    Args:
        image: Image to draw on. It is modified in place and also returned.
        detections: Iterable of 6-tuples
            `(model_index, match_count, position, width, height, corners)`.
            Only the index, the position (text anchor) and the corners
            (polygon) are used.
        model_images: Accepted for signature compatibility; not used.
        label_offset: Value added to the model index to build the label,
            e.g. the product number of the first model in the list.

    Returns:
        The same `image` object that was passed in.
    """
    color = (0, 255, 0)  # Green (BGR) for outline and text
    for best_match, _max_matches, position, _width, _height, dst in detections:
        cv2.polylines(image, [np.int32(dst)], True, color, 3)
        label = f'Product {best_match + label_offset}'
        cv2.putText(image, label, position, cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2, cv2.LINE_AA)
    return image


In [17]:
# Load and separate images.
# Files numbered <= the split point go to the first part, the rest to the second.
# (models_folder / scenes_folder are defined in the earlier task-A cell.)
model_split_point = 14
scene_split_point = 5
model_images_first_part, model_images_second_part = load_and_split_images(models_folder, model_split_point)
scene_images_first_part, scene_images_second_part = load_and_split_images(scenes_folder, scene_split_point, cv2.IMREAD_COLOR)

# Index 0 of the second part is the first model *after* the split point, so
# its product number is split point + 1 (15), not 16.
# Assumes model files are numbered contiguously - verify against the dataset.
first_model_number = model_split_point + 1

# Apply median filter and convert to grayscale (detection runs on these copies)
filtered_scene_images = apply_median_filter(scene_images_second_part, kernel_size=5)

# Detect products and mask them iteratively.
# Each filtered scene is paired with its original colour image: detection uses
# the former, drawing and display use the latter.
for scene_image, original_image in zip(filtered_scene_images, scene_images_second_part):
    result_image, product_detections = detect_and_mask_products_taskb(scene_image, model_images_second_part)
    # Draws directly on original_image (final_image is the same object).
    final_image = draw_detections_on_image(original_image, product_detections, model_images_second_part)
    for detection in product_detections:
        best_match, max_matches, position, width, height, dst = detection
        print(f"Product {best_match + first_model_number} {{match: {max_matches}, position: {position}, width: {width}px, height: {height}px}}")

    plt.imshow(cv2.cvtColor(final_image, cv2.COLOR_BGR2RGB))  # Display the final image with detections (matplotlib expects RGB)
    plt.title('Final Image with Detections')
    plt.axis('off')
    plt.show()


Output hidden; open in https://colab.research.google.com to view.

To view a specific image from the dataset:

In [None]:
# import matplotlib.pyplot as plt
# scenes_folder = '/content/dataset/scenes'
# def apply_median_filter(images, kernel_size=9):
#     filtered_images = []
#     for img in images:
#         filtered_img = cv2.medianBlur(img, kernel_size)
#         filtered_images.append(filtered_img)
#     return filtered_images

# # Load and split images from the scenes folder
# scenes_folder = '/content/dataset/scenes'
# scene_images_first_part, _ = load_and_split_images(scenes_folder, 5, cv2.IMREAD_COLOR)

# # Apply the median filter to the loaded images
# filtered_scene_images = apply_median_filter(scene_images_first_part, kernel_size=5)

# # Convert a filtered image from BGR to RGB (not to grayscale, despite the variable name) and display it
# gray_image = cv2.cvtColor(filtered_scene_images[4], cv2.COLOR_BGR2RGB)
# plt.imshow(gray_image)  # Shown in colour; cmap='gray' would only matter for a single-channel image
# plt.axis('off')  # Hide axes for clarity
# plt.show()

In [None]:
# plt.imshow(model_images_first_part[5], cmap='gray')