In [87]:
import pandas as pd
import numpy as np
import cv2
import os
from collections import deque
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix


In [65]:
train_dataset = pd.read_csv('Train.csv')

Select Classes and then select 100 images each

In [70]:
selected_classes = [0, 14, 30, 33, 38, 40]
final_df = train_dataset[train_dataset["ClassId"].isin(selected_classes)].groupby("ClassId").head(100).reset_index(drop=True)

In [71]:
final_df

Unnamed: 0,Width,Height,Roi.X1,Roi.Y1,Roi.X2,Roi.Y2,ClassId,Path
0,29,30,5,6,24,25,0,Train/0/00000_00000_00000.png
1,30,30,5,5,25,25,0,Train/0/00000_00000_00001.png
2,30,30,5,5,25,25,0,Train/0/00000_00000_00002.png
3,31,31,5,5,26,26,0,Train/0/00000_00000_00003.png
4,30,32,5,6,25,26,0,Train/0/00000_00000_00004.png
...,...,...,...,...,...,...,...,...
595,39,40,5,6,33,34,40,Train/40/00040_00003_00005.png
596,40,41,5,6,35,36,40,Train/40/00040_00003_00006.png
597,42,42,6,6,37,37,40,Train/40/00040_00003_00007.png
598,43,43,5,5,37,38,40,Train/40/00040_00003_00008.png


Make Directory of Selected Data with ClassIds Folders in it

In [72]:
for classid in selected_classes:
    folder = os.path.join('SelectedData/' , str(classid))
    os.makedirs(folder , exist_ok=True)

Copy Images In Selected Data folder

In [73]:
for i , row in final_df.iterrows():
    class_id = str(row["ClassId"])
    source_path = os.path.join(row["Path"])
    print(source_path)
    img = cv2.imread(source_path)

    
    # Get image name from path
    img_name = row["Path"].split("/")[-1]
    destination_path = os.path.join('SelectedData/', class_id, img_name)

    # Save image to new path
    cv2.imwrite(destination_path, img)
print("Images are copied Successfully")

Train/0/00000_00000_00000.png
Train/0/00000_00000_00001.png
Train/0/00000_00000_00002.png
Train/0/00000_00000_00003.png
Train/0/00000_00000_00004.png
Train/0/00000_00000_00005.png
Train/0/00000_00000_00006.png
Train/0/00000_00000_00007.png
Train/0/00000_00000_00008.png
Train/0/00000_00000_00009.png
Train/0/00000_00000_00010.png
Train/0/00000_00000_00011.png
Train/0/00000_00000_00012.png
Train/0/00000_00000_00013.png
Train/0/00000_00000_00014.png
Train/0/00000_00000_00015.png
Train/0/00000_00000_00016.png
Train/0/00000_00000_00017.png
Train/0/00000_00000_00018.png
Train/0/00000_00000_00019.png
Train/0/00000_00000_00020.png
Train/0/00000_00000_00021.png
Train/0/00000_00000_00022.png
Train/0/00000_00000_00023.png
Train/0/00000_00000_00024.png
Train/0/00000_00000_00025.png
Train/0/00000_00000_00026.png
Train/0/00000_00000_00027.png
Train/0/00000_00000_00028.png
Train/0/00000_00000_00029.png
Train/0/00000_00001_00000.png
Train/0/00000_00001_00001.png
Train/0/00000_00001_00002.png
Train/0/00

### PIPELINE IMPLEMENTATION

### Step 1: Image Reading

In [74]:
def ReadImage():
    images = []
    labels = []
    for i in os.listdir('SelectedData'):
        class_path = os.path.join('SelectedData', i)
        for name in os.listdir(class_path):
            image_path = os.path.join(class_path , name)
            image = cv2.imread(image_path)
            
            image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            images.append(image_rgb)
            labels.append(int(i))
    return images , labels

In [75]:
images , labels = ReadImage()

In [76]:
print("Total images : ", len(labels))

Total images :  600


### Step 2: Preprocessing and Filtering

1. Mean Filter

In [58]:
def MeanFilter(image, kernal_size=3):
    padding = (kernal_size - 1 ) // 2
    padded_image = np.pad(image , ((padding , padding) , (padding , padding) , (0,0)) , mode='constant' , constant_values=0)
    output = np.copy(image)
    
    for i in range(image.shape[0]):
        for j in range(image.shape[1]):
            extracted_window = padded_image[i:i+kernal_size , j:j+kernal_size]
            output[i , j] = np.mean(extracted_window , axis=(0,1))
    return output.astype(image.dtype)

2. Gaussian Filter

In [59]:
def gaussian_filter_on_channel(image_channel):
    sigma=1.0
    kernel_size=3
    padding = (kernel_size - 1) // 2
    ax = np.arange(-(padding), (padding) + 1)  # create range 
    xx, yy = np.meshgrid(ax, ax) # create x and y dimentions 
    
    kernel = np.exp(-(xx**2 + yy**2) / (2 * sigma**2)) # gaussian formula exp(-(x² + y²) / 2σ²)
    kernel = kernel / np.sum(kernel) # To normalize

    img_padded = np.pad(image_channel, ((padding, padding), (padding, padding)), mode='reflect')
    output = np.copy(image_channel)

    for i in range(image_channel.shape[0]):
        for j in range(image_channel.shape[1]):
            region = img_padded[i:i+kernel_size, j:j+kernel_size]
            output[i, j] = np.sum(region * kernel)
    return np.clip(output, 0, 255)

def guassian_filter(image):
    r = gaussian_filter_on_channel(image[:, :, 0])
    g = gaussian_filter_on_channel(image[:, :, 1])
    b = gaussian_filter_on_channel(image[:, :, 2])
    return np.stack([r, g, b], axis=2)

3. Median Filter

In [14]:
def median_filter(image):
    kernal_size = 3
    padding = (kernal_size - 1) // 2
    padded_image  = np.pad(image, ((padding , padding) , (padding , padding) , (0, 0)) , mode='reflect')
    output = np.copy(image)

    for i in range(image.shape[0]):
        for j in range(image.shape[1]):
            extracted_window = padded_image[i:i+kernal_size, j:j+kernal_size]
            output[i, j] = np.median(extracted_window, axis=(0, 1))
    return output.astype(image.dtype)

4. Adaptive Median Filter

In [15]:
def adaptive_median_filter(image , max_kernal_size = 7):
    padding = (max_kernal_size - 1) // 2
    padded_image = np.pad(image , ((padding , padding) , (padding , padding) , (0 , 0)) , mode='reflect')
    output = np.copy(image)
    rows, cols, channels = image.shape

    for ch in range(channels):
        for i in range(rows):
            for j in range(cols):
                center_i, center_j = i + padding, j + padding

                for k_size in range(3 , max_kernal_size + 1 , 2):
                    half = k_size//2
                    window = padded_image[center_i - half : center_i + half+1, center_j - half : center_j + half+1, ch]
                    zmin = np.min(window)
                    zmax = np.max(window)
                    zmed = np.median(window)
                    if zmin < zmed and zmed < zmax:
                        zxy = padded_image[center_i, center_j, ch]
                        if zxy > zmin and zxy < zmax:
                            output[i, j, ch] = zxy
                        else:
                            output[i, j, ch] = zmed
                        break
                    elif k_size == max_kernal_size:
                        output[i,j,ch] = zmed
                        break
    return output.astype(image.dtype)

5. Unsharp Masking and High Boost Filtering

In [16]:
def unsharp_masking(image):
    blurred_image = guassian_filter(image)
    image = image.astype(np.float32)
    blurred_image = blurred_image.astype(np.float32)
    sharpened_image = image + (image - blurred_image)  
    return np.clip(sharpened_image, 0, 255).astype(np.uint8)

def high_boost_filtering(image, alpha=3):
    blurred_image = guassian_filter(image)
    boosted_image = alpha*image - blurred_image
    return np.clip(boosted_image, 0, 255).astype(np.uint8)

### Step 3: Color Space Conversion & Segmentation

1. RGB to HSV Conversion

In [17]:
def RGBtoHSV(image):
    image = image.astype('float32')/255.0
    # seperate red , green , blue pixels
    R , G , B = image[: , : , 0] , image[: , : , 1] , image[: , : , 2]
    
    cmax = np.max(image , axis = 2)
    cmin = np.min(image , axis = 2)
    delta = cmax - cmin

    hue = np.zeros_like(cmax, dtype=np.float32)

    mask = (delta!=0)
    idx = (cmax == R) & mask
    hue[idx] = 60 * (((G[idx] - B[idx]) / delta[idx]) % 6)
    idx = (cmax == G) & mask
    hue[idx] = 60 * (((B[idx] - R[idx]) / delta[idx]) + 2)
    idx = (cmax == B) & mask
    hue[idx] = 60 * (((R[idx] - G[idx]) / delta[idx]) + 4)

    hue = np.clip(hue , 0 , 180)
    hue = (hue / 2).astype(np.uint8)

    Saturation = np.zeros_like(cmax)
    Saturation[cmax != 0] = (delta[cmax != 0] / cmax[cmax != 0]) * 255
    Saturation = np.clip(Saturation, 0, 255).astype(np.uint8)

    Value = (cmax * 255).astype(np.uint8)

    hsv = np.stack([hue, Saturation, Value], axis=2).astype(np.uint8)
    return hsv

2. Masking Out

In [18]:
def segment_red_blue(hsv_image):
    H = hsv_image[: , : , 0]
    S = hsv_image[: , : , 1]
    V = hsv_image[: , : , 2]

    mask_red = (((H >= 0) & (H <= 10)) | ((H >= 170) & (H <= 179))) & (S >= 100) & (V>=80)
    mask_blue = ((H >= 100) & (H <= 140)) & (S >= 50) & (V >= 50)
    
    # convert masks back to images
    image_red = (mask_red * 255).astype(np.uint8)
    image_blue = (mask_blue * 255).astype(np.uint8)
    
    combined_image = cv2.bitwise_or(image_red, image_blue) # combine both images to get 1 image
    return combined_image, image_red, image_blue

3. Binary mask thresholding

In [19]:
def binary_threshold(image, thresh=128):
    binary = np.zeros_like(image, dtype=np.uint8)
    binary[image >= thresh] = 1
    return binary

4. Erosion

In [20]:
def erosion(binary_image):
    kernal_size = 3
    padding = kernal_size // 2
    padded_image = np.pad(binary_image , padding , mode='constant' , constant_values=0)
    output_image = np.copy(binary_image) 
    for i in range(binary_image.shape[0]):
        for j in range(binary_image.shape[1]):
            region = padded_image[i:i+kernal_size , j:j+kernal_size]
            if np.all(region == 1):
                output_image[i , j] = 1
            else:
                output_image[i, j] = 0
    return output_image

5. Dilation

In [21]:
def dilation(binary_image):
    kernal_size = 3
    padding = kernal_size // 2
    padded_image = np.pad(binary_image , padding , mode='constant' , constant_values=0)
    output_image = np.copy(binary_image) 
    for i in range(binary_image.shape[0]):
        for j in range(binary_image.shape[1]):
            region = padded_image[i:i+kernal_size , j:j+kernal_size]
            if np.any(region == 1):
                output_image[i , j] = 1
            else:
                output_image[i, j] = 0
    return output_image

6. Opening

In [22]:
def opening(binary_img):
    return dilation(erosion(binary_img))

7. Connected Component Filtering

In [23]:
def connected_component_filtering(binary_image, area_threshold=50):
    height, width = binary_image.shape
    labeled_image = np.zeros_like(binary_image, dtype=np.uint8)
    visited = np.zeros_like(binary_image, dtype=bool)

    for i in range(height):
        for j in range(width):
            if binary_image[i, j] == 1 and not visited[i, j]:
                coords = []
                queue = deque([(i,j)])
                visited[i, j] = True
                coords.append((i, j))

                while(queue):
                    x, y = queue.popleft()
                    for dx, dy in [(-1,0), (1,0), (0,-1), (0,1)]:
                        nx, ny = x+dx, y+dy
                        if (0 <= nx < height and 0 <= ny < width and 
                            binary_image[nx,ny] == 1 and not visited[nx,ny]):
                            visited[nx,ny] = True
                            coords.append((nx,ny))
                            queue.append((nx,ny))

                if len(coords) >= area_threshold:
                    for x, y in coords:
                        labeled_image[x, y] = 1
    return labeled_image

8. Hole filling

In [24]:
def hole_filling(binary_img):
    filled = np.copy(binary_img)  # Create a copy of the input binary image
    height, width = binary_img.shape  # Get the image dimensions
    visited = np.zeros_like(binary_img, dtype=bool)  # Initialize a visited array
    
    def flood_fill(x, y):
        queue = deque([(x, y)])
        visited[x, y] = True
        filled[x, y] = 1
        
        while queue:
            cx, cy = queue.popleft()
            for dx, dy in [(-1,0), (1,0), (0,-1), (0,1)]:
                nx, ny = cx + dx, cy + dy
                if (0 <= nx < height and 0 <= ny < width and 
                    not visited[nx, ny] and binary_img[nx, ny] == 0):
                    visited[nx, ny] = True
                    filled[nx, ny] = 1
                    queue.append((nx, ny))

    # Start flood fill from border
    for i in range(height):
        # Check the first column  of each row
        if binary_img[i, 0] == 0:  # If border pixel is 0 (background), start flood fill 
            flood_fill(i, 0)
        if width > 1 and binary_img[i, width-1] == 0:
            flood_fill(i, width-1)
    for j in range(width):
        #check the first row of each column
        if binary_img[0, j] == 0:  # If border pixel is 0 (background), start flood fill
            flood_fill(0, j)
        if height > 1 and binary_img[height-1, j] == 0:
            flood_fill(height-1, j)

    # Invert filled image to get hole-filled image: `0` becomes `1` and `1` becomes `0`
    hole_filled = np.where(filled == 0, 1, 0).astype(np.uint8)
    
    # Combine the original image with the hole-filled image (background remains intact)
    return np.maximum(binary_img, hole_filled)

### Step 4: Edge Detection 

1. Sobel Operator

In [25]:
def sobel_filter(image):
    kernel_size = 3
    padding = (kernel_size-1) // 2
    padded_image = np.pad(image, padding, mode='edge')
    output_magnitude = np.copy(image)
    output_direction = np.copy(image)
    
    sobel_horizontal = [[-1, -2, -1], [0, 0, 0], [1, 2, 1]]
    sobel_vertical = [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]
    
    for i in range(image.shape[0]):
        for j in range(image.shape[1]):
            region = padded_image[i:i + kernel_size, j:j + kernel_size]
            
            gx = np.sum(region * sobel_horizontal)
            gy = np.sum(region * sobel_vertical)
            
            output_magnitude[i, j] = np.sqrt(gx**2 + gy**2)
            output_direction[i, j] = np.arctan2(gy, gx) * (180/np.pi) # convert to degrees      
     
    output_direction[output_direction < 0] += 180
    return output_magnitude, output_direction

2. Non-maximum Suppression

In [26]:
# to keep the pixels having values greater than their 2 neighbours
def Non_maximum_suppression(mag_image, angle_matrix):
    output = np.copy(mag_image)
    
    for i in range(1, mag_image.shape[0]-1):
        for j in range(1, mag_image.shape[1]-1):
            q = 0
            r = 0
            
            #angle 0
            if(0 <= angle_matrix[i, j] < 22.5) or (157.5 <= angle_matrix[i, j] <= 180):
                q = mag_image[i, j+1]
                r = mag_image[i, j-1]
            #angle 45
            elif (22.5 <= angle_matrix[i, j] < 67.5):
                q = mag_image[i+1, j-1]
                r = mag_image[i-1, j+1]
            #angle 90
            elif (67.5 <= angle_matrix[i,j] < 112.5):
                q = mag_image[i+1, j]
                r = mag_image[i-1, j]
            #angle 135
            elif (112.5 <= angle_matrix[i,j] < 157.5):
                q = mag_image[i-1, j-1]
                r = mag_image[i+1, j+1]
                
            if (mag_image[i, j] >= q) and (mag_image[i, j] >= r):
                output[i, j] = mag_image[i, j]
            else:
                output[i, j] = 0

    return output

3. Double Thresholding

In [27]:
# helps reduce noise and preserve meaningful edges
# to classify pixels as strong(confirm edge), weak(can be edge), non-relevant(not an edge) pixels
def thresholding(image, high_thresh=0.1, low_thresh=0.03):
    strong = 255
    weak = 75
    max_val = np.max(image)

    h_th = max_val * high_thresh
    l_th = h_th * low_thresh

    output = np.copy(image)
    
    output[image >= h_th] = strong
    output[(image < h_th) & (image >= l_th)] = weak
    output[image < l_th] = 0
    
    return output, strong , weak

4. Edge Tracking

In [28]:
# if a pixel has atleast 1 strong pixel around, make the weak pixel strong as well.
def edge_tracking(image, strong, weak):
    output = image.copy()
    for i in range(1, image.shape[0]-1):
        for j in range(1, image.shape[1]-1):
            if image[i, j] == weak:
                region = image[i-1:i+2, j-1: j+2]
                if np.any(region == strong):
                    output[i, j] = strong
                else:
                    output[i, j] = 0
                    
    return output

In [29]:
def canny_edge_detector(image):
    # check dimensions if 3, convert to grayscale
    if image.ndim == 3 and image.shape[2] == 3:
        gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    # if already grayscale
    elif image.ndim == 2:
        gray_image = image
        
    # remove noise
    blurred = gaussian_filter_on_channel(gray_image)

    # apply sobel
    mag, angle = sobel_filter(blurred)

    # apply non-maximum suppression
    suppressed = Non_maximum_suppression(mag, angle)

    # double thresholding
    threshold_image, strong, weak = thresholding(suppressed)

    # track edges
    edges = edge_tracking(threshold_image, strong, weak)
    return edges

### Step 5: Geometric Normalization 

In [30]:
# def affine_transformations(image, angle_degrees = 90):
#     rows, columns = image.shape[:2]
#     center_x, center_y = columns/2, rows/2
#     target_width, target_height = 200, 200
    
#     angle_radians = np.deg2rad(angle_degrees)
#     cos_theeta = np.cos(angle_radians)
#     sin_theeta = np.sin(angle_radians)
    
#     # rotation
#     rotation = [[cos_theeta, -sin_theeta, 0], [sin_theeta, cos_theeta, 0], [0,0,1]]
    
#     corners_rel_to_center = np.array([
#         [-center_x, -center_y, 1], # Top-left relative to center
#         [rows - center_x, -center_y, 1], # Top-right relative to center
#         [rows - center_x, columns - center_y, 1], # Bottom-right relative to center
#         [-center_x, columns - center_y, 1]]).T # Transpose to make each column a point [x,y,1] (shape 3x4)
    
#     # Rotate these centered corners around (0,0)
#     rotated_corners_at_origin = rotation @ corners_rel_to_center

#     width_after_rotation = columns
#     height_after_rotation = rows
  
#     # scaling
#     scale_x = target_width / width_after_rotation
#     scale_y = target_height / height_after_rotation 
    
#     scaling = np.array([[scale_x, 0, 0], [0, scale_y, 0],[0,0,1]])
    
#     output = image @ rotation @ scaling
    
#     return output

def label_connected_components_manual(binary_mask_01):
    if binary_mask_01.size == 0: return np.array([], dtype=np.int32), 0
    rows, cols = binary_mask_01.shape
    labeled_mask = np.zeros_like(binary_mask_01, dtype=np.int32); current_label_val = 0
    visited_cca = np.zeros_like(binary_mask_01, dtype=bool)
    for r_init in range(rows):
        for c_init in range(cols):
            if binary_mask_01[r_init, c_init] == 1 and not visited_cca[r_init, c_init]:
                current_label_val += 1; q_bfs = deque([(r_init, c_init)])
                visited_cca[r_init, c_init] = True; labeled_mask[r_init, c_init] = current_label_val
                while q_bfs:
                    r_curr, c_curr = q_bfs.popleft()
                    for dr_bfs, dc_bfs in [(0,1), (0,-1), (1,0), (-1,0)]:
                        nr_bfs, nc_bfs = r_curr + dr_bfs, c_curr + dc_bfs
                        if 0 <= nr_bfs < rows and 0 <= nc_bfs < cols and \
                           binary_mask_01[nr_bfs, nc_bfs] == 1 and not visited_cca[nr_bfs, nc_bfs]:
                            visited_cca[nr_bfs, nc_bfs] = True; labeled_mask[nr_bfs, nc_bfs] = current_label_val
                            q_bfs.append((nr_bfs,nc_bfs))
    return labeled_mask, current_label_val

def manual_get_largest_component_mask(binary_mask_01):
    if binary_mask_01.size == 0: return np.array([], dtype=np.uint8)
    labeled_array, num_labels = label_connected_components_manual(binary_mask_01)
    if num_labels == 0: return np.zeros_like(binary_mask_01, dtype=np.uint8)
    areas = np.bincount(labeled_array.ravel())
    if len(areas) <= 1: return np.zeros_like(binary_mask_01, dtype=np.uint8)
    largest_label = np.argmax(areas[1:]) + 1
    return (labeled_array == largest_label).astype(np.uint8)

def manual_moments_from_mask(binary_mask_01):
    if binary_mask_01.size == 0: return {'m00':0,'cx':0,'cy':0,'angle':0}
    y_coords, x_coords = np.where(binary_mask_01 == 1)
    m00 = len(x_coords)
    if m00 == 0: return {'m00':0,'cx':0,'cy':0,'angle':0}
    m10 = np.sum(x_coords); m01 = np.sum(y_coords)
    cx = m10 / m00; cy = m01 / m00
    x_prime = x_coords - cx; y_prime = y_coords - cy
    mu20 = np.sum(x_prime**2); mu02 = np.sum(y_prime**2); mu11 = np.sum(x_prime * y_prime)
    angle_rad = 0.5 * np.arctan2(2 * mu11, mu20 - mu02)
    angle_deg = np.degrees(angle_rad)
    final_rotation_angle = -angle_deg
    return {'m00': m00, 'cx': cx, 'cy': cy, 'angle': final_rotation_angle}

def manual_bounding_rect_from_mask(binary_mask_01):
    if binary_mask_01.size == 0 or np.sum(binary_mask_01) == 0: return None
    rows, cols = np.where(binary_mask_01 == 1)
    min_r, max_r = np.min(rows), np.max(rows)
    min_c, max_c = np.min(cols), np.max(cols)
    return (min_c, min_r, max_c - min_c + 1, max_r - min_r + 1)

def manual_rotate_image(image, angle_deg, center_xy=None, interpolation='bilinear'):
    rows, cols = image.shape[:2]; is_color = image.ndim == 3
    center_x, center_y = (cols / 2.0, rows / 2.0) if center_xy is None else center_xy
    angle_rad = np.radians(angle_deg); cos_a, sin_a = np.cos(angle_rad), np.sin(angle_rad)
    corners = np.array([[0,0],[cols-1,0],[cols-1,rows-1],[0,rows-1]],dtype=np.float32) - np.array([center_x,center_y])
    rot_corn_x = corners[:,0]*cos_a - corners[:,1]*sin_a + center_x
    rot_corn_y = corners[:,0]*sin_a + corners[:,1]*cos_a + center_y
    min_x,max_x=np.min(rot_corn_x),np.max(rot_corn_x); min_y,max_y=np.min(rot_corn_y),np.max(rot_corn_y)
    new_cols,new_rows=int(np.ceil(max_x-min_x)),int(np.ceil(max_y-min_y))
    rotated_image = np.zeros((new_rows, new_cols, image.shape[2]) if is_color else (new_rows,new_cols), dtype=image.dtype)
    for r_out in range(new_rows):
        for c_out in range(new_cols):
            dest_x_orig_sys, dest_y_orig_sys = c_out+min_x, r_out+min_y
            src_c = center_x + (dest_x_orig_sys-center_x)*cos_a + (dest_y_orig_sys-center_y)*sin_a
            src_r = center_y - (dest_x_orig_sys-center_x)*sin_a + (dest_y_orig_sys-center_y)*cos_a
            if interpolation == 'nearest':
                src_r_int, src_c_int = int(round(src_r)), int(round(src_c))
                if 0<=src_r_int<rows and 0<=src_c_int<cols: rotated_image[r_out,c_out]=image[src_r_int,src_c_int]
            elif interpolation == 'bilinear':
                if not (0<=src_r<rows-1 and 0<=src_c<cols-1): continue
                x1,y1=int(np.floor(src_c)),int(np.floor(src_r)); x2,y2=x1+1,y1+1
                q11,q12,q21,q22=image[y1,x1],image[y2,x1],image[y1,x2],image[y2,x2]
                dx,dy = src_c-x1, src_r-y1
                top=(1-dx)*q11+dx*q21; bottom=(1-dx)*q12+dx*q22; val=(1-dy)*top+dy*bottom
                rotated_image[r_out,c_out]=np.clip(val,0,255).astype(image.dtype)
    return rotated_image

def manual_scale_image(image, target_wh, interpolation='bilinear'):
    target_w, target_h = target_wh
    if image.size==0 or target_w<=0 or target_h<=0: return np.zeros((target_h,target_w,image.shape[2]) if image.ndim==3 and image.shape[2] else (target_h,target_w), dtype=image.dtype)
    src_h,src_w=image.shape[:2]; is_color=image.ndim==3
    scaled_image = np.zeros((target_h,target_w,image.shape[2]) if is_color else (target_h,target_w),dtype=image.dtype)
    ratio_w,ratio_h = src_w/target_w, src_h/target_h
    for r_out in range(target_h):
        for c_out in range(target_w):
            src_c,src_r = (c_out+0.5)*ratio_w-0.5, (r_out+0.5)*ratio_h-0.5
            if interpolation == 'nearest':
                src_r_int,src_c_int = max(0,min(int(round(src_r)),src_h-1)), max(0,min(int(round(src_c)),src_w-1))
                scaled_image[r_out,c_out]=image[src_r_int,src_c_int]
            elif interpolation == 'bilinear':
                if not (0<=src_r<src_h-1 and 0<=src_c<src_w-1):
                    src_r_int_b,src_c_int_b = max(0,min(int(round(src_r)),src_h-1)), max(0,min(int(round(src_c)),src_w-1))
                    scaled_image[r_out,c_out]=image[src_r_int_b,src_c_int_b]; continue
                x1,y1=int(np.floor(src_c)),int(np.floor(src_r)); x2,y2=min(x1+1,src_w-1),min(y1+1,src_h-1)
                q11,q12,q21,q22=image[y1,x1],image[y2,x1],image[y1,x2],image[y2,x2]
                dx,dy = src_c-x1, src_r-y1
                top=(1-dx)*q11+dx*q21; bottom=(1-dx)*q12+dx*q22; val=(1-dy)*top+dy*bottom
                scaled_image[r_out,c_out]=np.clip(val,0,255).astype(image.dtype)
    return scaled_image

def affine_transformations(image_to_transform_rgb, edges_for_contour_uint8):
    fixed_target_size_wh = (200, 200)
    if image_to_transform_rgb is None or image_to_transform_rgb.size == 0:
        return manual_scale_image(np.array([],dtype=np.uint8), fixed_target_size_wh)
    object_mask = manual_get_largest_component_mask(edges_for_contour_uint8)
    if object_mask.size == 0 or np.sum(object_mask) == 0:
        return manual_scale_image(image_to_transform_rgb, fixed_target_size_wh, interpolation='bilinear')
    moments = manual_moments_from_mask(object_mask)
    if moments['m00'] == 0:
        return manual_scale_image(image_to_transform_rgb, fixed_target_size_wh, interpolation='bilinear')
    angle_to_rotate = moments['angle']; contour_center_xy = (moments['cx'], moments['cy'])
    rotated_rgb_image = manual_rotate_image(image_to_transform_rgb, angle_to_rotate, center_xy=contour_center_xy, interpolation='bilinear')
    rotated_object_mask = manual_rotate_image(object_mask, angle_to_rotate, center_xy=contour_center_xy, interpolation='nearest')
    bbox_rotated = manual_bounding_rect_from_mask(rotated_object_mask)
    cropped_after_rotation = rotated_rgb_image
    if bbox_rotated:
        x,y,w,h = bbox_rotated
        if w>0 and h>0:
            y_end=min(y+h,rotated_rgb_image.shape[0]); x_end=min(x+w,rotated_rgb_image.shape[1])
            y_start=max(0,y); x_start=max(0,x)
            if y_end > y_start and x_end > x_start:
                cropped_after_rotation = rotated_rgb_image[y_start:y_end, x_start:x_end]
    if cropped_after_rotation.size == 0:
        normalized = manual_scale_image(rotated_rgb_image, fixed_target_size_wh, interpolation='bilinear')
    else:
        normalized = manual_scale_image(cropped_after_rotation, fixed_target_size_wh, interpolation='bilinear')
    return normalized

### Step 6: Feature Extraction 

In [31]:
def convolve2d_manual(image_channel_float, kernel):
    k_h, k_w = kernel.shape; img_h, img_w = image_channel_float.shape
    pad_h, pad_w = k_h//2, k_w//2
    padded = np.pad(image_channel_float, ((pad_h,pad_h),(pad_w,pad_w)), mode='edge')
    out = np.zeros_like(image_channel_float, dtype=np.float32)
    for r_img in range(img_h):
        for c_img in range(img_w):
            out[r_img,c_img] = np.sum(padded[r_img:r_img+k_h, c_img:c_img+k_w]*kernel)
    return out

In [32]:
def sobel_filters_manual_for_harris(gray_float):
    Kx=np.array([[-1,0,1],[-2,0,2],[-1,0,1]],np.float32); Ky=np.array([[-1,-2,-1],[0,0,0],[1,2,1]],np.float32)
    return convolve2d_manual(gray_float,Kx), convolve2d_manual(gray_float,Ky)

In [33]:
def gaussian_kernel_2d_for_harris(size, sigma=1.):
    if size%2==0: size+=1
    ax=np.arange(-size//2+1.,size//2+1.); xx,yy=np.meshgrid(ax,ax)
    k=np.exp(-(xx**2+yy**2)/(2.*sigma**2)); return k/np.sum(k) if np.sum(k)!=0 else np.ones((size,size))/(size*size)

In [34]:
def harris_corner_detection_manual(gray_float_mask, win_sz=5, k_harris=0.04, thr_ratio=0.01):
    if gray_float_mask.size==0: return 0
    Ix,Iy=sobel_filters_manual_for_harris(gray_float_mask)
    Ixx,Iyy,Ixy = Ix**2, Iy**2, Ix*Iy
    gauss_k = gaussian_kernel_2d_for_harris(win_sz, sigma=1.5)
    Sxx,Syy,Sxy = convolve2d_manual(Ixx,gauss_k),convolve2d_manual(Iyy,gauss_k),convolve2d_manual(Ixy,gauss_k)
    detM,traceM = (Sxx*Syy)-(Sxy**2), Sxx+Syy
    R_matrix = detM - k_harris*(traceM**2)
    maxR = R_matrix.max() if R_matrix.size>0 else 0; threshold_R = maxR*thr_ratio if maxR>0 else 0
    corner_count=0
    R_pad = np.pad(R_matrix,((1,1),(1,1)),mode='constant',constant_values=(R_matrix.min()-1 if R_matrix.size>0 else -1))
    for r_corn in range(R_matrix.shape[0]):
        for c_corn in range(R_matrix.shape[1]):
            if R_matrix[r_corn,c_corn] > threshold_R and \
               R_matrix[r_corn,c_corn] == np.max(R_pad[r_corn:r_corn+3,c_corn:c_corn+3]):
                corner_count+=1
    return corner_count

In [35]:
def label_connected_components_manual(binary_mask_01):
    if binary_mask_01.size == 0: return np.array([], dtype=np.int32), 0
    rows, cols = binary_mask_01.shape
    labeled_mask = np.zeros_like(binary_mask_01, dtype=np.int32); current_label_val = 0
    visited_cca = np.zeros_like(binary_mask_01, dtype=bool)
    for r_init in range(rows):
        for c_init in range(cols):
            if binary_mask_01[r_init, c_init] == 1 and not visited_cca[r_init, c_init]:
                current_label_val += 1; q_bfs = deque([(r_init, c_init)])
                visited_cca[r_init, c_init] = True; labeled_mask[r_init, c_init] = current_label_val
                while q_bfs:
                    r_curr, c_curr = q_bfs.popleft()
                    for dr_bfs, dc_bfs in [(0,1), (0,-1), (1,0), (-1,0)]:
                        nr_bfs, nc_bfs = r_curr + dr_bfs, c_curr + dc_bfs
                        if 0 <= nr_bfs < rows and 0 <= nc_bfs < cols and \
                           binary_mask_01[nr_bfs, nc_bfs] == 1 and not visited_cca[nr_bfs, nc_bfs]:
                            visited_cca[nr_bfs, nc_bfs] = True; labeled_mask[nr_bfs, nc_bfs] = current_label_val
                            q_bfs.append((nr_bfs,nc_bfs))
    return labeled_mask, current_label_val

def manual_get_largest_component_mask(binary_mask_01):
    if binary_mask_01.size == 0: return np.array([], dtype=np.uint8)
    labeled_array, num_labels = label_connected_components_manual(binary_mask_01)
    if num_labels == 0: return np.zeros_like(binary_mask_01, dtype=np.uint8)
    areas = np.bincount(labeled_array.ravel())
    if len(areas) <= 1: return np.zeros_like(binary_mask_01, dtype=np.uint8)
    largest_label = np.argmax(areas[1:]) + 1
    return (labeled_array == largest_label).astype(np.uint8)

def manual_bounding_rect_from_mask(binary_mask_01):
    if binary_mask_01.size == 0 or np.sum(binary_mask_01) == 0: return None
    rows, cols = np.where(binary_mask_01 == 1)
    min_r, max_r = np.min(rows), np.max(rows)
    min_c, max_c = np.min(cols), np.max(cols)
    return (min_c, min_r, max_c - min_c + 1, max_r - min_r + 1)

def get_contour_properties_from_mask_manual(binary_mask_01):
    if binary_mask_01.size == 0: return 0, 0, None
    object_mask_largest = manual_get_largest_component_mask(binary_mask_01)
    if np.sum(object_mask_largest) == 0: return 0, 0, None
    area = np.sum(object_mask_largest)
    padded_mask = np.pad(object_mask_largest, 1, 'constant', constant_values=0)
    perimeter_robust = 0
    for r_p in range(1, padded_mask.shape[0]-1):
        for c_p in range(1, padded_mask.shape[1]-1):
            if padded_mask[r_p, c_p] == 1:
                if padded_mask[r_p-1,c_p]==0 or padded_mask[r_p+1,c_p]==0 or \
                   padded_mask[r_p,c_p-1]==0 or padded_mask[r_p,c_p+1]==0:
                    perimeter_robust +=1
    bbox = manual_bounding_rect_from_mask(object_mask_largest)
    return area, perimeter_robust, bbox

feature extraction function

In [36]:
def extract_sign_features(norm_rgb_img, norm_binary_mask, norm_hsv_img):
    features = {}
    default_feats = {'corner_count':0, 'area':0, 'perimeter':0, 'circularity':0.0,
                     'aspect_ratio':0.0, 'extent':0.0, 'average_hue':-1.0,
                     'red_percent':0.0, 'blue_percent':0.0}
    if norm_binary_mask.size == 0 or norm_rgb_img.size == 0 or norm_hsv_img.size == 0:
        return default_feats.copy()
    features['corner_count'] = harris_corner_detection_manual(norm_binary_mask.astype(np.float32))
    area, perimeter, bbox = get_contour_properties_from_mask_manual(norm_binary_mask)
    features['area'] = area; features['perimeter'] = perimeter
    if area == 0 or perimeter == 0 or bbox is None:
        features.update(default_feats); features['corner_count'] = harris_corner_detection_manual(norm_binary_mask.astype(np.float32))
        features['area'] = area; features['perimeter'] = perimeter
        return features
    features['circularity'] = (4 * np.pi * area) / (perimeter**2) if perimeter > 0 else 0.0
    _, _, box_w, box_h = bbox
    features['aspect_ratio'] = float(box_w) / box_h if box_h > 0 else 0.0
    features['extent'] = area / float(box_w * box_h) if (box_w * box_h) > 0 else 0.0
    hue_channel = norm_hsv_img[:, :, 0]
    hues_in_mask = hue_channel[norm_binary_mask == 1]
    if hues_in_mask.size > 0:
        hues_rad = hues_in_mask * (2 * np.pi / 180.0)
        sum_cos = np.sum(np.cos(hues_rad)); sum_sin = np.sum(np.sin(hues_rad))
        mean_rad = np.arctan2(sum_sin, sum_cos); mean_deg = mean_rad * (180.0 / np.pi)
        if mean_deg < 0: mean_deg += 360.0
        features['average_hue'] = mean_deg / 2.0
    else: features['average_hue'] = -1.0
    total_pixels_norm = float(norm_hsv_img.shape[0] * norm_hsv_img.shape[1])
    if total_pixels_norm > 0 :
        _ , red_mask_norm, blue_mask_norm = segment_red_blue(norm_hsv_img)
        features['red_percent'] = (np.sum(red_mask_norm > 0) / total_pixels_norm) * 100.0
        features['blue_percent'] = (np.sum(blue_mask_norm > 0) / total_pixels_norm) * 100.0
    else: features['red_percent'] = 0.0; features['blue_percent'] = 0.0
    return features

Image Claasifier Model

In [93]:
class ImageClassifier:
    def __init__(self):
        self.results = []
    
    #classification of images
    def classify_sign_with_rules(self, features):
        colors = {'red': features['red_percent'], 'blue': features['blue_percent']}
        dominant_color = max(colors, key=colors.get)

        circularity = features.get('circularity', 0)
        aspect_ratio = features.get('aspect_ratio', 1)
        ext = features.get('extent', 0)
        corners = features.get('corner_count', 0)
        avg_hue = features.get('average_hue', 0)

        # class 0: speed limit 20 (red circle)
        if (dominant_color == 'red' and (4 <= corners <= 30) and 0.7 <= circularity <= 1.3 and aspect_ratio >= 0.6 and ext >= 0.5 and 0 <= avg_hue <= 12):
           return 0
        # class 33: Go right
        elif dominant_color == 'red' and (corners > 50) and circularity > 0.3 and (aspect_ratio < 1.5) and (0.3 < ext < 0.8) and (3.0 < avg_hue < 10.0):
            return 33
        # class 30: snow sign
        elif dominant_color == 'red' and (corners < 25) and (0.6 <= circularity <= 1.5) and aspect_ratio <= 1.5 and (0.4 <= ext <= 0.85) and (4.5 <= avg_hue <= 9.9):
            return 30
        # class 14: stop sign (red octagon)
        elif dominant_color == 'red' and (4 <= corners <= 15)  and (circularity < 1.5) and (8 > aspect_ratio > 0.2) and ext < 1.0 and (avg_hue < 10):
            return 14
        # class 40: U-turn
        elif corners > 30  and (circularity < 0.99) and (0.3 <= aspect_ratio < 1.5) and (0.3 <= ext <= 0.9) and (5.0 <= avg_hue <= 8.5):
            return 40
        # class 38: Keep right
        elif  (corners > 50) and circularity < 0.5 and (aspect_ratio < 2.5) and (ext < 1.0) and (5.0 <= avg_hue <= 10.0):
            return 38

        return 0
    
    # pipeline execution
    def process_image(self, image):
        # sharpened image
        sharpened_img = unsharp_masking(image)

        # image segmentation
        hsv_image = RGBtoHSV(sharpened_img)

        # mask segmentation
        combined_image, _, _ = segment_red_blue(hsv_image)

        # apply binary threshold
        binary_mask_01 = binary_threshold(combined_image, thresh=1)

        # morphological operations (erosion, dilation, opening)
        morph = opening(binary_mask_01)
        cleaned = connected_component_filtering(morph, area_threshold=100)
        filled = hole_filling(cleaned)
        
        # edge detection
        edges = canny_edge_detector((filled * 255).astype(np.uint8))
        
        # geometric normalization
        normalized_image = affine_transformations(sharpened_img, edges)

        # feature extraction
        normalized_hsv = RGBtoHSV(normalized_image)
        norm_combined_mask,_,_ = segment_red_blue(normalized_hsv)
        norm_binary_mask = binary_threshold(norm_combined_mask)
        norm_opened_mask = opening(norm_binary_mask)
        norm_cleaned_mask = connected_component_filtering(norm_opened_mask)
        final_norm_mask_for_features = hole_filling(norm_cleaned_mask)

        features = extract_sign_features(normalized_image, final_norm_mask_for_features, normalized_hsv)

        if (features['corner_count'] == 0 and features['area'] == 0 and features['perimeter'] == 0 and features['circularity'] == 0.0 and features['aspect_ratio'] == 0.0 and features['extent'] == 0.0 and features['average_hue'] == -1.0 and features['red_percent'] == 0.0 and features['blue_percent'] == 0.0):
            return None
        else:
            return features
    
    def train(self):
        true_labels = []
        predicted_labels = []
        print("Training started.......")
        print(" ")
        for class_id in os.listdir("SelectedData"):
            path = os.path.join("SelectedData",class_id)
            for image_name in os.listdir(path):
                image_path = os.path.join(path, image_name)

                image = cv2.imread(image_path)
                print("Processing image : ", image_name)
                features = self.process_image(image) # sending image array
                if features == None:
                    continue  # skip the None example

                else:
                    prediction = self.classify_sign_with_rules(features)
                    self.results.append({'filename':image_name, 'actual label':class_id, 'prediction': prediction})
                    true_labels.append(str(class_id))
                    predicted_labels.append(str(prediction))
                    
        if not self.results:
            print("Images not processed successfully.")
    
        # saving results to csv
        df = pd.DataFrame(self.results)
        df.to_csv("results.csv", index=False)

        # calculate metrics (overall accuracy, class-wise accuracy, class-wise precision, class-wise recall)

        correct = sum(1 for t, p in zip(true_labels, predicted_labels) if t == p)
        total = len(true_labels)
        overall_accuracy = (correct / total) * 100
        labels = [0, 14, 30, 33, 38, 40]

        print(f"Overall Accuracy: {overall_accuracy:.2f}%")

        metrics_summary = [f"Overall Accuracy: {overall_accuracy:.4f}\n", "--- Class-wise Metrics ---"]

        for label in labels:
            label = str(label)
            if label == -1:
                continue

            else:
                tp = np.sum(1 for t, p in zip(true_labels, predicted_labels) if t == label and p == label)
                fp = np.sum(1 for t, p in zip(true_labels, predicted_labels) if t != label and p == label)
                fn = np.sum(1 for t, p in zip(true_labels, predicted_labels) if t == label and p != label)

                precision = tp / (tp + fp) if (tp + fp) > 0 else 0
                recall = tp / (tp + fn) if (tp + fn) > 0 else 0
                f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

                metrics_summary.extend([f"\nClass: {label}", f"  Precision: {precision:.4f}", f"  Recall:    {recall:.4f}", f"  F1-Score:  {f1_score:.4f}"])

        # write results to file
        with open('metrics.txt', 'w') as f:
            for line in metrics_summary:
                f.write(line + '\n')


        # create confusion matrix
        cm = confusion_matrix(true_labels, predicted_labels)
        plt.figure(figsize = (6, 5))
        sns.heatmap(cm, annot = True, xticklabels=labels, yticklabels=labels, cmap='Blues')
        plt.xlabel('Predicted labels')
        plt.ylabel('True')
        plt.title('Confusion Matrix')

        plt.savefig('confusion_matrix.png', dpi=300, bbox_inches='tight')
        plt.close()

Training the model

In [94]:
model = ImageClassifier()
model.train()

print("Model Evalution commpleted. Results saved to results.csv, metrics.txt, confusion_matrix.png")

Training started.......
 
Processing image :  00000_00000_00000.png
Processing image :  00000_00000_00001.png
Processing image :  00000_00000_00002.png
Processing image :  00000_00000_00003.png
Processing image :  00000_00000_00004.png
Processing image :  00000_00000_00005.png
Processing image :  00000_00000_00006.png
Processing image :  00000_00000_00007.png
Processing image :  00000_00000_00008.png
Processing image :  00000_00000_00009.png
Processing image :  00000_00000_00010.png
Processing image :  00000_00000_00011.png
Processing image :  00000_00000_00012.png
Processing image :  00000_00000_00013.png
Processing image :  00000_00000_00014.png
Processing image :  00000_00000_00015.png
Processing image :  00000_00000_00016.png
Processing image :  00000_00000_00017.png
Processing image :  00000_00000_00018.png
Processing image :  00000_00000_00019.png
Processing image :  00000_00000_00020.png
Processing image :  00000_00000_00021.png
Processing image :  00000_00000_00022.png
Processi

  tp = np.sum(1 for t, p in zip(true_labels, predicted_labels) if t == label and p == label)
  fp = np.sum(1 for t, p in zip(true_labels, predicted_labels) if t != label and p == label)
  fn = np.sum(1 for t, p in zip(true_labels, predicted_labels) if t == label and p != label)


Model Evalution commpleted. Results saved to results.csv, metrics.txt, confusion_matrix.png
