In [60]:
from commonfunctions import *

### Parameters

Annotation => filename ; y ; x ; x_min ; y_min ; x_max ; y_max ; category

In [61]:
IMAGE_SIZE = (256, 256)  # Size handed to cv.resize: every cropped image is resized to 256x256
DATASET_PATH = "TSRD_Train"  # Path to the training dataset folder
ANNOTATION_FILE = "TSRD_Train_Annotation/TsignRecgTrain4170Annotation.txt"  # Path to the training annotation file
SAVE_PATH = "processed_data.npz"  # File to save the processed dataset (NPZ to store NumPy arrays); NOTE(review): not referenced by the cells visible here
TEST_DATASET_PATH = "images"  # Path to the test dataset folder (listed with os.listdir by predict and get_actual_labels)
TEST_ANNOTATION_FILE = "TSRD_Test_Annotation/TsignRecgTest1994Annotation.txt"  # Path to the test annotation file; NOTE(review): not referenced by the cells visible here

### Helper functions

In [62]:
# Function to parse annotation file #####################################################
def parse_annotations(annotation_file):
    """Parse a semicolon-separated annotation file into a list of dicts.

    Each non-blank line is expected to hold at least eight ';'-separated
    fields: filename, y, x, x_min, y_min, x_max, y_max, category. Anything
    after the eighth field (for example the empty string produced by a
    trailing ';') is ignored.

    Args:
        annotation_file: Path of the text file to read.

    Returns:
        A list with one dict per annotated line, using the keys "filename",
        "y_axis", "x_axis", "bound_x_min", "bound_y_min", "bound_x_max",
        "bound_y_max" and "label". All values except "filename" are ints.

    Raises:
        IndexError: If a non-blank line has fewer than eight fields.
        ValueError: If a numeric field cannot be converted to int.
    """
    numeric_keys = (
        "y_axis",
        "x_axis",
        "bound_x_min",
        "bound_y_min",
        "bound_x_max",
        "bound_y_max",
        "label",
    )
    annotations = []
    with open(annotation_file, "r") as file:
        for line in file:
            stripped = line.strip()
            # Blank lines (e.g. a trailing newline at the end of the file)
            # carry no annotation; previously they crashed on parts[1].
            if not stripped:
                continue
            parts = stripped.split(";")
            record = {"filename": parts[0].strip()}
            for offset, key in enumerate(numeric_keys, start=1):
                record[key] = int(parts[offset])
            annotations.append(record)
    return annotations


# Function to load images ################################################
def load_images(image_folder, annotations):
    """Read the image named by each annotation and collect it with its label.

    Args:
        image_folder: Directory that the annotation filenames are relative to.
        annotations: Iterable of dicts providing "filename" and "label".

    Returns:
        A tuple (images, labels) of two parallel lists. An annotation whose
        image is read back as None is reported on stdout and contributes to
        neither list.
    """
    loaded_images, loaded_labels = [], []

    for entry in annotations:
        path = os.path.join(image_folder, entry["filename"])
        picture = io.imread(path)
        if picture is not None:
            loaded_images.append(picture)
            loaded_labels.append(entry["label"])
        else:
            print(f"Failed to load image: {path}")

    return loaded_images, loaded_labels

# Get the actual labels of the test dataset
def get_actual_labels(TEST_DATASET_PATH):
    """Derive the ground-truth label of every file in a directory from its name.

    The label is the integer formed by the part of the filename that precedes
    the first underscore.

    Args:
        TEST_DATASET_PATH: Directory whose entries are listed with os.listdir.

    Returns:
        A list of int labels, in the order os.listdir returns the entries.
    """
    return [int(entry.split("_")[0]) for entry in os.listdir(TEST_DATASET_PATH)]

### Image Preprocessing

In [63]:

# Function to load and preprocess images ################################################
def preprocess_images(image_folder, annotations, image_size):
    """Load, crop, resize and circularly mask every annotated image.

    For each annotation the image is loaded through load_images, cropped to
    the annotation's bounding box, resized to image_size with bicubic
    interpolation, and AND-ed with a filled circle centred in the resized
    image whose radius is half the resized width. Pixel values are left in
    their original range (no division by 255 is applied).

    Images are loaded one annotation at a time so that an image skipped by
    load_images cannot shift the pairing between images and bounding boxes;
    a skipped image simply contributes no output row.

    Args:
        image_folder: Directory that the annotation filenames are relative to.
        annotations: List of dicts as produced by parse_annotations.
        image_size: Size tuple forwarded to cv.resize.

    Returns:
        A tuple (processed_images, labels) of NumPy arrays with one entry per
        successfully loaded image, in annotation order.
    """
    processed_images = []
    labels = []

    for ann in annotations:
        # One-element call keeps image, label and bounding box tied together.
        loaded, loaded_labels = load_images(image_folder, [ann])
        if not loaded:
            continue
        img = loaded[0]

        # Crop the bounding box
        cropped_img = img[ann["bound_y_min"]:ann["bound_y_max"], ann["bound_x_min"]:ann["bound_x_max"]]

        # Resize the crop to the common size
        resized_img = cv.resize(cropped_img, image_size, interpolation=cv.INTER_CUBIC)

        # Keep only the circular region centred in the resized crop
        mask = np.zeros_like(resized_img)
        center = (int(resized_img.shape[1]/2), int(resized_img.shape[0]/2))
        radius = int(resized_img.shape[1]/2)
        mask = cv.circle(mask, center, radius, (255, 255, 255), -1)
        masked_img = cv.bitwise_and(resized_img, mask)

        processed_images.append(masked_img)
        labels.append(loaded_labels[0])

    return np.array(processed_images), np.array(labels)

### Feature Extraction

In [64]:
# Shape-Based Features (edge detection, Hough transform)

#canny edge detection
def canny_edge_detection(image):
    """Run the Canny edge detector on an image.

    The input is multiplied by 255 and cast to uint8 before being passed to
    cv.Canny with thresholds 40 and 80.
    NOTE(review): the scaling suggests a float image in [0, 1] is expected -
    confirm against callers.

    Args:
        image: NumPy array to detect edges in.

    Returns:
        Whatever cv.Canny returns for the scaled image.
    """
    scaled = (image * 255).astype(np.uint8)
    return cv.Canny(scaled, 40, 80)

# Lines Hough transform
def lines_hough_transform(image):
    """Detect line segments with the probabilistic Hough transform.

    The image is converted with rgb2gray, passed through
    canny_edge_detection, and the resulting edge map is handed to
    cv.HoughLinesP.

    Args:
        image: Image accepted by rgb2gray.

    Returns:
        The value returned by cv.HoughLinesP.
    """
    edge_map = canny_edge_detection(rgb2gray(image))
    hough_params = dict(
        rho=1,
        theta=np.pi/180,
        threshold=45,
        minLineLength=20,
        maxLineGap=10,
    )
    return cv.HoughLinesP(edge_map, **hough_params)

# Circles Hough transform
def circles_hough_transform(image):
    """Detect circles with the Hough gradient method.

    The image is converted with rgb2gray, passed through
    canny_edge_detection, and the resulting edge map is handed to
    cv.HoughCircles using cv.HOUGH_GRADIENT.

    Args:
        image: Image accepted by rgb2gray.

    Returns:
        The value returned by cv.HoughCircles.
    """
    edge_map = canny_edge_detection(rgb2gray(image))
    hough_params = dict(
        dp=1.5,
        minDist=300,
        param1=50,
        param2=30,
        minRadius=110,
        maxRadius=200,
    )
    return cv.HoughCircles(edge_map, cv.HOUGH_GRADIENT, **hough_params)

# HOG
def extract_hog_features(image):
    """Compute a HOG descriptor for an image.

    The image is forwarded unchanged to hog with 9 orientations, 8x8 pixel
    cells, 2x2 cell blocks and 'L2-Hys' block normalisation; no grayscale
    conversion happens here.

    Args:
        image: Image passed straight to hog.

    Returns:
        The feature vector returned by hog.
    """
    hog_settings = {
        "orientations": 9,
        "pixels_per_cell": (8, 8),
        "cells_per_block": (2, 2),
        "block_norm": 'L2-Hys',
        "visualize": False,
        "feature_vector": True,
    }
    return hog(image, **hog_settings)

# Keypoint-Based Features

# Harris corner detection
def harris_corner_detection(image):
    """Compute the Harris corner response of an image.

    The image is converted with rgb2gray, cast to float32 and passed to
    cv.cornerHarris with blockSize=2, ksize=3 and k=0.04.

    Args:
        image: Image accepted by rgb2gray.

    Returns:
        The response returned by cv.cornerHarris.
    """
    gray32 = np.float32(rgb2gray(image))
    return cv.cornerHarris(gray32, blockSize=2, ksize=3, k=0.04)

# SIFT 
def sift_features(image):
    """Detect SIFT keypoints and compute their descriptors.

    Uses cv.SIFT_create when the cv module provides it, which matches how
    extract_bow_features builds its detector, and only falls back to
    cv.xfeatures2d.SIFT_create when the top-level factory is missing.

    Args:
        image: Image passed unchanged to detectAndCompute (no mask is used).

    Returns:
        The (keypoints, descriptors) pair returned by detectAndCompute.

    Raises:
        AttributeError: If neither factory is available on the cv module.
    """
    create_sift = getattr(cv, "SIFT_create", None)
    if create_sift is None:
        # Fallback for builds that expose SIFT only through the contrib module.
        create_sift = cv.xfeatures2d.SIFT_create
    sift = create_sift()
    keypoints, descriptors = sift.detectAndCompute(image, None)
    return keypoints, descriptors


# color-based features

# Color histogram
def color_histogram(image, bins=(8, 8, 8)):
    """Build a flattened, normalised 3-channel colour histogram.

    cv.calcHist is evaluated over channels 0, 1 and 2 with the range
    [0, 256) on each, the histogram is normalised with cv.normalize, and the
    result is flattened to one dimension.

    Args:
        image: Image passed to cv.calcHist.
        bins: Number of bins per channel.

    Returns:
        The flattened normalised histogram.
    """
    channel_ids = [0, 1, 2]
    value_ranges = [0, 256, 0, 256, 0, 256]
    hist = cv.calcHist([image], channel_ids, None, bins, value_ranges)
    normalized = cv.normalize(hist, hist)
    return normalized.flatten()


# Extracting Combined Features
def extract_combined_features(image, sift_features):
    """Concatenate precomputed features with HOG and colour-histogram features.

    The image is converted to a single channel with cv.COLOR_BGR2GRAY for
    the HOG descriptor, while the colour histogram is computed on the
    original image.

    Args:
        image: Colour image accepted by cv.cvtColor and color_histogram.
        sift_features: Precomputed feature vector placed first in the output
            (train and predict pass a bag-of-visual-words histogram here).

    Returns:
        One array: sift_features, then HOG features, then the colour
        histogram, joined with np.hstack.
    """
    grayscale = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    hog_part = extract_hog_features(grayscale)
    color_part = color_histogram(image)
    return np.hstack((sift_features, hog_part, color_part))

### Classification


In [65]:
from collections import Counter
# # create vocabulary using KMeans clustering
# def create_vocabulary(descriptors, k):
#     kmeans = KMeans(n_clusters=k, random_state=42)
#     kmeans.fit(descriptors)
#     return kmeans

# generate BoVW histograms
def generate_bovw_histograms(descriptors_list, kmeans, k):
    """Build one bag-of-visual-words histogram per descriptor set.

    Every descriptor is assigned to a visual word with kmeans.predict, and
    the word indices are histogrammed over k unit-width bins with
    density=True. A descriptor set that is None or empty yields an all-zero
    histogram instead of being passed to the clusterer.

    Args:
        descriptors_list: Iterable of descriptor arrays; items may be None.
        kmeans: Object exposing predict(descriptors) -> word indices.
        k: Number of visual words (histogram length).

    Returns:
        A NumPy array of shape (number of descriptor sets, k).
    """
    histograms = []
    for descriptors in descriptors_list:
        # Empty arrays would give a 0/0 density (NaN) or make predict fail,
        # so they are handled exactly like a missing descriptor set.
        if descriptors is None or len(descriptors) == 0:
            histogram = np.zeros(k)
        else:
            words = kmeans.predict(descriptors)
            histogram, _ = np.histogram(words, bins=np.arange(k+1), density=True)
        histograms.append(histogram)
    # reshape keeps the (n, k) contract even when descriptors_list is empty.
    return np.array(histograms).reshape(-1, k)

def create_vocabulary(descriptors_list, num_clusters=100):
    """Cluster all available descriptors into a visual vocabulary.

    Args:
        descriptors_list: Iterable of descriptor arrays; None entries are
            dropped before the remaining arrays are stacked row-wise.
        num_clusters: Number of clusters (visual words) to fit.

    Returns:
        The fitted KMeans model (created with random_state=42).
    """
    usable = [block for block in descriptors_list if block is not None]
    stacked = np.vstack(usable)

    model = KMeans(n_clusters=num_clusters, random_state=42)
    model.fit(stacked)
    return model

def extract_bow_features(image, kmeans):
    """Compute the bag-of-visual-words histogram of one image.

    The image is converted with cv.COLOR_BGR2GRAY, SIFT descriptors are
    computed on the result, and each descriptor is mapped to a visual word
    with kmeans.predict.

    Args:
        image: Colour image accepted by cv.cvtColor.
        kmeans: Fitted clusterer exposing n_clusters and predict.

    Returns:
        A density-normalised histogram with kmeans.n_clusters entries, or an
        all-zero vector of that length when SIFT yields no descriptors (None).
    """
    grayscale = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    detector = cv.SIFT_create()
    _, descriptors = detector.detectAndCompute(grayscale, None)

    vocabulary_size = kmeans.n_clusters
    if descriptors is None:
        return np.zeros(vocabulary_size)

    # Count how often each visual word occurs among the descriptors
    assignments = kmeans.predict(descriptors)
    bin_edges = np.arange(vocabulary_size + 1)
    counts, _ = np.histogram(assignments, bins=bin_edges, density=True)
    return counts

### Train


In [66]:
def train():
    """Train the classifier on the annotated training set.

    Steps:
      1. Parse ANNOTATION_FILE and preprocess the images in DATASET_PATH to
         IMAGE_SIZE.
      2. Collect SIFT descriptors for every image and cluster them into a
         100-word visual vocabulary.
      3. For each image, build the bag-of-visual-words histogram and combine
         it with the HOG and colour-histogram features.
      4. Fit a StandardScaler + RBF SVC pipeline (class_weight='balanced') on
         the combined feature vectors.

    Returns:
        A tuple (classifier, kmeans): the fitted pipeline and the fitted
        vocabulary model.
    """
    parsed_annotations = parse_annotations(ANNOTATION_FILE)
    images, labels = preprocess_images(DATASET_PATH, parsed_annotations, IMAGE_SIZE)

    print("Succesfully preprocessed images")
    print("Extracting features...")

    # Visual vocabulary from the SIFT descriptors of all training images
    per_image_descriptors = []
    for img in images:
        _, descriptors = sift_features(img)
        per_image_descriptors.append(descriptors)
    kmeans = create_vocabulary(per_image_descriptors, num_clusters=100)

    # One combined feature vector (BoVW + HOG + colour histogram) per image
    features = []
    feature_labels = []
    for img, label in zip(images, labels):
        bow_vector = extract_bow_features(img, kmeans)
        features.append(extract_combined_features(img, bow_vector))
        feature_labels.append(label)

    # Standardise the features, then fit the SVM
    classifier = make_pipeline(StandardScaler(), SVC(kernel='rbf', class_weight='balanced'))
    classifier.fit(features, feature_labels)

    return classifier, kmeans


### Predict


In [67]:
def _match_training_preprocessing(img, image_size):
    """Resize an image and apply the centred circular mask used during training."""
    resized = cv.resize(img, image_size, interpolation=cv.INTER_CUBIC)
    mask = np.zeros_like(resized)
    center = (int(resized.shape[1]/2), int(resized.shape[0]/2))
    radius = int(resized.shape[1]/2)
    mask = cv.circle(mask, center, radius, (255, 255, 255), -1)
    return cv.bitwise_and(resized, mask)


def predict(classifier, kmeans):
    """Predict a label for every image in TEST_DATASET_PATH.

    Each file is read, resized to IMAGE_SIZE and circularly masked in the
    same way preprocess_images treats training crops, so the HOG part of the
    feature vector has the length the classifier was trained on. Previously
    test images were used at their native size and the resulting length
    mismatch was hidden by zero-padding or truncation.
    NOTE(review): this assumes the test images are already cropped to the
    sign (no bounding boxes are used here) - confirm against the dataset.

    The padding/truncation step is retained only as a safety net for any
    remaining size mismatch.

    Args:
        classifier: Fitted pipeline whose last step exposes n_features_in_.
        kmeans: Fitted vocabulary model passed to extract_bow_features.

    Returns:
        A list with one predicted label per directory entry, in os.listdir
        order (the order get_actual_labels also uses).
    """
    # The expected width does not change between images, so read it once.
    expected_size = classifier.steps[-1][1].n_features_in_
    predicted_labels = []

    for filename in os.listdir(TEST_DATASET_PATH):
        img = io.imread(os.path.join(TEST_DATASET_PATH, filename))
        img = _match_training_preprocessing(img, IMAGE_SIZE)

        bow_features = extract_bow_features(img, kmeans)
        combined_features = extract_combined_features(img, bow_features)

        # Safety net: force the vector to the width the classifier expects
        current_size = combined_features.shape[0]
        if current_size < expected_size:
            combined_features = np.pad(
                combined_features,
                (0, expected_size - current_size),
                mode='constant'
            )
        elif current_size > expected_size:
            combined_features = combined_features[:expected_size]

        predicted_label = classifier.predict(combined_features.reshape(1, -1))
        predicted_labels.append(predicted_label[0])

    return predicted_labels

### Run the Training phase

In [68]:
# Run the full training pipeline once; train() returns the fitted classifier and the fitted vocabulary model.
# NOTE: the vocabulary is bound to `Kmeans` (capital K, lower-case m), which is a different name from the `KMeans` class.
classifier, Kmeans = train()

Succesfully preprocessed images
Extracting features...


#### Main (predict test images)


In [None]:
# Main execution
if __name__ == "__main__":
    # Evaluates the trained classifier on the files in TEST_DATASET_PATH.
    # Requires `classifier` and `Kmeans` from the training cell to exist already.
    
    # predict the labels of the test dataset
    predicted_labels = predict(classifier, Kmeans)

    # get the actual labels
    # Both predict() and get_actual_labels() enumerate the folder with os.listdir,
    # so the two lists are paired position by position.
    actual_labels = get_actual_labels(TEST_DATASET_PATH)

    # Evaluate Classifier
    print ("actual_labels   ", actual_labels)
    print ("predicted_labels", predicted_labels)

    # Overall accuracy, printed as a percentage with two decimals
    accuracy = accuracy_score(actual_labels, predicted_labels)
    print(f"Test Accuracy: {accuracy * 100:.2f}%")

    # Report produced by classification_report for the same label pairs
    print(classification_report(actual_labels, predicted_labels))