In [1]:
%pip install --upgrade pip
%pip install opencv-python scikit-learn matplotlib scipy==1.10.1

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [61]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt

from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from sklearn.svm import LinearSVC
from scipy import stats
from pathlib import Path, PureWindowsPath
import random


def extract_dataset_info(data_path):
    """Index the train and test splits of a directory-per-class image dataset.

    The expected layout is ``<data_path>/train/<class>/<image>`` and
    ``<data_path>/test/<class>/<image>``. Class names are the non-hidden
    sub-directories of ``train``, sorted alphabetically; the integer label of
    an image is the position of its class in that sorted list.

    Args:
        data_path: Root directory of the dataset (``str`` or path-like).

    Returns:
        A tuple ``(label_classes, label_train_list, img_train_list,
        label_test_list, img_test_list)`` where ``label_classes`` is the sorted
        list of class names, the ``img_*`` lists hold file paths as strings and
        the ``label_*`` lists hold the matching integer labels.

    Raises:
        FileNotFoundError: If ``<data_path>/train`` does not exist.
    """
    data_path = Path(data_path)
    train_path = data_path / 'train'
    test_path = data_path / 'test'

    # Hidden directories (e.g. ".ipynb_checkpoints" created by Jupyter) are not
    # classes and would otherwise shift every label index.
    label_classes = sorted(
        d.name for d in train_path.iterdir()
        if d.is_dir() and not d.name.startswith('.')
    )

    def _list_images(class_dir):
        # A class may legitimately have no test directory; treat it as empty.
        if not class_dir.is_dir():
            return []
        # iterdir() order is filesystem-dependent, so sort for reproducibility.
        # Sub-directories and hidden files (e.g. ".DS_Store") are not images.
        return sorted(
            str(p) for p in class_dir.iterdir()
            if p.is_file() and not p.name.startswith('.')
        )

    img_train_list, label_train_list = [], []
    img_test_list, label_test_list = [], []

    # enumerate() yields the label index directly instead of searching the
    # class list once per image.
    for label_idx, label in enumerate(label_classes):
        train_images = _list_images(train_path / label)
        img_train_list.extend(train_images)
        label_train_list.extend([label_idx] * len(train_images))

        test_images = _list_images(test_path / label)
        img_test_list.extend(test_images)
        label_test_list.extend([label_idx] * len(test_images))

    return label_classes, label_train_list, img_train_list, label_test_list, img_test_list

def compute_dsift(img, step_size=8, patch_size=16):
    """Compute SIFT descriptors on a dense, regular grid of keypoints.

    Keypoints are centred on patches whose top-left corners are spaced
    ``step_size`` pixels apart in both directions.

    Args:
        img: Image array to describe. The callers in this file pass the
            grayscale result of ``cv2.imread``.
        step_size: Distance in pixels between neighbouring grid positions.
        patch_size: Keypoint size passed to ``cv2.KeyPoint``; also the margin
            kept from the bottom/right image border.

    Returns:
        An ``(n, 128)`` array of SIFT descriptors, one row per keypoint. When
        the image is too small to hold a single grid position, an empty
        ``(0, 128)`` float32 array is returned instead of ``None``.

    Raises:
        ValueError: If ``img`` is ``None`` (``cv2.imread`` returns ``None``
            for files it cannot decode).
    """
    if img is None:
        raise ValueError("compute_dsift received None; the image could not be read")

    height, width = img.shape[:2]
    # Centre coordinates of every patch on the grid (row-major order).
    centres = [(x + patch_size / 2, y + patch_size / 2)
               for y in range(0, height - patch_size, step_size)
               for x in range(0, width - patch_size, step_size)]
    if not centres:
        # Keep the return type stable so callers can stack/inspect the result.
        return np.empty((0, 128), dtype=np.float32)

    sift = cv2.SIFT_create()
    keypoints = [cv2.KeyPoint(cx, cy, patch_size) for cx, cy in centres]
    # compute() evaluates descriptors at the supplied keypoints (no detection).
    _, dense_feature = sift.compute(img, keypoints)
    if dense_feature is None:
        return np.empty((0, 128), dtype=np.float32)

    return dense_feature


def predict_knn(feature_train, label_train, feature_test, k):
    """Classify test features by majority vote among their k nearest neighbours.

    Args:
        feature_train: Training feature vectors, one per sample.
        label_train: Non-negative integer labels aligned with ``feature_train``.
        feature_test: Feature vectors to classify.
        k: Number of neighbours that vote for each test sample.

    Returns:
        A NumPy array with one predicted label per test sample. On a tied vote
        the smallest label wins, because ``argmax`` returns the first maximum.
    """
    train_labels = np.array(label_train)

    # Index the training set, then look up the k closest training samples for
    # every test sample; only the neighbour indices are needed.
    neighbour_model = NearestNeighbors(n_neighbors=k).fit(feature_train)
    neighbour_idx = neighbour_model.kneighbors(feature_test, return_distance=False)

    # bincount tallies the votes per label; argmax picks the most frequent one.
    votes = [np.bincount(train_labels[row]).argmax() for row in neighbour_idx]

    return np.array(votes)


def build_visual_dictionary(dense_feature_list, dic_size):
    """Cluster dense SIFT descriptors into a visual vocabulary.

    Args:
        dense_feature_list: Sequence of per-image descriptor arrays that can be
            stacked row-wise.
        dic_size: Number of visual words (k-means clusters) to learn.

    Returns:
        The k-means cluster centres, one row per visual word.
    """
    # Pool the descriptors of all images into a single matrix.
    descriptor_pool = np.vstack(dense_feature_list)

    # random_state is fixed so the vocabulary is reproducible; the number of
    # restarts (n_init) is left at the scikit-learn default.
    clusterer = KMeans(n_clusters=dic_size, random_state=0).fit(descriptor_pool)

    return clusterer.cluster_centers_


def compute_bow(feature, vocab):
    """Encode a set of local descriptors as an L2-normalised bag-of-words histogram.

    Each descriptor is assigned to its nearest visual word; the histogram
    counts how many descriptors fall on each word.

    Args:
        feature: ``(n, d)`` array of local descriptors for one image. ``None``
            or an empty array is accepted and yields an all-zero histogram.
        vocab: ``(dic_size, d)`` array of visual words.

    Returns:
        A float32 vector of length ``dic_size`` with unit L2 norm, or all zeros
        when there are no descriptors.
    """
    dic_size = vocab.shape[0]

    # An image without descriptors has no words to count; return early rather
    # than letting the nearest-neighbour query fail on empty input.
    if feature is None or len(feature) == 0:
        return np.zeros(dic_size, dtype=np.float32)

    # Nearest visual word for every descriptor.
    nn = NearestNeighbors(n_neighbors=1)
    nn.fit(vocab)
    word_idx = nn.kneighbors(feature, return_distance=False).ravel()

    # minlength pads the histogram so words that never occur still get a bin.
    bow_feature = np.bincount(word_idx, minlength=dic_size).astype(np.float32)

    # L2-normalise so images with different descriptor counts are comparable.
    norm = np.linalg.norm(bow_feature)
    if norm > 0:
        bow_feature /= norm

    return bow_feature


def classify_knn_bow(label_classes, label_train_list, img_train_list, label_test_list, img_test_list):
    """Run the bag-of-words + k-nearest-neighbour classification pipeline.

    Dense SIFT descriptors are extracted from the training images, clustered
    into a visual vocabulary, and every image is encoded as a BoW histogram.
    Test histograms are classified with ``predict_knn`` and the resulting
    confusion matrix is saved through ``visualize_confusion_matrix``.

    Args:
        label_classes: Class names; the index of a name is its integer label.
        label_train_list: Integer labels of the training images.
        img_train_list: Paths of the training images.
        label_test_list: Integer labels of the test images.
        img_test_list: Paths of the test images.

    Returns:
        ``(confusion, accuracy)`` where ``confusion[t, p]`` counts test images
        of true class ``t`` predicted as class ``p`` and ``accuracy`` is the
        fraction of correctly classified test images.
    """
    dic_size = 550  # number of visual words
    k = 15          # neighbours that vote in the KNN classifier
    # Size the confusion matrix from the data rather than assuming 15 classes.
    n_classes = len(label_classes)

    print("Extracting dense SIFT features")
    train_features = [
        compute_dsift(cv2.imread(img_path, cv2.IMREAD_GRAYSCALE))
        for img_path in img_train_list
    ]

    vocab = build_visual_dictionary(train_features, dic_size)

    print("Computing BoW histograms for training images")
    bow_train = [compute_bow(feat, vocab) for feat in train_features]

    print("Computing BoW histograms for test images")
    # Test descriptors are only needed for their histogram, so encode each
    # image straight away instead of keeping every descriptor array in memory.
    bow_test = [
        compute_bow(compute_dsift(cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)), vocab)
        for img_path in img_test_list
    ]

    print("Running KNN classifier")
    pred = predict_knn(bow_train, label_train_list, bow_test, k=k)

    # Rows are true labels, columns are predictions.
    confusion = np.zeros((n_classes, n_classes), dtype=int)
    for p, t in zip(pred, label_test_list):
        confusion[t, p] += 1
    accuracy = np.trace(confusion) / np.sum(confusion)

    visualize_confusion_matrix(confusion, accuracy, label_classes, method_name="bow_knn")

    return confusion, accuracy


def predict_svm(feature_train, label_train, feature_test, C=2):
    """Classify test features with one-vs-rest linear SVMs.

    One binary ``LinearSVC`` is trained per class present in ``label_train``;
    each test sample is assigned the class whose classifier reports the
    highest decision value.

    Args:
        feature_train: Training feature vectors, one per sample.
        label_train: Integer labels aligned with ``feature_train``.
        feature_test: Feature vectors to classify.
        C: Regularisation parameter forwarded to ``LinearSVC``.

    Returns:
        A NumPy array with one predicted label per test sample.
    """
    label_train = np.asarray(label_train)
    # Derive the classes from the data instead of assuming exactly 15: a class
    # with no training samples would give an all-zero binary target, which
    # LinearSVC rejects, and classes beyond 15 would be silently ignored.
    classes = np.unique(label_train)

    if classes.size == 1:
        # A binary SVM cannot be trained on one class; the answer is forced.
        return np.full(len(feature_test), classes[0])

    scores = []
    for c in classes:
        # 1 for samples of the current class, 0 for everything else.
        binary_label_train = (label_train == c).astype(int)
        # Fixed random_state for reproducible fits, matching the seeded KMeans.
        clf = LinearSVC(C=C, max_iter=10000, random_state=0)
        clf.fit(feature_train, binary_label_train)
        # Signed distance to the separating hyperplane, used as confidence.
        scores.append(clf.decision_function(feature_test))

    # Row i of the stacked scores belongs to classes[i]; map the winning row
    # back to its class label.
    label_test_pred = classes[np.argmax(np.array(scores), axis=0)]
    return label_test_pred


def classify_svm_bow(label_classes, label_train_list, img_train_list, label_test_list, img_test_list):
    """Run the bag-of-words + linear SVM classification pipeline.

    Dense SIFT descriptors are extracted from the training images, clustered
    into a visual vocabulary, and every image is encoded as a BoW histogram.
    Test histograms are classified with ``predict_svm`` and the resulting
    confusion matrix is saved through ``visualize_confusion_matrix``.

    Args:
        label_classes: Class names; the index of a name is its integer label.
        label_train_list: Integer labels of the training images.
        img_train_list: Paths of the training images.
        label_test_list: Integer labels of the test images.
        img_test_list: Paths of the test images.

    Returns:
        ``(confusion, accuracy)`` where ``confusion[t, p]`` counts test images
        of true class ``t`` predicted as class ``p`` and ``accuracy`` is the
        fraction of correctly classified test images.
    """
    dic_size = 600  # number of visual words
    # Size the confusion matrix from the data rather than assuming 15 classes.
    n_classes = len(label_classes)

    print("Extracting dense SIFT features")
    train_features = [
        compute_dsift(cv2.imread(img_path, cv2.IMREAD_GRAYSCALE))
        for img_path in img_train_list
    ]

    vocab = build_visual_dictionary(train_features, dic_size)

    print("Computing BoW histograms for training images")
    bow_train = [compute_bow(feat, vocab) for feat in train_features]

    print("Computing BoW histograms for test images")
    # Test descriptors are only needed for their histogram, so encode each
    # image straight away instead of keeping every descriptor array in memory.
    bow_test = [
        compute_bow(compute_dsift(cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)), vocab)
        for img_path in img_test_list
    ]

    print("Running SVM classifier")
    pred = predict_svm(bow_train, label_train_list, bow_test)

    # Rows are true labels, columns are predictions.
    confusion = np.zeros((n_classes, n_classes), dtype=int)
    for p, t in zip(pred, label_test_list):
        confusion[t, p] += 1
    accuracy = np.trace(confusion) / np.sum(confusion)

    visualize_confusion_matrix(confusion, accuracy, label_classes, method_name="bow_svm")

    return confusion, accuracy

def visualize_confusion_matrix(confusion, accuracy, label_classes,  method_name, out_dir="outputs"):
    """Render a confusion matrix as a heatmap and save it as a PNG.

    The image is written to ``<out_dir>/<method_name>_confusion_acc.png``;
    ``out_dir`` is created if it does not exist. Nothing is displayed.

    Args:
        confusion: Square 2-D array of counts to draw.
        accuracy: Value shown in the figure title, formatted to three decimals.
        label_classes: Class names used as tick labels on both axes.
        method_name: Prefix of the output file name.
        out_dir: Directory the PNG is written to.
    """
    os.makedirs(out_dir, exist_ok=True)
    tick_positions = np.arange(len(label_classes))

    # Draw on a dedicated figure instead of whatever pyplot considers current,
    # so an already-open figure is neither overwritten nor closed by this call.
    fig, ax = plt.subplots()
    try:
        ax.set_title("accuracy = {:.3f}".format(accuracy))
        ax.imshow(confusion)
        ax.set_xticks(tick_positions)
        ax.set_xticklabels(label_classes)
        ax.set_yticks(tick_positions)
        ax.set_yticklabels(label_classes)
        # set horizontal alignment mode (left, right or center) and rotation mode (anchor or default)
        plt.setp(ax.get_xticklabels(), rotation=-30, ha="center", rotation_mode="default")
        # minor ticks on the cell borders keep the top and bottom rows of the heatmap from being cut
        ax.set_xticks(np.arange(len(label_classes) + 1) - .5, minor=True)
        ax.set_yticks(np.arange(len(label_classes) + 1) - .5, minor=True)
        ax.tick_params(which="minor", bottom=False, left=False)
        fig.tight_layout()
        fname = os.path.join(out_dir, f"{method_name}_confusion_acc.png")
        fig.savefig(fname, bbox_inches='tight')
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close(fig)

if __name__ == '__main__':
    # Index the dataset once so both pipelines see the same train/test split.
    (label_classes,
     label_train_list,
     img_train_list,
     label_test_list,
     img_test_list) = extract_dataset_info("./scene_classification_data")

    # Run the KNN pipeline first, then the SVM pipeline, on identical inputs.
    for classify in (classify_knn_bow, classify_svm_bow):
        classify(label_classes, label_train_list, img_train_list, label_test_list, img_test_list)
    


Extracting dense SIFT features
Computing BoW histograms for training images
Computing BoW histograms for test images
Running KNN classifier
Extracting dense SIFT features
Computing BoW histograms for training images
Computing BoW histograms for test images
Running SVM classifier
