<a href="https://colab.research.google.com/github/Lursen/BoVW-SIFT-SVM-for-classification/blob/main/SIFT_SVM_LowLight.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip uninstall opencv-contrib-python

In [None]:
!pip install opencv-contrib-python

In [None]:
import glob, os
from PIL import Image
import cv2
import numpy as np
import random
from sklearn.cluster import MiniBatchKMeans
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC
import matplotlib.pyplot as plt

In [None]:
# Loading data
def readImages(dir, categories):
  """Load every readable image of each category as a grayscale array.

  Args:
    dir: Root directory holding one sub-directory per category. The name
      shadows the builtin but is kept so existing callers keep working.
    categories: Iterable of category (sub-directory) names.

  Returns:
    Dict mapping each category name to the list of grayscale images
    returned by ``cv2.imread`` for the files in that sub-directory.
  """
  data = {}
  for category in categories:
    cur_dir = os.path.join(dir, category)
    imgs = []
    # Sorted so the load order does not depend on the file system.
    for fname in sorted(os.listdir(cur_dir)):
      path = os.path.join(cur_dir, fname)
      if not os.path.isfile(path):
        continue
      image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
      if image is None:
        # cv2.imread signals an unreadable file by returning None.
        print('Skipping unreadable file: ' + path)
        continue
      imgs.append(image)
    print(f'{category}: {len(imgs)} images loaded')
    data[category] = imgs
  return data

# Creating training/test sets
def splitImages(data, train_size, seed=None):
  """Randomly split each category into a training and a test part.

  Args:
    data: Dict mapping a category name to a list of samples.
    train_size: Fraction (0..1) of each category assigned to training.
    seed: Optional seed for a private random generator. When None the
      global ``random`` module state is used.

  Returns:
    Tuple ``(train, test)`` of dicts with the same keys as ``data``.

  Raises:
    ValueError: If ``train_size`` is outside the range [0, 1].
  """
  if not 0 <= train_size <= 1:
    raise ValueError('train_size must be between 0 and 1, got %r' % (train_size,))

  rng = random.Random(seed) if seed is not None else random
  train = {}
  test = {}
  for key, samples in data.items():
    # Shuffle a copy so the caller's lists keep their original order.
    shuffled = list(samples)
    rng.shuffle(shuffled)
    cut = int(len(shuffled) * train_size)
    train[key] = shuffled[:cut]
    test[key] = shuffled[cut:]
  return train, test
  
# SIFT features
def getSiftFeatures(data):
  """Compute SIFT descriptors for every image of every category.

  Args:
    data: Dict mapping a category name to a list of images.

  Returns:
    Tuple ``(descriptor_list, sift_vectors)``. ``descriptor_list`` is a
    flat list with every descriptor row of every image;
    ``sift_vectors`` maps each category to a list holding one
    descriptor array per image. Images that are None or that yield no
    descriptors are left out of both.
  """
  sift_vectors = {}
  descriptor_list = []
  # Use the main-module factory when this OpenCV build has it and fall
  # back to the contrib (xfeatures2d) one otherwise.
  if hasattr(cv2, 'SIFT_create'):
    sift = cv2.SIFT_create()
  else:
    sift = cv2.xfeatures2d.SIFT_create()
  for key, images in data.items():
    print(key)
    descriptors = []
    for img in images:
      if img is None:
        # An image that failed to load cannot be processed.
        continue
      _keypoints, descs = sift.detectAndCompute(img, None)
      if descs is not None:
        descriptors.append(descs)
        descriptor_list.extend(descs)
    sift_vectors[key] = descriptors
  return descriptor_list, sift_vectors

# K-means clusterization
def Kmeans(k, descriptor_list, random_state=None):
  """Fit a MiniBatchKMeans model on the given descriptors.

  Args:
    k: Number of clusters.
    descriptor_list: Sequence of descriptor vectors to cluster.
    random_state: Optional seed forwarded to MiniBatchKMeans so the
      clustering can be reproduced. None keeps the unseeded behaviour.

  Returns:
    The fitted MiniBatchKMeans instance.
  """
  model = MiniBatchKMeans(n_clusters=k, init='k-means++', batch_size=1000,
                          n_init=10, max_no_improvement=10, verbose=True,
                          random_state=random_state)
  return model.fit(descriptor_list)

# Vector quantization
def getHistograms(descriptors, kmeans):
    """Quantize descriptors into per-image cluster-count histograms.

    Args:
        descriptors: Dict mapping a category name to a list with one
            descriptor array per image.
        kmeans: Fitted clustering model exposing ``cluster_centers_``
            and ``predict``.

    Returns:
        Dict mapping each category to a list of float histograms, one
        per image, with one bin per cluster.
    """
    n_clusters, n_dims = np.shape(kmeans.cluster_centers_)
    dict_feature = {}
    for key, value in descriptors.items():
        category = []
        for img in value:
            features = np.asarray(img).astype('float').reshape(-1, n_dims)
            if len(features) == 0:
                # No descriptors: keep an all-zero histogram.
                histogram = np.zeros(n_clusters)
            else:
                # One predict call per image instead of one per descriptor.
                idx = kmeans.predict(features)
                histogram = np.bincount(idx, minlength=n_clusters).astype(float)
            category.append(histogram)
        dict_feature[key] = category
    return dict_feature

# Feature normalization
def featureNormalization(scale, im_features):
  """Standardize features with the given scaler.

  The scaler is fitted on ``im_features`` only if it has not been
  fitted yet; an already fitted scaler is reused unchanged. Calling
  this first with the training set and then with the test set (same
  scaler object) therefore applies the training statistics to both.

  Args:
    scale: Scaler object with ``fit`` and ``transform`` methods, or
      None to create a new ``StandardScaler``.
    im_features: 2-D array-like of feature vectors.

  Returns:
    The transformed features as returned by ``scale.transform``.
  """
  if scale is None:
    from sklearn.preprocessing import StandardScaler
    scale = StandardScaler()
  # Same convention scikit-learn uses: a fitted estimator has instance
  # attributes whose names end with an underscore.
  is_fitted = any(name.endswith('_') and not name.startswith('__')
                  for name in vars(scale))
  if not is_fitted:
    scale.fit(im_features)
  return scale.transform(im_features)


In [None]:
import os
import gdown
from zipfile import ZipFile

# exist_ok lets this cell be re-run without FileExistsError.
os.makedirs("dataset", exist_ok=True)

url = "https://drive.google.com/uc?id=1GZqHFzTLDI-1rcOctHdf-c16VgagWocd"
output = "dataset/ExDark.zip"
# Skip the download when the archive is already present.
if not os.path.exists(output):
    gdown.download(url, output, quiet=True)

with ZipFile(output, "r") as zipobj:
    zipobj.extractall("dataset")

In [None]:
categories = ['Bicycle','Boat','Bottle','Bus','Car','Cat','Chair','Cup','Dog','Motorbike','People','Table']
# Relative path, matching the "dataset" folder the download cell extracts to.
rootdir = "dataset/"

# Seed the global RNG so the shuffled train/test split is reproducible.
SEED = 42
random.seed(SEED)

# Load data
data = readImages(rootdir, categories)
train, test = splitImages(data, train_size=0.7)

In [None]:
# SIFT descriptors: the flat training descriptor list feeds k-means,
# the per-category descriptors are kept for both splits.
descriptor_list, train_features = getSiftFeatures(data=train)
test_features = getSiftFeatures(data=test)[1]

# Cluster the training descriptors into k groups
k = 100
kmeans = Kmeans(k=k, descriptor_list=descriptor_list)

In [None]:
# Per-image cluster histograms for the train and test descriptors
hist_train = getHistograms(descriptors=train_features, kmeans=kmeans)
hist_test = getHistograms(descriptors=test_features, kmeans=kmeans)

In [None]:
from sklearn.preprocessing import StandardScaler

# Integer class label for every category name
dict_cat = {'Bicycle':0,'Boat':1,'Bottle':2,'Bus':3,'Car':4,'Cat':5,'Chair':6,'Cup':7,'Dog':8,'Motorbike':9,'People':10,'Table':11}

# Flatten the per-category histogram lists into parallel feature/label lists
X_train = [hist for hists in hist_train.values() for hist in hists]
y_train = [dict_cat[cat] for cat, hists in hist_train.items() for _ in hists]

X_test = [hist for hists in hist_test.values() for hist in hists]
y_test = [dict_cat[cat] for cat, hists in hist_test.items() for _ in hists]

# Features normalization
scale = StandardScaler()

X_train_norm = featureNormalization(scale, X_train)
X_test_norm = featureNormalization(scale, X_test)

Classification

In [None]:
from sklearn import datasets
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.svm import SVC
try:
  from sklearn.metrics import plot_confusion_matrix
except ImportError:
  # plot_confusion_matrix was removed in scikit-learn 1.2
  plot_confusion_matrix = None
from mlxtend.plotting import plot_decision_regions

def SVC_train(X_train, y_train, kernel, param_grid=None, cv=5):
  """Grid-search an SVC and return the best model fitted on all data.

  Args:
    X_train: Training feature vectors.
    y_train: Training labels.
    kernel: SVC kernel name, e.g. 'rbf', 'linear' or 'poly'.
    param_grid: Optional parameter grid for GridSearchCV. By default C
      is searched for every kernel and gamma for every kernel except
      'linear', where it has no effect.
    cv: Number of cross-validation folds.

  Returns:
    The best SVC found, refitted by GridSearchCV on the whole
    training set.
  """
  if param_grid is None:
    param_grid = {'C': [0.5, 0.1, 0.15, 0.2, 0.3]}
    if kernel != 'linear':
      param_grid['gamma'] = [0.1, 0.11, 0.095, 0.105]

  grid_search = GridSearchCV(SVC(kernel=kernel), param_grid, cv=cv)
  grid_search.fit(X_train, y_train)

  # With the default refit=True the best estimator is already trained
  # on the full training set, so no second fit is needed.
  svm = grid_search.best_estimator_

  print('Best Accuracy Through Grid Search : %.3f'%grid_search.best_score_)
  print('Best Parameters : ',grid_search.best_params_)
  print('Train Accuracy : %.3f'%svm.score(X_train, y_train))

  return svm

def SVC_test(svm, X_test, y_test):
  """Print the score the classifier reaches on the given test data."""
  accuracy = svm.score(X_test, y_test)
  print('Test Accuracy : %.3f' % accuracy)

def plot_conf_matr(svm, X_test, y_test):
  """Plot the row-normalized confusion matrix of a fitted classifier.

  Args:
    svm: Fitted classifier.
    X_test: Test feature vectors.
    y_test: True test labels.
  """
  # ConfusionMatrixDisplay.from_estimator replaces plot_confusion_matrix,
  # which newer scikit-learn releases no longer provide; the old helper
  # is used only when the new class method is missing.
  if hasattr(ConfusionMatrixDisplay, 'from_estimator'):
    matrix = ConfusionMatrixDisplay.from_estimator(svm, X_test, y_test,
                                                   cmap=plt.cm.Blues,
                                                   normalize='true')
  else:
    matrix = plot_confusion_matrix(svm, X_test, y_test,
                                   cmap=plt.cm.Blues,
                                   normalize='true')
  matrix.ax_.set_title('Confusion matrix for SVM classifier')
  # plt.show takes no figure or display object; one call is enough.
  plt.show()


In [None]:
# RBF-kernel SVM: train, report test accuracy, plot the confusion matrix.
# NOTE(review): X_train_norm / X_test_norm are computed earlier but the
# unnormalized histograms are used here; confirm this is intended.
svm_rbf = SVC_train(X_train=X_train, y_train=y_train, kernel='rbf')
SVC_test(svm=svm_rbf, X_test=X_test, y_test=y_test)
plot_conf_matr(svm=svm_rbf, X_test=X_test, y_test=y_test)

In [None]:
# Linear-kernel SVM: train, report test accuracy, plot the confusion matrix.
# NOTE(review): X_train_norm / X_test_norm are computed earlier but the
# unnormalized histograms are used here; confirm this is intended.
svm_linear = SVC_train(X_train=X_train, y_train=y_train, kernel='linear')
SVC_test(svm=svm_linear, X_test=X_test, y_test=y_test)
plot_conf_matr(svm=svm_linear, X_test=X_test, y_test=y_test)


In [None]:
# Polynomial-kernel SVM: train, report test accuracy, plot the confusion matrix.
# NOTE(review): X_train_norm / X_test_norm are computed earlier but the
# unnormalized histograms are used here; confirm this is intended.
svm_polynomial = SVC_train(X_train=X_train, y_train=y_train, kernel='poly')
SVC_test(svm=svm_polynomial, X_test=X_test, y_test=y_test)
plot_conf_matr(svm=svm_polynomial, X_test=X_test, y_test=y_test)