In [1]:
import torch
print("Using torch", torch.__version__)

Using torch 2.0.1+cu117


In [2]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device", device)

Device cuda


## Importing Libraries

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split    # For splitting the dataset
from sklearn import svm
from sklearn.datasets import make_moons
from sklearn.svm import SVC
from sklearn.inspection import DecisionBoundaryDisplay  # For plotting the non liner decision boundaries
from sklearn.model_selection import GridSearchCV        # For hyperperameter tuning
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

import joblib

## Data Pre-processing

In [4]:
training_data_95 = pd.read_csv('../csvs/hog_95/extracted_features_hog_95.csv',header = None)
print(training_data_95)

# X_train_df = training_data_95.drop(columns = [0])
# y_train_df = training_data_95[0]

# print(X_train_df)
# print(y_train_df)

                    0         1         2         3         4         5     \
0      Rachel_Leigh_Cook  0.101289  1.330157 -0.964354  0.278769 -0.543510   
1          Roseanne_Barr  0.391125  0.704784  1.217563 -0.780129 -1.414380   
2         Vladimir_Putin -1.246895 -0.680080  0.591671 -2.354134 -1.920589   
3           Carlos_Menem  0.597257 -0.661087  1.356273 -0.878202 -0.588220   
4          Lynne_Thigpen -1.589231 -0.049223 -1.306154 -0.461846  0.250779   
...                  ...       ...       ...       ...       ...       ...   
10581    Rohinton_Mistry  1.724274 -1.189989 -1.052075  1.067114 -0.041331   
10582      George_W_Bush  2.658667  0.404976  0.808148 -0.520925 -0.804796   
10583        Vicente_Fox  0.479659 -0.820659  0.506519  0.026553  1.815414   
10584          Todd_Wike -1.765033 -0.231035  0.285658 -1.902368 -0.086056   
10585         Kofi_Annan  1.083461  0.993570 -0.346959  1.194149 -1.330585   

           6         7         8         9     ...  3239   3240

In [5]:
testing_data_95 = pd.read_csv('../csvs/hog_95/extracted_features_test_hog_95.csv', header = None)
print(testing_data_95)

# X_test_df = testing_data_95.drop(columns = [0])
# y_test_df = testing_data_95[0]

# print(X_test_df)
# print(y_test_df)

                         0         1         2         3         4     \
0              Carolina_Kluft  1.943908 -0.685770  0.135847 -0.574862   
1            Roger_Etchegaray -1.104129  0.259980 -1.581375  0.471127   
2                  JK_Rowling  1.170456 -1.014306 -0.396331  0.125549   
3     Valery_Giscard_dEstaing  1.449810  0.777844  1.786893 -0.590618   
4         Juan_Carlos_Ferrero -0.364156 -1.960419 -0.170895  0.499791   
...                       ...       ...       ...       ...       ...   
2642            Michael_Chang  1.412572 -0.760504  0.483098 -0.281977   
2643              Osrat_Iosef  1.675409  2.553274 -0.504704 -0.422554   
2644              Yann_Martel  1.023609 -0.542472  0.853466  0.449747   
2645             Mitzi_Gaynor -0.901391  0.372586  2.181821  0.809191   
2646                Fujio_Cho  0.938650 -0.126036 -1.422143  0.285539   

          5         6         7         8         9     ...  3239   3240  \
0    -0.959423  0.472946  0.260016  0.167261 -0

### Taking only the images of persons whose number of images is greater than or equal to 50

In [6]:
df = pd.concat([training_data_95,testing_data_95],axis = 0)
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 13233 entries, 0 to 2646
Columns: 3249 entries, 0 to 3248
dtypes: float64(3248), object(1)
memory usage: 328.1+ MB


In [7]:
# Keep only the people with at least 50 images (approach adapted from Stack Overflow):
# value_counts() counts the rows per name and map() broadcasts that count onto every row.
images_per_person = df[0].map(df[0].value_counts())
# reset_index() returns a fresh, 0..n-1 indexed frame, so the filtered selection is never
# mutated in place (in-place edits on a filtered frame can raise SettingWithCopyWarning).
df_50 = df[images_per_person >= 50].reset_index(drop=True)
df_50

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3239,3240,3241,3242,3243,3244,3245,3246,3247,3248
0,George_W_Bush,-1.824086,-0.401801,-0.521794,-0.587088,-0.509684,0.280968,-1.599484,0.290741,0.713462,...,31.0,359.0,1280.0,349.0,26.0,262.0,456.0,332.0,313.0,16523.0
1,Colin_Powell,0.549831,-0.715630,1.769869,0.222841,0.108078,0.219034,-0.818753,-0.839518,-0.129442,...,44.0,504.0,1817.0,748.0,30.0,501.0,556.0,396.0,362.0,14746.0
2,George_W_Bush,-0.628315,-0.750331,0.547397,0.478640,2.337426,0.526050,0.712348,-1.843158,-0.076518,...,27.0,302.0,1903.0,347.0,13.0,263.0,381.0,313.0,252.0,29224.0
3,George_W_Bush,2.176470,-0.997268,0.699885,-1.213921,0.090995,1.768202,0.787791,-0.212840,0.012334,...,58.0,725.0,2027.0,597.0,39.0,361.0,507.0,334.0,312.0,6742.0
4,Tony_Blair,-2.303506,-0.646863,0.022650,1.529321,0.472518,0.093752,0.199501,-0.097458,-0.722055,...,25.0,374.0,2097.0,485.0,12.0,340.0,622.0,450.0,491.0,16778.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1555,George_W_Bush,-0.754818,-0.751547,-0.307345,0.836765,-0.443559,0.034152,1.786415,0.199164,-0.641788,...,41.0,496.0,1763.0,513.0,22.0,445.0,518.0,527.0,430.0,11888.0
1556,Tony_Blair,0.356975,-0.575622,-1.112081,-0.147580,2.257156,1.439668,-0.556521,0.081855,1.255535,...,45.0,646.0,3227.0,691.0,25.0,573.0,724.0,379.0,567.0,12012.0
1557,George_W_Bush,2.378569,0.299064,-0.343518,1.097499,0.782947,0.991848,-0.635729,-0.408563,-0.150484,...,40.0,353.0,1540.0,480.0,35.0,355.0,396.0,395.0,318.0,27232.0
1558,Tony_Blair,0.732607,-0.158723,-2.044487,0.405323,1.605780,-0.532496,-0.508825,0.240685,0.907796,...,35.0,522.0,1654.0,719.0,30.0,543.0,728.0,603.0,519.0,8109.0


In [8]:
print("Number of persons are ",df_50[0].unique().shape[0])

Number of persons are  12


In [9]:
X_50 = df_50.drop(columns = [0])
y_50 = df_50[0]
X_train_50_df, X_test_50_df, y_train_50, y_test_50 = train_test_split(X_50,y_50,train_size=0.8,random_state=42)
X_train_50_df.reset_index(drop=True, inplace = True)
X_test_50_df.reset_index(drop=True, inplace = True)
y_train_50.reset_index(drop=True, inplace = True)
y_test_50.reset_index(drop=True, inplace = True)
print(X_50)

          1         2         3         4         5         6         7     \
0    -1.824086 -0.401801 -0.521794 -0.587088 -0.509684  0.280968 -1.599484   
1     0.549831 -0.715630  1.769869  0.222841  0.108078  0.219034 -0.818753   
2    -0.628315 -0.750331  0.547397  0.478640  2.337426  0.526050  0.712348   
3     2.176470 -0.997268  0.699885 -1.213921  0.090995  1.768202  0.787791   
4    -2.303506 -0.646863  0.022650  1.529321  0.472518  0.093752  0.199501   
...        ...       ...       ...       ...       ...       ...       ...   
1555 -0.754818 -0.751547 -0.307345  0.836765 -0.443559  0.034152  1.786415   
1556  0.356975 -0.575622 -1.112081 -0.147580  2.257156  1.439668 -0.556521   
1557  2.378569  0.299064 -0.343518  1.097499  0.782947  0.991848 -0.635729   
1558  0.732607 -0.158723 -2.044487  0.405323  1.605780 -0.532496 -0.508825   
1559 -0.431153 -1.341352 -0.610607 -0.411693  0.667436 -0.542481 -1.090542   

          8         9         10    ...  3239   3240    3241   

In [10]:
#Normalising the data
# Standardise every feature column with StandardScaler before training the SVMs.
scaler=StandardScaler()
X_train_arr=X_train_50_df.values
X_test_arr=X_test_50_df.values
# Fit the scaler on the training split only, so no test-set statistics leak into training.
X_train_arr_n=scaler.fit_transform(X_train_arr)
# Persist the fitted scaler so the same transform can be re-applied when the saved models are reused.
joblib.dump(scaler,'Standard_Scalar_Normalisation.joblib')
# The test split is transformed with the statistics learned from the training split.
X_test_arr_n = scaler.transform(X_test_arr)
# Re-wrap the arrays as DataFrames; the column labels become 0..n_features-1.
X_train_50 = pd.DataFrame(X_train_arr_n)
X_test_50 = pd.DataFrame(X_test_arr_n)
# X_train_50

# All features concatenated

## LinearSVC

In [None]:
# Training Linear Support Vector Classifier (LinearSVC)
lin_clf = svm.LinearSVC(dual=False)
lin_clf.fit(X_train_50, y_train_50)

In [None]:
joblib.dump(lin_clf,'LinearSVC_All_n.joblib')

In [None]:
pred_y_test_50 = lin_clf.predict(X_test_50)

In [None]:
# Accuracy of linearSVC model
linSVC_accuracy = lin_clf.score(X_test_50, y_test_50)
print("Accuracy of linearSVC is ",linSVC_accuracy)

In [None]:
# Classification Report of linearSVC model
print(classification_report(y_test_50,pred_y_test_50))

In [None]:
cm_linearSVC_ALL = confusion_matrix(y_test_50, pred_y_test_50)
fig, axes = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_linearSVC_ALL, annot=True, fmt="d", cmap="Blues", xticklabels=lin_clf.classes_, yticklabels=lin_clf.classes_, ax=axes)
axes.set_title('LinearSVC on concatenated features(from CNN, HOG, LBP)')
axes.set_xlabel('Predicted')
axes.set_ylabel('True')

## SVM Models with Linear, Polynomial and RBF Kernels

In [None]:
# Three SVC models that differ only in the kernel; all other hyperparameters are left at their defaults.
svm_linear = SVC(kernel='linear') # Linear kernel
svm_poly = SVC(kernel='poly', degree= 3)  # Polynomial kernel of degree 3
svm_rbf = SVC(kernel='rbf')  # RBF kernel

In [None]:
svm_linear.fit(X_train_50, y_train_50)
svm_poly.fit(X_train_50, y_train_50)
svm_rbf.fit(X_train_50, y_train_50)

In [None]:
joblib.dump(svm_linear,'svm_linear_ALL_n.joblib')
joblib.dump(svm_poly,'svm_poly_ALL_n.joblib')
joblib.dump(svm_rbf,'svm_rbf_ALL_n.joblib')

### Linear Kernel

In [None]:
svm_linear.get_params()

In [None]:
lin_pred_y_50 = svm_linear.predict(X_test_50)

In [None]:
lin_accuracy = accuracy_score(y_test_50,lin_pred_y_50)
print("Accuracy of SVM with Linear kernel is ",lin_accuracy)

In [None]:
print(classification_report(y_test_50,lin_pred_y_50))

#### Tuning Hyperparameter

In [None]:
# Sweep the regularisation strength C for the linear-kernel SVM.
c_values = [0.1, 1, 10, 100]

accuracy_values = []    # test-set accuracy for each C, in c_values order
predicted_values = []   # test-set predictions for each C, in c_values order

for i in c_values:
    # A fresh model is fitted for every C, so the runs are independent of each other.
    svm_lin_tunned = SVC(kernel='linear', C = i )
    svm_lin_tunned.fit(X_train_50, y_train_50)
    lin_t_pred_y_50 = svm_lin_tunned.predict(X_test_50)
    lin_t_accuracy = accuracy_score(y_test_50,lin_t_pred_y_50)
    
    accuracy_values.append(lin_t_accuracy)
    predicted_values.append(lin_t_pred_y_50)
    
# NOTE(review): C is compared on the held-out test split here, not by cross-validation on the
# training split, so these accuracies are not an unbiased basis for choosing C.
print(accuracy_values)

Therefore, the value of C (between 0.1 and 100) does not affect the accuracy.
So, no further hyperparameter tuning is required.

### Polynomial Kernel

In [None]:
svm_poly.get_params()

In [None]:
poly_pred_y_50 = svm_poly.predict(X_test_50)

In [None]:
poly_accuracy = accuracy_score(y_test_50,poly_pred_y_50)
print("Accuracy of SVM with polynomial kernel (of degree 3) is ",poly_accuracy)

In [None]:
print(classification_report(y_test_50,poly_pred_y_50))

In [None]:
# Confusion matrix for the default polynomial-kernel SVM on the all-features test split.
# It must be built from the polynomial model's own predictions (poly_pred_y_50); using the
# LinearSVC predictions (pred_y_test_50) would redraw the LinearSVC matrix under this title.
cm_svm_poly_ALL_n = confusion_matrix(y_test_50, poly_pred_y_50)
fig, axes = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_svm_poly_ALL_n, annot=True, fmt="d", cmap="Blues", xticklabels=svm_poly.classes_, yticklabels=svm_poly.classes_, ax=axes)
axes.set_title('SVM with polynomial(degree = 3) Kernel\n on concatenated features(from CNN, HOG, LBP)\n on normalised data')
axes.set_xlabel('Predicted')
axes.set_ylabel('True')

#### Tuning Hyperparameters

In [None]:
# Defining parameter grid
param_grid = {'C': [0.1, 1, 10, 100],
              'gamma': [0.001, 0.01, 0.1, 1]}

# Performing grid search
grid_search = GridSearchCV(estimator=svm_poly, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train_50,y_train_50)

cv_results = grid_search.cv_results_
print(cv_results)
# Best parameters
poly_best_params_initial = grid_search.best_params_
poly_best_params_initial

In [None]:
# cv_results['param_C'].data

In [None]:
C_values = [0.1,1,10,100]
Gamma_values = [0.001,0.01,0.1,1]

# GridSearchCV enumerates its candidates with ParameterGrid, which sorts the parameter
# names and varies the last one fastest. With the keys 'C' and 'gamma', the flat
# mean_test_score array is therefore ordered
#   (C0,g0), (C0,g1), (C0,g2), (C0,g3), (C1,g0), ...
# so candidate (C index i, gamma index j) sits at flat position i*len(Gamma_values) + j
# (not i + j). Reshaping puts C on the rows and gamma on the columns.
score_grid = np.asarray(cv_results['mean_test_score']).reshape(len(C_values), len(Gamma_values))

# Mean cross-validated accuracy for each C, averaged over all gamma values.
mean_C = score_grid.mean(axis=1).tolist()
print(mean_C)

# Mean cross-validated accuracy for each gamma, averaged over all C values.
mean_gamma = score_grid.mean(axis=0).tolist()
print(mean_gamma)

In [None]:
fig, ax = plt.subplots()


ax.plot(C_values, mean_C, marker='o', linestyle='-')
ax.set_xticks(C_values)
ax.set_xticklabels(C_values,ha='right')

ax.set_xlabel('C')
ax.set_ylabel('Mean of Accuracies')
ax.set_title('Mean of Accuracies v/s C')

# Show the plot
plt.show()

In [None]:
fig, ax = plt.subplots()


ax.plot(Gamma_values, mean_gamma, marker='o', linestyle='-')
ax.set_xticks(Gamma_values)
ax.set_xticklabels(Gamma_values, rotation=45 ,ha='right')

ax.set_xlabel('gamma')
ax.set_ylabel('Mean of Accuracies')
ax.set_title('Mean of Accuracies v/s gamma')

# Show the plot
plt.show()

In [None]:
svm_poly_tunned = SVC(kernel='poly', gamma=poly_best_params_initial['gamma'], C =poly_best_params_initial['C'] )
svm_poly_tunned.fit(X_train_50, y_train_50)

In [None]:
joblib.dump(svm_poly_tunned,'svm_poly_ALL_n_t.joblib')

In [None]:
poly_t_pred_y_50 = svm_poly_tunned.predict(X_test_50)

In [None]:
poly_t_accuracy = accuracy_score(y_test_50,poly_t_pred_y_50)
print("Accuracy of SVM with Polynomial kernel (degree = 3) with tunned hyperparameters is ",poly_t_accuracy)

In [None]:
print(classification_report(y_test_50,poly_t_pred_y_50))

In [None]:
cm_svm_poly_ALL_n_t = confusion_matrix(y_test_50, poly_t_pred_y_50)
fig, axes = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_svm_poly_ALL_n_t, annot=True, fmt="d", cmap="Blues", xticklabels=svm_poly_tunned.classes_, yticklabels=svm_poly_tunned.classes_, ax=axes)
axes.set_title('SVM with polynomial(degree = 3) Kernel\n on concatenated features(from CNN, HOG, LBP)\n on normalised data with tunned Hyperparameters')
axes.set_xlabel('Predicted')
axes.set_ylabel('True')

### RBF Kernel

In [None]:
# svm_rbf = joblib.load('svm_rbf_ALL_n.joblib')

In [None]:
rbf_pred_y_50 = svm_rbf.predict(X_test_50)

In [None]:
rbf_accuracy = accuracy_score(y_test_50,rbf_pred_y_50)
print("Accuracy of SVM with RBF kernel is ",rbf_accuracy)

In [None]:
print(classification_report(y_test_50,rbf_pred_y_50))

In [None]:
cm_svm_rbf_ALL_n = confusion_matrix(y_test_50, rbf_pred_y_50)
fig, axes = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_svm_rbf_ALL_n, annot=True, fmt="d", cmap="Blues", xticklabels=svm_rbf.classes_, yticklabels=svm_rbf.classes_, ax=axes)
axes.set_title('SVM with RBF Kernel\n on concatenated features(from CNN, HOG, LBP)\n on normalised data')
axes.set_xlabel('Predicted')
axes.set_ylabel('True')

#### Tuning Hyperparameters

In [None]:
# Define parameter grid
param_grid = {'C': [0.1, 1, 10, 100],
              'gamma': [0.001, 0.01, 0.1, 1]}

# Performing grid search
grid_search_i = GridSearchCV(estimator=svm_rbf, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search_i.fit(X_train_50, y_train_50)

# Best parameters
rbf_best_params_initial = grid_search_i.best_params_
rbf_best_params_initial

In [None]:
C_values = [0.1,1,10,100]
Gamma_values = [0.001,0.01,0.1,1]

# GridSearchCV enumerates its candidates with ParameterGrid, which sorts the parameter
# names and varies the last one fastest. With the keys 'C' and 'gamma', the flat
# mean_test_score array is therefore ordered
#   (C0,g0), (C0,g1), (C0,g2), (C0,g3), (C1,g0), ...
# so candidate (C index i, gamma index j) sits at flat position i*len(Gamma_values) + j
# (not i + j). Reshaping puts C on the rows and gamma on the columns.
score_grid = np.asarray(grid_search_i.cv_results_['mean_test_score']).reshape(len(C_values), len(Gamma_values))

# Mean cross-validated accuracy for each C, averaged over all gamma values.
mean_C = score_grid.mean(axis=1).tolist()
print(mean_C)

# Mean cross-validated accuracy for each gamma, averaged over all C values.
mean_gamma = score_grid.mean(axis=0).tolist()
print(mean_gamma)


In [None]:
fig, ax = plt.subplots()
ax.plot(C_values, mean_C, marker='o', linestyle='-')
ax.set_xticks(C_values)
ax.set_xticklabels(C_values,ha='right')

ax.set_xlabel('C')
ax.set_ylabel('Mean of Accuracies')
ax.set_title('Mean of Accuracies v/s C')

plt.show()


In [None]:
fig, ax = plt.subplots()
ax.plot(Gamma_values, mean_gamma, marker='o', linestyle='-')
ax.set_xticks(Gamma_values)
ax.set_xticklabels(Gamma_values, rotation=45 ,ha='right')

ax.set_xlabel('gamma')
ax.set_ylabel('Mean of Accuracies')
ax.set_title('Mean of Accuracies v/s gamma')

plt.show()

In [None]:
svm_rbf_tunned = SVC(kernel='rbf', gamma=rbf_best_params_initial['gamma'], C =rbf_best_params_initial['C'] )
svm_rbf_tunned.fit(X_train_50, y_train_50)

In [None]:
joblib.dump(svm_rbf_tunned,'svm_rbf_ALL_n_ti.joblib')

In [None]:
rbf_t_pred_y_50 = svm_rbf_tunned.predict(X_test_50)

In [None]:
rbf_t_accuracy = accuracy_score(y_test_50,rbf_t_pred_y_50)
print("Accuracy of SVM with rbf kernel with tunned hyperparameters is ",rbf_t_accuracy)

In [None]:
print(classification_report(y_test_50,rbf_t_pred_y_50))

In [None]:
# Confusion matrix for the RBF SVM refitted with the best parameters of the initial grid search.
# It is stored under its own name (matching the 'svm_rbf_ALL_n_ti.joblib' model file) so that it
# does not overwrite the untuned model's matrix, and the title says which model is shown.
cm_svm_rbf_ALL_n_ti = confusion_matrix(y_test_50, rbf_t_pred_y_50)
fig, axes = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_svm_rbf_ALL_n_ti, annot=True, fmt="d", cmap="Blues", xticklabels=svm_rbf_tunned.classes_, yticklabels=svm_rbf_tunned.classes_, ax=axes)
axes.set_title('SVM with RBF Kernel\n on concatenated features(from CNN, HOG, LBP)\n on normalised data with initially tunned Hyperparameters')
axes.set_xlabel('Predicted')
axes.set_ylabel('True')

In [None]:
# Defining the starting and ending points for C and gamma, and the number of points
# Refined grid search for the RBF SVM on the all-features data.
# With start_C == end_C == 1 and num_C == 1, np.linspace yields the single value 1.0,
# so C is held fixed and only gamma is actually searched in this pass.
start_C = 1
end_C = 1
num_C = 1

# 15 evenly spaced gamma candidates between 0.001 and 0.1 (both ends included).
start_gamma = 0.001
end_gamma = 0.1
num_gamma = 15

# Parameter grid with specified ranges and number of points
param_grid = {'C': np.linspace(start_C, end_C, num_C),
              'gamma': np.linspace(start_gamma, end_gamma, num_gamma)}


# Performing grid search
# 5-fold cross-validation on the training split, scored by accuracy, using all CPU cores.
grid_search_f = GridSearchCV(estimator=svm_rbf, param_grid=param_grid, cv=5 , scoring='accuracy', n_jobs=-1, verbose=2)
grid_search_f.fit(X_train_50, y_train_50)

# Best parameters
rbf_best_params_final = grid_search_f.best_params_
# Best mean cross-validated accuracy found by the search.
rbf_best_accuracy_final = grid_search_f.best_score_
rbf_best_params_final

In [None]:
svm_rbf_tunned_f = SVC(kernel='rbf', gamma=rbf_best_params_final['gamma'], C =rbf_best_params_final['C'] )
svm_rbf_tunned_f.fit(X_train_50, y_train_50)

In [None]:
joblib.dump(svm_rbf_tunned_f,'svm_rbf_ALL_n_tf.joblib')

In [None]:
rbf_t_pred_y_50_f = svm_rbf_tunned_f.predict(X_test_50)

In [None]:
rbf_t_accuracy_f = accuracy_score(y_test_50,rbf_t_pred_y_50_f)
print("Accuracy of SVM with rbf kernel with tunned hyperparameters is ",rbf_t_accuracy_f)

In [None]:
print(classification_report(y_test_50,rbf_t_pred_y_50_f))

In [None]:
cm_svm_rbf_ALL_n_t = confusion_matrix(y_test_50, rbf_t_pred_y_50_f)
fig, axes = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_svm_rbf_ALL_n_t, annot=True, fmt="d", cmap="Blues", xticklabels=svm_rbf_tunned_f.classes_, yticklabels=svm_rbf_tunned_f.classes_, ax=axes)
axes.set_title('SVM with RBF Kernel\n on concatenated features(from CNN, HOG, LBP)\n on normalised data with tunned Hyperparameters')
axes.set_xlabel('Predicted')
axes.set_ylabel('True')

# CNN, HOG and LBP Features Separately

In [None]:
# Split the concatenated feature columns into one block per descriptor.
# .loc slices by column label and includes both end labels; X_50 carries the labels 1..3248
# (column 0, the person name, was dropped), giving 944, 2048 and 256 columns respectively.
# NOTE(review): the boundaries 944/945 and 2992/2993 are assumed to match the order and widths
# in which the HOG, CNN and LBP features were concatenated in the CSV; confirm against the
# feature-extraction step.
X_hog = X_50.loc[:,:944]
X_cnn = X_50.loc[:,945:2992]
X_lbp = X_50.loc[:,2993:]

In [None]:
# X_hog
# X_cnn
# X_lbp

## CNN Features

In [None]:
X_train_50_df_cnn, X_test_50_df_cnn, y_train_50_cnn, y_test_50_cnn = train_test_split(X_cnn,y_50,train_size=0.8,random_state=42)
X_train_50_df_cnn.reset_index(drop=True, inplace = True)
X_test_50_df_cnn.reset_index(drop=True, inplace = True)
y_train_50_cnn.reset_index(drop=True, inplace = True)
y_test_50_cnn.reset_index(drop=True, inplace = True)

In [None]:
#Normalising the data
scaler_cnn=StandardScaler()
X_train_arr=X_train_50_df_cnn.values
X_test_arr=X_test_50_df_cnn.values
X_train_arr_n=scaler_cnn.fit_transform(X_train_arr)
joblib.dump(scaler_cnn,'Standard_Scalar_Normalisation_cnn.joblib')
X_test_arr_n = scaler_cnn.transform(X_test_arr)
X_train_50_cnn = pd.DataFrame(X_train_arr_n)
X_test_50_cnn = pd.DataFrame(X_test_arr_n)
# X_train_50_cnn

### LinearSVC

In [None]:
# Training Linear Support Vector Classifier (LinearSVC)
lin_clf_cnn = svm.LinearSVC(dual=False)
lin_clf_cnn.fit(X_train_50_cnn, y_train_50_cnn)
joblib.dump(lin_clf_cnn,'linearSVC_cnn.joblib')

In [None]:
pred_y_test_50_cnn = lin_clf_cnn.predict(X_test_50_cnn)

In [None]:
# Accuracy of linearSVC model
linSVC_accuracy = lin_clf_cnn.score(X_test_50_cnn, y_test_50_cnn)
print("Accuracy of linearSVC is ",linSVC_accuracy)

In [None]:
# Classification Report of linearSVC model
print(classification_report(y_test_50_cnn,pred_y_test_50_cnn))

In [None]:
cm_linearSVC_cnn_n = confusion_matrix(y_test_50_cnn, pred_y_test_50_cnn)
fig, axes = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_linearSVC_cnn_n, annot=True, fmt="d", cmap="Blues", xticklabels=lin_clf_cnn.classes_, yticklabels=lin_clf_cnn.classes_, ax=axes)
axes.set_title('LinearSVC on CNN features \n on normalised data')
axes.set_xlabel('Predicted')
axes.set_ylabel('True')

### SVM Models with Linear, Polynomial and RBF Kernels

In [None]:
# Three SVC models for the CNN features that differ only in the kernel; other hyperparameters are defaults.
svm_linear_cnn = SVC(kernel='linear') # Linear kernel
svm_poly_cnn = SVC(kernel='poly', degree= 3)  # Polynomial kernel of degree 3
svm_rbf_cnn = SVC(kernel='rbf')  # RBF kernel

In [None]:
svm_linear_cnn.fit(X_train_50_cnn, y_train_50_cnn)
svm_poly_cnn.fit(X_train_50_cnn, y_train_50_cnn)
svm_rbf_cnn.fit(X_train_50_cnn, y_train_50_cnn)

In [None]:
joblib.dump(svm_linear_cnn,'svm_linear_cnn_n.joblib')
joblib.dump(svm_poly_cnn,'svm_poly_cnn_n.joblib')
joblib.dump(svm_rbf_cnn,'svm_rbf_cnn_n.joblib')

#### Linear Kernel

In [None]:
svm_linear_cnn.get_params()

In [None]:
lin_pred_y_50_cnn = svm_linear_cnn.predict(X_test_50_cnn)

In [None]:
lin_accuracy_cnn = accuracy_score(y_test_50_cnn,lin_pred_y_50_cnn)
print("Accuracy of SVM with Linear kernel is ",lin_accuracy_cnn)

In [None]:
print(classification_report(y_test_50_cnn,lin_pred_y_50_cnn))

In [None]:
# Confusion matrix for the linear-kernel SVM trained on the CNN features.
# The ground truth is this section's own test labels (y_test_50_cnn), so the cell does not
# depend on the all-features section having been run.
cm_svm_linear_cnn_n = confusion_matrix(y_test_50_cnn, lin_pred_y_50_cnn)
fig, axes = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_svm_linear_cnn_n, annot=True, fmt="d", cmap="Blues", xticklabels=svm_linear_cnn.classes_, yticklabels=svm_linear_cnn.classes_, ax=axes)
axes.set_title('SVM with Linear Kernel\n on CNN features\n on normalised data')
axes.set_xlabel('Predicted')
axes.set_ylabel('True')

##### Tuning Hyperparameter

In [None]:
# Sweep the regularisation strength C for the linear-kernel SVM on the CNN features.
c_values = [0.1, 1, 10, 100]

accuracy_values_cnn = []    # test-set accuracy for each C, in c_values order
predicted_values_cnn = []   # test-set predictions for each C, in c_values order

for i in c_values:
    # A fresh model is fitted for every C, so the runs are independent of each other.
    svm_lin_tunned_cnn = SVC(kernel='linear', C = i )
    svm_lin_tunned_cnn.fit(X_train_50_cnn, y_train_50_cnn)
    lin_t_pred_y_50_cnn = svm_lin_tunned_cnn.predict(X_test_50_cnn)
    lin_t_accuracy_cnn = accuracy_score(y_test_50_cnn,lin_t_pred_y_50_cnn)
    
    accuracy_values_cnn.append(lin_t_accuracy_cnn)
    predicted_values_cnn.append(lin_t_pred_y_50_cnn)
    
# NOTE(review): C is compared on the held-out test split here, not by cross-validation on the
# training split, so these accuracies are not an unbiased basis for choosing C.
print(accuracy_values_cnn)

Therefore, the value of C (between 0.1 and 100) does not affect the accuracy.
So, no further hyperparameter tuning is required.

#### Polynomial Kernel

In [None]:
svm_poly_cnn.get_params()

In [None]:
poly_pred_y_50_cnn = svm_poly_cnn.predict(X_test_50_cnn)

In [None]:
poly_accuracy_cnn = accuracy_score(y_test_50_cnn,poly_pred_y_50_cnn)
print("Accuracy of SVM with polynomial kernel (of degree 3) is ",poly_accuracy_cnn)

In [None]:
print(classification_report(y_test_50_cnn,poly_pred_y_50_cnn))

In [None]:
# Confusion matrix for the default polynomial-kernel SVM trained on the CNN features.
# The ground truth is this section's own test labels (y_test_50_cnn), so the cell does not
# depend on the all-features section having been run.
cm_svm_poly_cnn_n = confusion_matrix(y_test_50_cnn, poly_pred_y_50_cnn)
fig, axes = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_svm_poly_cnn_n, annot=True, fmt="d", cmap="Blues", xticklabels=svm_poly_cnn.classes_, yticklabels=svm_poly_cnn.classes_, ax=axes)
axes.set_title('SVM with Polynomial (degree=3) Kernel\n on CNN features\n on normalised data')
axes.set_xlabel('Predicted')
axes.set_ylabel('True')

##### Tuning Hyperparameters

In [None]:
# Defining parameter grid
param_grid = {'C': [0.1, 1, 10, 100],
              'gamma': [0.001, 0.01, 0.1, 1]}

# Performing grid search
grid_search_cnn_poly = GridSearchCV(estimator=svm_poly_cnn, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search_cnn_poly.fit(X_train_50_cnn,y_train_50_cnn)

# Best parameters
poly_best_params_initial_cnn = grid_search_cnn_poly.best_params_
poly_best_params_initial_cnn

In [None]:
C_values = [0.1,1,10,100]
Gamma_values = [0.001,0.01,0.1,1]

# GridSearchCV enumerates its candidates with ParameterGrid, which sorts the parameter
# names and varies the last one fastest. With the keys 'C' and 'gamma', the flat
# mean_test_score array is therefore ordered
#   (C0,g0), (C0,g1), (C0,g2), (C0,g3), (C1,g0), ...
# so candidate (C index i, gamma index j) sits at flat position i*len(Gamma_values) + j
# (not i + j). Reshaping puts C on the rows and gamma on the columns.
score_grid = np.asarray(grid_search_cnn_poly.cv_results_['mean_test_score']).reshape(len(C_values), len(Gamma_values))

# Mean cross-validated accuracy for each C, averaged over all gamma values.
mean_C = score_grid.mean(axis=1).tolist()
print(mean_C)

# Mean cross-validated accuracy for each gamma, averaged over all C values.
mean_gamma = score_grid.mean(axis=0).tolist()
print(mean_gamma)

In [None]:
fig, ax = plt.subplots()
ax.plot(C_values, mean_C, marker='o', linestyle='-')
ax.set_xticks(C_values)
ax.set_xticklabels(C_values,ha='right')

ax.set_xlabel('C')
ax.set_ylabel('Mean of Accuracies')
ax.set_title('Mean of Accuracies v/s C')

plt.show()


In [None]:
fig, ax = plt.subplots()
ax.plot(Gamma_values, mean_gamma, marker='o', linestyle='-')
ax.set_xticks(Gamma_values)
ax.set_xticklabels(Gamma_values, rotation=45 ,ha='right')

ax.set_xlabel('gamma')
ax.set_ylabel('Mean of Accuracies')
ax.set_title('Mean of Accuracies v/s gamma')

plt.show()

In [None]:
svm_poly_tunned_cnn = SVC(kernel='poly', gamma=poly_best_params_initial_cnn['gamma'], C =poly_best_params_initial_cnn['C'] )
svm_poly_tunned_cnn.fit(X_train_50_cnn, y_train_50_cnn)

In [None]:
joblib.dump(svm_poly_tunned_cnn,'svm_poly_cnn_n_t.joblib')

In [None]:
poly_t_pred_y_50_cnn = svm_poly_tunned_cnn.predict(X_test_50_cnn)

In [None]:
poly_t_accuracy_cnn = accuracy_score(y_test_50_cnn,poly_t_pred_y_50_cnn)
print("Accuracy of SVM with Polynomial kernel (degree = 3) with tunned hyperparameters is ",poly_t_accuracy_cnn)

In [None]:
print(classification_report(y_test_50_cnn,poly_t_pred_y_50_cnn))

In [None]:
# Confusion matrix for the tuned polynomial-kernel SVM trained on the CNN features.
# It must be built from the tuned polynomial model's predictions (poly_t_pred_y_50_cnn); the
# linear-kernel predictions (lin_pred_y_50_cnn) would redraw the linear matrix under this title.
cm_svm_poly_cnn_n_t = confusion_matrix(y_test_50_cnn, poly_t_pred_y_50_cnn)
fig, axes = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_svm_poly_cnn_n_t, annot=True, fmt="d", cmap="Blues", xticklabels=svm_poly_tunned_cnn.classes_, yticklabels=svm_poly_tunned_cnn.classes_, ax=axes)
axes.set_title('SVM with Polynomial (degree=3) Kernel\n on CNN features\n on normalised data with tunned hyperparameters')
axes.set_xlabel('Predicted')
axes.set_ylabel('True')


#### RBF Kernel

In [None]:
rbf_pred_y_50_cnn = svm_rbf_cnn.predict(X_test_50_cnn)

In [None]:
rbf_accuracy_cnn = accuracy_score(y_test_50_cnn,rbf_pred_y_50_cnn)
print("Accuracy of SVM with RBF kernel is ",rbf_accuracy_cnn)

In [None]:
print(classification_report(y_test_50_cnn,rbf_pred_y_50_cnn))

In [None]:
# Confusion matrix for the default RBF-kernel SVM trained on the CNN features.
# The ground truth is this section's own test labels (y_test_50_cnn), so the cell does not
# depend on the all-features section having been run.
cm_svm_rbf_cnn_n = confusion_matrix(y_test_50_cnn, rbf_pred_y_50_cnn)
fig, axes = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_svm_rbf_cnn_n, annot=True, fmt="d", cmap="Blues", xticklabels=svm_rbf_cnn.classes_, yticklabels=svm_rbf_cnn.classes_, ax=axes)
axes.set_title('SVM with RBF Kernel\n on CNN features\n on normalised data')
axes.set_xlabel('Predicted')
axes.set_ylabel('True')

##### Tuning Hyperparameters

In [None]:
# Define parameter grid
param_grid = {'C': [0.1, 1, 10, 100],
              'gamma': [0.001, 0.01, 0.1, 1]}

# Performing grid search
# Use this section's own RBF estimator (svm_rbf_cnn) as the template, not svm_rbf from the
# all-features section, so the CNN analysis does not depend on that section having been run.
# 5-fold cross-validation on the CNN training split, scored by accuracy, using all CPU cores.
grid_search_cnn_rbf = GridSearchCV(estimator=svm_rbf_cnn, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search_cnn_rbf.fit(X_train_50_cnn, y_train_50_cnn)

# Best parameters
rbf_best_params_initial_cnn = grid_search_cnn_rbf.best_params_
rbf_best_params_initial_cnn

In [None]:
C_values = [0.1,1,10,100]
Gamma_values = [0.001,0.01,0.1,1]

# GridSearchCV enumerates its candidates with ParameterGrid, which sorts the parameter
# names and varies the last one fastest. With the keys 'C' and 'gamma', the flat
# mean_test_score array is therefore ordered
#   (C0,g0), (C0,g1), (C0,g2), (C0,g3), (C1,g0), ...
# so candidate (C index i, gamma index j) sits at flat position i*len(Gamma_values) + j
# (not i + j). Reshaping puts C on the rows and gamma on the columns.
score_grid = np.asarray(grid_search_cnn_rbf.cv_results_['mean_test_score']).reshape(len(C_values), len(Gamma_values))

# Mean cross-validated accuracy for each C, averaged over all gamma values.
mean_C = score_grid.mean(axis=1).tolist()
print(mean_C)

# Mean cross-validated accuracy for each gamma, averaged over all C values.
mean_gamma = score_grid.mean(axis=0).tolist()
print(mean_gamma)

In [None]:
fig, ax = plt.subplots()
ax.plot(C_values, mean_C, marker='o', linestyle='-')
ax.set_xticks(C_values)
ax.set_xticklabels(C_values,ha='right')

ax.set_xlabel('C')
ax.set_ylabel('Mean of Accuracies')
ax.set_title('Mean of Accuracies v/s C')

plt.show()

In [None]:
fig, ax = plt.subplots()
ax.plot(Gamma_values, mean_gamma, marker='o', linestyle='-')
ax.set_xticks(Gamma_values)
ax.set_xticklabels(Gamma_values, rotation=45 ,ha='right')

ax.set_xlabel('gamma')
ax.set_ylabel('Mean of Accuracies')
ax.set_title('Mean of Accuracies v/s gamma')

plt.show()

In [None]:
svm_rbf_tunned_cnn = SVC(kernel='rbf', gamma=rbf_best_params_initial_cnn['gamma'], C =rbf_best_params_initial_cnn['C'] )
svm_rbf_tunned_cnn.fit(X_train_50_cnn, y_train_50_cnn)

In [None]:
joblib.dump(svm_rbf_tunned_cnn,'svm_rbf_cnn_n_ti.joblib')

In [None]:
rbf_t_pred_y_50_cnn = svm_rbf_tunned_cnn.predict(X_test_50_cnn)

In [None]:
rbf_t_accuracy_cnn = accuracy_score(y_test_50_cnn,rbf_t_pred_y_50_cnn)
print("Accuracy of SVM with rbf kernel with tunned hyperparameters is ",rbf_t_accuracy_cnn)

In [None]:
print(classification_report(y_test_50_cnn,rbf_t_pred_y_50_cnn))

In [None]:
# Defining the starting and ending points for C and gamma, and the number of points
# C: 10 evenly spaced candidates between 0.1 and 10 (both ends included).
start_C = 0.1
end_C = 10
num_C = 10

# gamma: 5 evenly spaced candidates between 0.001 and 0.1 (both ends included).
start_gamma = 0.001
end_gamma = 0.1
num_gamma = 5

# Parameter grid with specified ranges and number of points
param_grid = {'C': np.linspace(start_C, end_C, num_C),
              'gamma': np.linspace(start_gamma, end_gamma, num_gamma)}


# Performing grid search
# Use this section's own RBF estimator (svm_rbf_cnn) as the template, not svm_rbf from the
# all-features section, so the CNN analysis does not depend on that section having been run.
# NOTE: this rebinds grid_search_cnn_rbf, replacing the coarse 4x4 search results with this finer grid.
grid_search_cnn_rbf = GridSearchCV(estimator=svm_rbf_cnn, param_grid=param_grid, cv=5 , scoring='accuracy', n_jobs=-1)
grid_search_cnn_rbf.fit(X_train_50_cnn, y_train_50_cnn)

# Best parameters
rbf_best_params_final_cnn = grid_search_cnn_rbf.best_params_
# Best mean cross-validated accuracy found by the search.
rbf_best_accuracy_final_cnn = grid_search_cnn_rbf.best_score_
rbf_best_params_final_cnn

In [None]:
svm_rbf_tunned_f_cnn = SVC(kernel='rbf', gamma=rbf_best_params_final_cnn['gamma'], C =rbf_best_params_final_cnn['C'] )
svm_rbf_tunned_f_cnn.fit(X_train_50_cnn, y_train_50_cnn)

In [None]:
joblib.dump(svm_rbf_tunned_f_cnn,'svm_rbf_cnn_n_tf.joblib')

In [None]:
rbf_t_pred_y_50_f_cnn = svm_rbf_tunned_f_cnn.predict(X_test_50_cnn)

In [None]:
rbf_t_accuracy_f_cnn = accuracy_score(y_test_50_cnn,rbf_t_pred_y_50_f_cnn)
print("Accuracy of SVM with rbf kernel with tunned hyperparameters is ",rbf_t_accuracy_f_cnn)

In [None]:
print(classification_report(y_test_50_cnn,rbf_t_pred_y_50_f_cnn))

In [None]:
# Confusion matrix for the final tuned RBF-kernel SVM trained on the CNN features.
# The ground truth is this section's own test labels (y_test_50_cnn), so the cell does not
# depend on the all-features section having been run.
cm_svm_rbf_cnn_n_tf = confusion_matrix(y_test_50_cnn, rbf_t_pred_y_50_f_cnn)
fig, axes = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_svm_rbf_cnn_n_tf, annot=True, fmt="d", cmap="Blues", xticklabels=svm_rbf_tunned_f_cnn.classes_, yticklabels=svm_rbf_tunned_f_cnn.classes_, ax=axes)
axes.set_title('SVM with RBF Kernel\n on CNN features\n on normalised data with tunned hyperparameters')
axes.set_xlabel('Predicted')
axes.set_ylabel('True')

## HoG Features

In [None]:
X_train_50_df_hog, X_test_50_df_hog, y_train_50_hog, y_test_50_hog = train_test_split(X_hog,y_50,train_size=0.8,random_state=42)
X_train_50_df_hog.reset_index(drop=True, inplace = True)
X_test_50_df_hog.reset_index(drop=True, inplace = True)
y_train_50_hog.reset_index(drop=True, inplace = True)
y_test_50_hog.reset_index(drop=True, inplace = True)

In [None]:
# Standardise the HoG features: the scaler is fitted on the training split only
# and then applied unchanged to both splits.
X_train_arr = X_train_50_df_hog.to_numpy()
X_test_arr = X_test_50_df_hog.to_numpy()

scaler_hog = StandardScaler().fit(X_train_arr)
# Keep the fitted scaler on disk so the same transform can be reloaded later.
joblib.dump(scaler_hog, 'Standard_Scalar_Normalisation_hog.joblib')

X_train_arr_n = scaler_hog.transform(X_train_arr)
X_test_arr_n = scaler_hog.transform(X_test_arr)

# Wrap the scaled arrays back into DataFrames (default integer column names).
X_train_50_hog = pd.DataFrame(X_train_arr_n)
X_test_50_hog = pd.DataFrame(X_test_arr_n)

### LinearSVC

In [None]:
# Train a Linear Support Vector Classifier (LinearSVC, primal formulation via
# dual=False) on the normalised HoG training split and save it to disk.
lin_clf_hog = svm.LinearSVC(dual=False)
lin_clf_hog.fit(X_train_50_hog, y_train_50_hog)
joblib.dump(lin_clf_hog,'linearSVC_hog.joblib')

In [None]:
# Predict labels for the HoG test split.
pred_y_test_50_hog = lin_clf_hog.predict(X_test_50_hog)

In [None]:
# Accuracy of the LinearSVC model on the HoG test split.
# NOTE: `linSVC_accuracy` is also assigned by the LBP section, so it holds the
# value of whichever section ran last.
linSVC_accuracy = lin_clf_hog.score(X_test_50_hog, y_test_50_hog)
print("Accuracy of linearSVC is ",linSVC_accuracy)

In [None]:
# Classification report (per-class precision / recall / F1) of the LinearSVC model.
print(classification_report(y_test_50_hog,pred_y_test_50_hog))

In [None]:
# Confusion matrix of LinearSVC on the HoG test split.
# `labels=` pins the row/column order to the classifier's classes_ so the heatmap
# tick labels are guaranteed to line up with the matrix.
cm_linearSVC_hog_n = confusion_matrix(y_test_50_hog, pred_y_test_50_hog, labels=lin_clf_hog.classes_)
fig, axes = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_linearSVC_hog_n, annot=True, fmt="d", cmap="Blues", xticklabels=lin_clf_hog.classes_, yticklabels=lin_clf_hog.classes_, ax=axes)
axes.set_title('LinearSVC on hog features \n on normalised data')
axes.set_xlabel('Predicted')
axes.set_ylabel('True')

### SVM Models with Linear, Polynomial and RBF Kernels

In [None]:
# Three SVC baselines for the HoG features; apart from the kernel (and the
# explicit degree for poly) all hyperparameters are left at their defaults.
svm_linear_hog = SVC(kernel='linear') # linear kernel
svm_poly_hog = SVC(kernel='poly', degree= 3)  # Polynomial kernel of degree 3
svm_rbf_hog = SVC(kernel='rbf')  # RBF kernel

In [None]:
# Fit each baseline on the normalised HoG training split.
svm_linear_hog.fit(X_train_50_hog, y_train_50_hog)
svm_poly_hog.fit(X_train_50_hog, y_train_50_hog)
svm_rbf_hog.fit(X_train_50_hog, y_train_50_hog)

In [None]:
# Persist the fitted baselines next to the notebook.
joblib.dump(svm_linear_hog,'svm_linear_hog_n.joblib')
joblib.dump(svm_poly_hog,'svm_poly_hog_n.joblib')
joblib.dump(svm_rbf_hog,'svm_rbf_hog_n.joblib')

#### Linear Kernel

In [None]:
# Show the hyperparameters the linear-kernel SVC was trained with.
svm_linear_hog.get_params()

In [None]:
# Predict labels for the HoG test split.
lin_pred_y_50_hog = svm_linear_hog.predict(X_test_50_hog)

In [None]:
# Overall test accuracy of the linear-kernel SVC.
lin_accuracy_hog = accuracy_score(y_test_50_hog,lin_pred_y_50_hog)
print("Accuracy of SVM with Linear kernel is ",lin_accuracy_hog)

In [None]:
# Per-class precision / recall / F1 on the HoG test split.
print(classification_report(y_test_50_hog,lin_pred_y_50_hog))

In [None]:
# Confusion matrix of the linear-kernel SVC on the HoG test split.
# Ground truth is this split's own labels (y_test_50_hog), the same ones used for
# the accuracy and classification report of this model.
# `labels=` pins the row/column order to the classifier's classes_ so the heatmap
# tick labels are guaranteed to line up with the matrix.
cm_svm_linear_hog_n = confusion_matrix(y_test_50_hog, lin_pred_y_50_hog, labels=svm_linear_hog.classes_)
fig, axes = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_svm_linear_hog_n, annot=True, fmt="d", cmap="Blues", xticklabels=svm_linear_hog.classes_, yticklabels=svm_linear_hog.classes_, ax=axes)
axes.set_title('SVM with Linear Kernel\n on hog features\n on normalised data')
axes.set_xlabel('Predicted')
axes.set_ylabel('True')

##### Tuning Hyperparameter

In [None]:
# Sweep the regularisation strength C of a linear-kernel SVC and record, for each
# value, the test accuracy and the test predictions.
# NOTE(review): the C values are compared on the test split, not on a validation
# split or via cross-validation.
c_values = [0.1, 1, 10, 100]
accuracy_values_hog, predicted_values_hog = [], []

for i in c_values:
    svm_lin_tunned_hog = SVC(kernel='linear', C=i).fit(X_train_50_hog, y_train_50_hog)
    lin_t_pred_y_50_hog = svm_lin_tunned_hog.predict(X_test_50_hog)
    lin_t_accuracy_hog = accuracy_score(y_test_50_hog, lin_t_pred_y_50_hog)

    predicted_values_hog.append(lin_t_pred_y_50_hog)
    accuracy_values_hog.append(lin_t_accuracy_hog)

print(accuracy_values_hog)

Therefore, the value of C (between 0.1 and 100) does not affect the accuracy.
So, no hyperparameter tuning is required for the linear kernel.

#### Polynomial Kernel

In [None]:
# Show the hyperparameters the polynomial-kernel SVC was trained with.
svm_poly_hog.get_params()

In [None]:
# Predict labels for the HoG test split.
poly_pred_y_50_hog = svm_poly_hog.predict(X_test_50_hog)

In [None]:
# Overall test accuracy of the polynomial-kernel SVC.
poly_accuracy_hog = accuracy_score(y_test_50_hog,poly_pred_y_50_hog)
print("Accuracy of SVM with polynomial kernel (of degree 3) is ",poly_accuracy_hog)

In [None]:
# Per-class precision / recall / F1 on the HoG test split.
print(classification_report(y_test_50_hog,poly_pred_y_50_hog))

In [None]:
# Confusion matrix of the polynomial-kernel SVC on the HoG test split.
# Ground truth is this split's own labels (y_test_50_hog), the same ones used for
# the accuracy and classification report of this model.
# `labels=` pins the row/column order to the classifier's classes_ so the heatmap
# tick labels are guaranteed to line up with the matrix.
cm_svm_poly_hog_n = confusion_matrix(y_test_50_hog, poly_pred_y_50_hog, labels=svm_poly_hog.classes_)
fig, axes = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_svm_poly_hog_n, annot=True, fmt="d", cmap="Blues", xticklabels=svm_poly_hog.classes_, yticklabels=svm_poly_hog.classes_, ax=axes)
axes.set_title('SVM with Polynomial (degree=3) Kernel\n on hog features\n on normalised data')
axes.set_xlabel('Predicted')
axes.set_ylabel('True')

##### Tuning Hyperparameters

In [None]:
# Log-spaced candidate values for C and gamma of the polynomial SVM.
param_grid = dict(
    C=[0.1, 1, 10, 100],
    gamma=[0.001, 0.01, 0.1, 1],
)

# 5-fold cross-validated grid search scored by accuracy, using all CPU cores.
grid_search_hog_poly = GridSearchCV(
    svm_poly_hog,
    param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
).fit(X_train_50_hog, y_train_50_hog)

# Best parameter combination (displayed as the cell output).
poly_best_params_initial_hog = grid_search_hog_poly.best_params_
poly_best_params_initial_hog

In [None]:
# Grid values to summarise; they must match the param_grid given to grid_search_hog_poly.
C_values = [0.1, 1, 10, 100]
Gamma_values = [0.001, 0.01, 0.1, 1]

# One entry per (C, gamma) candidate: its parameter dict and its mean CV accuracy.
cv_params = grid_search_hog_poly.cv_results_['params']
cv_scores = grid_search_hog_poly.cv_results_['mean_test_score']

# Mean CV accuracy for each C, averaged over every gamma tried with it.
# Candidates are matched by parameter value, not by position in cv_results_, so the
# result does not depend on the order in which GridSearchCV lists the candidates.
mean_C = [
    float(np.mean([score for params, score in zip(cv_params, cv_scores) if params['C'] == c_val]))
    for c_val in C_values
]
print(mean_C)

# Mean CV accuracy for each gamma, averaged over every C tried with it.
mean_gamma = [
    float(np.mean([score for params, score in zip(cv_params, cv_scores) if params['gamma'] == g_val]))
    for g_val in Gamma_values
]
print(mean_gamma)

In [None]:
# Line plot of mean_C against the C grid values (polynomial SVM, HoG features).
fig, ax = plt.subplots()
ax.plot(C_values, mean_C, 'o-')

# One tick per C value that was searched.
ax.set_xticks(C_values)
ax.set_xticklabels(C_values, ha='right')

ax.set(
    xlabel='C',
    ylabel='Mean of Accuracies',
    title='Mean of Accuracies v/s C',
)

plt.show()


In [None]:
# Line plot of mean_gamma against the gamma grid values (polynomial SVM, HoG features).
fig, ax = plt.subplots()
ax.plot(Gamma_values, mean_gamma, 'o-')

# One tick per gamma value that was searched; labels are rotated for readability.
ax.set_xticks(Gamma_values)
ax.set_xticklabels(Gamma_values, rotation=45, ha='right')

ax.set(
    xlabel='gamma',
    ylabel='Mean of Accuracies',
    title='Mean of Accuracies v/s gamma',
)

plt.show()

In [None]:
# Build a polynomial SVC with the best C/gamma reported by grid_search_hog_poly
# (degree is not passed, so SVC's default is used) and fit it on the HoG training split.
svm_poly_tunned_hog = SVC(kernel='poly', gamma=poly_best_params_initial_hog['gamma'], C =poly_best_params_initial_hog['C'] )
svm_poly_tunned_hog.fit(X_train_50_hog, y_train_50_hog)

In [None]:
# Persist the fitted model next to the notebook.
joblib.dump(svm_poly_tunned_hog,'svm_poly_hog_n_t.joblib')

In [None]:
# Predict labels for the HoG test split.
poly_t_pred_y_50_hog = svm_poly_tunned_hog.predict(X_test_50_hog)

In [None]:
# Overall test accuracy of the tuned polynomial SVC.
poly_t_accuracy_hog = accuracy_score(y_test_50_hog,poly_t_pred_y_50_hog)
print("Accuracy of SVM with Polynomial kernel (degree = 3) with tunned hyperparameters is ",poly_t_accuracy_hog)

In [None]:
# Per-class precision / recall / F1 on the HoG test split.
print(classification_report(y_test_50_hog,poly_t_pred_y_50_hog))

In [None]:
# Confusion matrix of the tuned polynomial SVC on the HoG test split.
# It is built from the tuned model's own predictions (poly_t_pred_y_50_hog) and this
# split's labels (y_test_50_hog), matching the accuracy/report cells for this model.
# `labels=` pins the row/column order to the classifier's classes_ so the heatmap
# tick labels are guaranteed to line up with the matrix.
cm_svm_poly_hog_n_t = confusion_matrix(y_test_50_hog, poly_t_pred_y_50_hog, labels=svm_poly_tunned_hog.classes_)
fig, axes = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_svm_poly_hog_n_t, annot=True, fmt="d", cmap="Blues", xticklabels=svm_poly_tunned_hog.classes_, yticklabels=svm_poly_tunned_hog.classes_, ax=axes)
axes.set_title('SVM with Polynomial (degree=3) Kernel\n on hog features\n on normalised data with tunned hyperparameters')
axes.set_xlabel('Predicted')
axes.set_ylabel('True')


#### RBF Kernel

In [None]:
# Predict labels for the HoG test split with the default-parameter RBF SVC.
rbf_pred_y_50_hog = svm_rbf_hog.predict(X_test_50_hog)

In [None]:
# Overall test accuracy of the RBF SVC.
rbf_accuracy_hog = accuracy_score(y_test_50_hog,rbf_pred_y_50_hog)
print("Accuracy of SVM with RBF kernel is ",rbf_accuracy_hog)

In [None]:
# Per-class precision / recall / F1 on the HoG test split.
print(classification_report(y_test_50_hog,rbf_pred_y_50_hog))

In [None]:
# Confusion matrix of the RBF SVC on the HoG test split.
# Ground truth is this split's own labels (y_test_50_hog), the same ones used for
# the accuracy and classification report of this model.
# `labels=` pins the row/column order to the classifier's classes_ so the heatmap
# tick labels are guaranteed to line up with the matrix.
cm_svm_rbf_hog_n = confusion_matrix(y_test_50_hog, rbf_pred_y_50_hog, labels=svm_rbf_hog.classes_)
fig, axes = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_svm_rbf_hog_n, annot=True, fmt="d", cmap="Blues", xticklabels=svm_rbf_hog.classes_, yticklabels=svm_rbf_hog.classes_, ax=axes)
axes.set_title('SVM with RBF Kernel\n on hog features\n on normalised data')
axes.set_xlabel('Predicted')
axes.set_ylabel('True')

##### Tuning Hyperparameters

In [None]:
# Coarse, log-spaced candidate values for C and gamma of the RBF SVM.
param_grid = {'C': [0.1, 1, 10, 100],
              'gamma': [0.001, 0.01, 0.1, 1]}

# 5-fold cross-validated grid search scored by accuracy, using all CPU cores.
# The estimator is this section's own RBF model (svm_rbf_hog), so the cell does not
# depend on an `svm_rbf` object left over from another section of the notebook.
grid_search_hog_rbf = GridSearchCV(estimator=svm_rbf_hog, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search_hog_rbf.fit(X_train_50_hog, y_train_50_hog)

# Best parameter combination (displayed as the cell output).
rbf_best_params_initial_hog = grid_search_hog_rbf.best_params_
rbf_best_params_initial_hog

In [None]:
# Grid values to summarise; they must match the coarse param_grid given to grid_search_hog_rbf.
C_values = [0.1, 1, 10, 100]
Gamma_values = [0.001, 0.01, 0.1, 1]

# One entry per (C, gamma) candidate: its parameter dict and its mean CV accuracy.
cv_params = grid_search_hog_rbf.cv_results_['params']
cv_scores = grid_search_hog_rbf.cv_results_['mean_test_score']

# Mean CV accuracy for each C, averaged over every gamma tried with it.
# Candidates are matched by parameter value, not by position in cv_results_, so the
# result does not depend on the order in which GridSearchCV lists the candidates.
mean_C = [
    float(np.mean([score for params, score in zip(cv_params, cv_scores) if params['C'] == c_val]))
    for c_val in C_values
]
print(mean_C)

# Mean CV accuracy for each gamma, averaged over every C tried with it.
mean_gamma = [
    float(np.mean([score for params, score in zip(cv_params, cv_scores) if params['gamma'] == g_val]))
    for g_val in Gamma_values
]
print(mean_gamma)

In [None]:
# Line plot of mean_C against the C grid values (RBF SVM, HoG features).
fig, ax = plt.subplots()
ax.plot(C_values, mean_C, 'o-')

# One tick per C value that was searched.
ax.set_xticks(C_values)
ax.set_xticklabels(C_values, ha='right')

ax.set(
    xlabel='C',
    ylabel='Mean of Accuracies',
    title='Mean of Accuracies v/s C',
)

plt.show()

In [None]:
# Line plot of mean_gamma against the gamma grid values (RBF SVM, HoG features).
fig, ax = plt.subplots()
ax.plot(Gamma_values, mean_gamma, 'o-')

# One tick per gamma value that was searched; labels are rotated for readability.
ax.set_xticks(Gamma_values)
ax.set_xticklabels(Gamma_values, rotation=45, ha='right')

ax.set(
    xlabel='gamma',
    ylabel='Mean of Accuracies',
    title='Mean of Accuracies v/s gamma',
)

plt.show()

In [None]:
# Build an RBF SVC with the best C/gamma from the coarse grid search
# (rbf_best_params_initial_hog) and fit it on the HoG training split.
svm_rbf_tunned_hog = SVC(kernel='rbf', gamma=rbf_best_params_initial_hog['gamma'], C =rbf_best_params_initial_hog['C'] )
svm_rbf_tunned_hog.fit(X_train_50_hog, y_train_50_hog)

In [None]:
# Persist the fitted model next to the notebook.
joblib.dump(svm_rbf_tunned_hog,'svm_rbf_hog_n_ti.joblib')

In [None]:
# Predict labels for the HoG test split.
rbf_t_pred_y_50_hog = svm_rbf_tunned_hog.predict(X_test_50_hog)

In [None]:
# Overall test accuracy of the coarsely tuned RBF SVC.
rbf_t_accuracy_hog = accuracy_score(y_test_50_hog,rbf_t_pred_y_50_hog)
print("Accuracy of SVM with rbf kernel with tunned hyperparameters is ",rbf_t_accuracy_hog)

In [None]:
# Per-class precision / recall / F1 on the HoG test split.
print(classification_report(y_test_50_hog,rbf_t_pred_y_50_hog))

In [None]:
# Refined search: linearly spaced C and gamma values over a narrower range.
# Start/end points and number of points for each hyperparameter.
start_C = 0.1
end_C = 10
num_C = 10

start_gamma = 0.001
end_gamma = 0.1
num_gamma = 5

# Parameter grid with the specified ranges and number of points
param_grid = {'C': np.linspace(start_C, end_C, num_C),
              'gamma': np.linspace(start_gamma, end_gamma, num_gamma)}


# 5-fold cross-validated grid search scored by accuracy, using all CPU cores.
# The estimator is this section's own RBF model (svm_rbf_hog), so the cell does not
# depend on an `svm_rbf` object left over from another section of the notebook.
# NOTE: this rebinds grid_search_hog_rbf, replacing the coarse search results.
grid_search_hog_rbf = GridSearchCV(estimator=svm_rbf_hog, param_grid=param_grid, cv=5 , scoring='accuracy', n_jobs=-1)
grid_search_hog_rbf.fit(X_train_50_hog, y_train_50_hog)

# Best parameters and their mean cross-validated accuracy
rbf_best_params_final_hog = grid_search_hog_rbf.best_params_
rbf_best_accuracy_final_hog = grid_search_hog_rbf.best_score_
rbf_best_params_final_hog

In [None]:
# Build an RBF SVC with the best C/gamma from the refined grid search
# (rbf_best_params_final_hog) and fit it on the HoG training split.
svm_rbf_tunned_f_hog = SVC(kernel='rbf', gamma=rbf_best_params_final_hog['gamma'], C =rbf_best_params_final_hog['C'] )
svm_rbf_tunned_f_hog.fit(X_train_50_hog, y_train_50_hog)

In [None]:
# Persist the fitted model next to the notebook.
joblib.dump(svm_rbf_tunned_f_hog,'svm_rbf_hog_n_tf.joblib')

In [None]:
# Predict labels for the HoG test split.
rbf_t_pred_y_50_f_hog = svm_rbf_tunned_f_hog.predict(X_test_50_hog)

In [None]:
# Overall test accuracy of the final tuned RBF SVC.
rbf_t_accuracy_f_hog = accuracy_score(y_test_50_hog,rbf_t_pred_y_50_f_hog)
print("Accuracy of SVM with rbf kernel with tunned hyperparameters is ",rbf_t_accuracy_f_hog)

In [None]:
# Per-class precision / recall / F1 on the HoG test split.
print(classification_report(y_test_50_hog,rbf_t_pred_y_50_f_hog))

In [None]:
# Confusion matrix of the final tuned RBF SVC on the HoG test split.
# Ground truth is this split's own labels (y_test_50_hog), the same ones used for
# the accuracy and classification report of this model.
# `labels=` pins the row/column order to the classifier's classes_ so the heatmap
# tick labels are guaranteed to line up with the matrix.
cm_svm_rbf_hog_n_tf = confusion_matrix(y_test_50_hog, rbf_t_pred_y_50_f_hog, labels=svm_rbf_tunned_f_hog.classes_)
fig, axes = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_svm_rbf_hog_n_tf, annot=True, fmt="d", cmap="Blues", xticklabels=svm_rbf_tunned_f_hog.classes_, yticklabels=svm_rbf_tunned_f_hog.classes_, ax=axes)
axes.set_title('SVM with RBF Kernel\n on hog features\n on normalised data with tunned hyperparameters')
axes.set_xlabel('Predicted')
axes.set_ylabel('True')

## LBP Features

In [None]:
# 80/20 train/test split of the LBP features (fixed seed), with every resulting
# frame/series re-indexed from 0.
X_train_50_df_lbp, X_test_50_df_lbp, y_train_50_lbp, y_test_50_lbp = (
    part.reset_index(drop=True)
    for part in train_test_split(X_lbp, y_50, train_size=0.8, random_state=42)
)

In [None]:
# Standardise the LBP features: the scaler is fitted on the training split only
# and then applied unchanged to both splits.
X_train_arr = X_train_50_df_lbp.to_numpy()
X_test_arr = X_test_50_df_lbp.to_numpy()

scaler_lbp = StandardScaler().fit(X_train_arr)
# Keep the fitted scaler on disk so the same transform can be reloaded later.
joblib.dump(scaler_lbp, 'Standard_Scalar_Normalisation_lbp.joblib')

X_train_arr_n = scaler_lbp.transform(X_train_arr)
X_test_arr_n = scaler_lbp.transform(X_test_arr)

# Wrap the scaled arrays back into DataFrames (default integer column names).
X_train_50_lbp = pd.DataFrame(X_train_arr_n)
X_test_50_lbp = pd.DataFrame(X_test_arr_n)

### LinearSVC

In [None]:
# Train a Linear Support Vector Classifier (LinearSVC, primal formulation via
# dual=False) on the normalised LBP training split and save it to disk.
lin_clf_lbp = svm.LinearSVC(dual=False)
lin_clf_lbp.fit(X_train_50_lbp, y_train_50_lbp)
joblib.dump(lin_clf_lbp,'linearSVC_lbp.joblib')

In [None]:
# Predict labels for the LBP test split.
pred_y_test_50_lbp = lin_clf_lbp.predict(X_test_50_lbp)

In [None]:
# Accuracy of the LinearSVC model on the LBP test split.
# NOTE: `linSVC_accuracy` is also assigned by the HoG section, so it holds the
# value of whichever section ran last.
linSVC_accuracy = lin_clf_lbp.score(X_test_50_lbp, y_test_50_lbp)
print("Accuracy of linearSVC is ",linSVC_accuracy)

In [None]:
# Classification report (per-class precision / recall / F1) of the LinearSVC model.
print(classification_report(y_test_50_lbp,pred_y_test_50_lbp))

In [None]:
# Confusion matrix of LinearSVC on the LBP test split.
# `labels=` pins the row/column order to the classifier's classes_ so the heatmap
# tick labels are guaranteed to line up with the matrix.
cm_linearSVC_lbp_n = confusion_matrix(y_test_50_lbp, pred_y_test_50_lbp, labels=lin_clf_lbp.classes_)
fig, axes = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_linearSVC_lbp_n, annot=True, fmt="d", cmap="Blues", xticklabels=lin_clf_lbp.classes_, yticklabels=lin_clf_lbp.classes_, ax=axes)
axes.set_title('LinearSVC on lbp features \n on normalised data')
axes.set_xlabel('Predicted')
axes.set_ylabel('True')

### SVM Models with Linear, Polynomial and RBF Kernels

In [None]:
# Three SVC baselines for the LBP features; apart from the kernel (and the
# explicit degree for poly) all hyperparameters are left at their defaults.
svm_linear_lbp = SVC(kernel='linear') # linear kernel
svm_poly_lbp = SVC(kernel='poly', degree= 3)  # Polynomial kernel of degree 3
svm_rbf_lbp = SVC(kernel='rbf')  # RBF kernel

In [None]:
# Fit each baseline on the normalised LBP training split.
svm_linear_lbp.fit(X_train_50_lbp, y_train_50_lbp)
svm_poly_lbp.fit(X_train_50_lbp, y_train_50_lbp)
svm_rbf_lbp.fit(X_train_50_lbp, y_train_50_lbp)

In [None]:
# Persist the fitted baselines next to the notebook.
joblib.dump(svm_linear_lbp,'svm_linear_lbp_n.joblib')
joblib.dump(svm_poly_lbp,'svm_poly_lbp_n.joblib')
joblib.dump(svm_rbf_lbp,'svm_rbf_lbp_n.joblib')

#### Linear Kernel

In [None]:
# Show the hyperparameters the linear-kernel SVC was trained with.
svm_linear_lbp.get_params()

In [None]:
# Predict labels for the LBP test split.
lin_pred_y_50_lbp = svm_linear_lbp.predict(X_test_50_lbp)

In [None]:
# Overall test accuracy of the linear-kernel SVC.
lin_accuracy_lbp = accuracy_score(y_test_50_lbp,lin_pred_y_50_lbp)
print("Accuracy of SVM with Linear kernel is ",lin_accuracy_lbp)

In [None]:
# Per-class precision / recall / F1 on the LBP test split.
print(classification_report(y_test_50_lbp,lin_pred_y_50_lbp))

In [None]:
# Confusion matrix of the linear-kernel SVC on the LBP test split.
# Ground truth is this split's own labels (y_test_50_lbp), the same ones used for
# the accuracy and classification report of this model.
# `labels=` pins the row/column order to the classifier's classes_ so the heatmap
# tick labels are guaranteed to line up with the matrix.
cm_svm_linear_lbp_n = confusion_matrix(y_test_50_lbp, lin_pred_y_50_lbp, labels=svm_linear_lbp.classes_)
fig, axes = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_svm_linear_lbp_n, annot=True, fmt="d", cmap="Blues", xticklabels=svm_linear_lbp.classes_, yticklabels=svm_linear_lbp.classes_, ax=axes)
axes.set_title('SVM with Linear Kernel\n on lbp features\n on normalised data')
axes.set_xlabel('Predicted')
axes.set_ylabel('True')

##### Tuning Hyperparameter

In [None]:
# Sweep the regularisation strength C of a linear-kernel SVC and record, for each
# value, the test accuracy and the test predictions.
# NOTE(review): the C values are compared on the test split, not on a validation
# split or via cross-validation.
c_values = [0.1, 1, 10, 100]
accuracy_values_lbp, predicted_values_lbp = [], []

for i in c_values:
    svm_lin_tunned_lbp = SVC(kernel='linear', C=i).fit(X_train_50_lbp, y_train_50_lbp)
    lin_t_pred_y_50_lbp = svm_lin_tunned_lbp.predict(X_test_50_lbp)
    lin_t_accuracy_lbp = accuracy_score(y_test_50_lbp, lin_t_pred_y_50_lbp)

    predicted_values_lbp.append(lin_t_pred_y_50_lbp)
    accuracy_values_lbp.append(lin_t_accuracy_lbp)

print(accuracy_values_lbp)

Therefore, the value of C (between 0.1 and 100) does not affect the accuracy.
So, no hyperparameter tuning is required for the linear kernel.

#### Polynomial Kernel

In [None]:
# Show the hyperparameters the polynomial-kernel SVC was trained with.
svm_poly_lbp.get_params()

In [None]:
# Predict labels for the LBP test split.
poly_pred_y_50_lbp = svm_poly_lbp.predict(X_test_50_lbp)

In [None]:
# Overall test accuracy of the polynomial-kernel SVC.
poly_accuracy_lbp = accuracy_score(y_test_50_lbp,poly_pred_y_50_lbp)
print("Accuracy of SVM with polynomial kernel (of degree 3) is ",poly_accuracy_lbp)

In [None]:
# Per-class precision / recall / F1 on the LBP test split.
print(classification_report(y_test_50_lbp,poly_pred_y_50_lbp))

In [None]:
# Confusion matrix of the polynomial-kernel SVC on the LBP test split.
# Ground truth is this split's own labels (y_test_50_lbp), the same ones used for
# the accuracy and classification report of this model.
# `labels=` pins the row/column order to the classifier's classes_ so the heatmap
# tick labels are guaranteed to line up with the matrix.
cm_svm_poly_lbp_n = confusion_matrix(y_test_50_lbp, poly_pred_y_50_lbp, labels=svm_poly_lbp.classes_)
fig, axes = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_svm_poly_lbp_n, annot=True, fmt="d", cmap="Blues", xticklabels=svm_poly_lbp.classes_, yticklabels=svm_poly_lbp.classes_, ax=axes)
axes.set_title('SVM with Polynomial (degree=3) Kernel\n on lbp features\n on normalised data')
axes.set_xlabel('Predicted')
axes.set_ylabel('True')

##### Tuning Hyperparameters

In [None]:
# Log-spaced candidate values for C and gamma of the polynomial SVM.
param_grid = dict(
    C=[0.1, 1, 10, 100],
    gamma=[0.001, 0.01, 0.1, 1],
)

# 5-fold cross-validated grid search scored by accuracy, using all CPU cores.
grid_search_lbp_poly = GridSearchCV(
    svm_poly_lbp,
    param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
).fit(X_train_50_lbp, y_train_50_lbp)

# Best parameter combination (displayed as the cell output).
poly_best_params_initial_lbp = grid_search_lbp_poly.best_params_
poly_best_params_initial_lbp

In [None]:
# Grid values to summarise; they must match the param_grid given to grid_search_lbp_poly.
C_values = [0.1, 1, 10, 100]
Gamma_values = [0.001, 0.01, 0.1, 1]

# One entry per (C, gamma) candidate: its parameter dict and its mean CV accuracy.
cv_params = grid_search_lbp_poly.cv_results_['params']
cv_scores = grid_search_lbp_poly.cv_results_['mean_test_score']

# Mean CV accuracy for each C, averaged over every gamma tried with it.
# Candidates are matched by parameter value, not by position in cv_results_, so the
# result does not depend on the order in which GridSearchCV lists the candidates.
mean_C = [
    float(np.mean([score for params, score in zip(cv_params, cv_scores) if params['C'] == c_val]))
    for c_val in C_values
]
print(mean_C)

# Mean CV accuracy for each gamma, averaged over every C tried with it.
mean_gamma = [
    float(np.mean([score for params, score in zip(cv_params, cv_scores) if params['gamma'] == g_val]))
    for g_val in Gamma_values
]
print(mean_gamma)

In [None]:
# Line plot of mean_C against the C grid values (polynomial SVM, LBP features).
fig, ax = plt.subplots()
ax.plot(C_values, mean_C, 'o-')

# One tick per C value that was searched.
ax.set_xticks(C_values)
ax.set_xticklabels(C_values, ha='right')

ax.set(
    xlabel='C',
    ylabel='Mean of Accuracies',
    title='Mean of Accuracies v/s C',
)

plt.show()


In [None]:
# Line plot of mean_gamma against the gamma grid values (polynomial SVM, LBP features).
fig, ax = plt.subplots()
ax.plot(Gamma_values, mean_gamma, 'o-')

# One tick per gamma value that was searched; labels are rotated for readability.
ax.set_xticks(Gamma_values)
ax.set_xticklabels(Gamma_values, rotation=45, ha='right')

ax.set(
    xlabel='gamma',
    ylabel='Mean of Accuracies',
    title='Mean of Accuracies v/s gamma',
)

plt.show()

In [None]:
# Build a polynomial SVC with the best C/gamma reported by grid_search_lbp_poly
# (degree is not passed, so SVC's default is used) and fit it on the LBP training split.
svm_poly_tunned_lbp = SVC(kernel='poly', gamma=poly_best_params_initial_lbp['gamma'], C =poly_best_params_initial_lbp['C'] )
svm_poly_tunned_lbp.fit(X_train_50_lbp, y_train_50_lbp)

In [None]:
# Persist the fitted model next to the notebook.
joblib.dump(svm_poly_tunned_lbp,'svm_poly_lbp_n_t.joblib')

In [None]:
# Predict labels for the LBP test split.
poly_t_pred_y_50_lbp = svm_poly_tunned_lbp.predict(X_test_50_lbp)

In [None]:
# Overall test accuracy of the tuned polynomial SVC.
poly_t_accuracy_lbp = accuracy_score(y_test_50_lbp,poly_t_pred_y_50_lbp)
print("Accuracy of SVM with Polynomial kernel (degree = 3) with tunned hyperparameters is ",poly_t_accuracy_lbp)

In [None]:
# Per-class precision / recall / F1 on the LBP test split.
print(classification_report(y_test_50_lbp,poly_t_pred_y_50_lbp))

In [None]:
# Confusion matrix of the tuned polynomial SVC on the LBP test split.
# It is built from the tuned model's own predictions (poly_t_pred_y_50_lbp) and this
# split's labels (y_test_50_lbp), matching the accuracy/report cells for this model.
# `labels=` pins the row/column order to the classifier's classes_ so the heatmap
# tick labels are guaranteed to line up with the matrix.
cm_svm_poly_lbp_n_t = confusion_matrix(y_test_50_lbp, poly_t_pred_y_50_lbp, labels=svm_poly_tunned_lbp.classes_)
fig, axes = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_svm_poly_lbp_n_t, annot=True, fmt="d", cmap="Blues", xticklabels=svm_poly_tunned_lbp.classes_, yticklabels=svm_poly_tunned_lbp.classes_, ax=axes)
axes.set_title('SVM with Polynomial (degree=3) Kernel\n on lbp features\n on normalised data with tunned hyperparameters')
axes.set_xlabel('Predicted')
axes.set_ylabel('True')


#### RBF Kernel

In [None]:
# Predict labels for the LBP test split with the default-parameter RBF SVC.
rbf_pred_y_50_lbp = svm_rbf_lbp.predict(X_test_50_lbp)

In [None]:
# Overall test accuracy of the RBF SVC.
rbf_accuracy_lbp = accuracy_score(y_test_50_lbp,rbf_pred_y_50_lbp)
print("Accuracy of SVM with RBF kernel is ",rbf_accuracy_lbp)

In [None]:
# Per-class precision / recall / F1 on the LBP test split.
print(classification_report(y_test_50_lbp,rbf_pred_y_50_lbp))

In [None]:
# Confusion matrix of the RBF SVC on the LBP test split.
# Ground truth is this split's own labels (y_test_50_lbp), the same ones used for
# the accuracy and classification report of this model.
# `labels=` pins the row/column order to the classifier's classes_ so the heatmap
# tick labels are guaranteed to line up with the matrix.
cm_svm_rbf_lbp_n = confusion_matrix(y_test_50_lbp, rbf_pred_y_50_lbp, labels=svm_rbf_lbp.classes_)
fig, axes = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_svm_rbf_lbp_n, annot=True, fmt="d", cmap="Blues", xticklabels=svm_rbf_lbp.classes_, yticklabels=svm_rbf_lbp.classes_, ax=axes)
axes.set_title('SVM with RBF Kernel\n on lbp features\n on normalised data')
axes.set_xlabel('Predicted')
axes.set_ylabel('True')

##### Tuning Hyperparameters

In [None]:
# Coarse, log-spaced candidate values for C and gamma of the RBF SVM.
param_grid = {'C': [0.1, 1, 10, 100],
              'gamma': [0.001, 0.01, 0.1, 1]}

# 5-fold cross-validated grid search scored by accuracy, using all CPU cores.
# The estimator is this section's own RBF model (svm_rbf_lbp), so the cell does not
# depend on an `svm_rbf` object left over from another section of the notebook.
grid_search_lbp_rbf = GridSearchCV(estimator=svm_rbf_lbp, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search_lbp_rbf.fit(X_train_50_lbp, y_train_50_lbp)

# Best parameter combination (displayed as the cell output).
rbf_best_params_initial_lbp = grid_search_lbp_rbf.best_params_
rbf_best_params_initial_lbp

In [None]:
# Grid values to summarise; they must match the coarse param_grid given to grid_search_lbp_rbf.
C_values = [0.1, 1, 10, 100]
Gamma_values = [0.001, 0.01, 0.1, 1]

# One entry per (C, gamma) candidate: its parameter dict and its mean CV accuracy.
cv_params = grid_search_lbp_rbf.cv_results_['params']
cv_scores = grid_search_lbp_rbf.cv_results_['mean_test_score']

# Mean CV accuracy for each C, averaged over every gamma tried with it.
# Candidates are matched by parameter value, not by position in cv_results_, so the
# result does not depend on the order in which GridSearchCV lists the candidates.
mean_C = [
    float(np.mean([score for params, score in zip(cv_params, cv_scores) if params['C'] == c_val]))
    for c_val in C_values
]
print(mean_C)

# Mean CV accuracy for each gamma, averaged over every C tried with it.
mean_gamma = [
    float(np.mean([score for params, score in zip(cv_params, cv_scores) if params['gamma'] == g_val]))
    for g_val in Gamma_values
]
print(mean_gamma)

In [None]:
# Line plot of mean_C against the C grid values (RBF SVM, LBP features).
fig, ax = plt.subplots()
ax.plot(C_values, mean_C, 'o-')

# One tick per C value that was searched.
ax.set_xticks(C_values)
ax.set_xticklabels(C_values, ha='right')

ax.set(
    xlabel='C',
    ylabel='Mean of Accuracies',
    title='Mean of Accuracies v/s C',
)

plt.show()

In [None]:
# Line plot of mean_gamma against the gamma grid values (RBF SVM, LBP features).
fig, ax = plt.subplots()
ax.plot(Gamma_values, mean_gamma, 'o-')

# One tick per gamma value that was searched; labels are rotated for readability.
ax.set_xticks(Gamma_values)
ax.set_xticklabels(Gamma_values, rotation=45, ha='right')

ax.set(
    xlabel='gamma',
    ylabel='Mean of Accuracies',
    title='Mean of Accuracies v/s gamma',
)

plt.show()

In [None]:
# Build an RBF SVC with the best C/gamma from the coarse grid search
# (rbf_best_params_initial_lbp) and fit it on the LBP training split.
svm_rbf_tunned_lbp = SVC(kernel='rbf', gamma=rbf_best_params_initial_lbp['gamma'], C =rbf_best_params_initial_lbp['C'] )
svm_rbf_tunned_lbp.fit(X_train_50_lbp, y_train_50_lbp)

In [None]:
# Persist the fitted model next to the notebook.
joblib.dump(svm_rbf_tunned_lbp,'svm_rbf_lbp_n_ti.joblib')

In [None]:
# Predict labels for the LBP test split.
rbf_t_pred_y_50_lbp = svm_rbf_tunned_lbp.predict(X_test_50_lbp)

In [None]:
# Overall test accuracy of the coarsely tuned RBF SVC.
rbf_t_accuracy_lbp = accuracy_score(y_test_50_lbp,rbf_t_pred_y_50_lbp)
print("Accuracy of SVM with rbf kernel with tunned hyperparameters is ",rbf_t_accuracy_lbp)

In [None]:
# Per-class precision / recall / F1 on the LBP test split.
print(classification_report(y_test_50_lbp,rbf_t_pred_y_50_lbp))

In [None]:
# Refined search: linearly spaced C and gamma values over a narrower range.
# Start/end points and number of points for each hyperparameter.
start_C = 0.1
end_C = 10
num_C = 10

start_gamma = 0.001
end_gamma = 0.1
num_gamma = 5

# Parameter grid with the specified ranges and number of points
param_grid = {'C': np.linspace(start_C, end_C, num_C),
              'gamma': np.linspace(start_gamma, end_gamma, num_gamma)}


# 5-fold cross-validated grid search scored by accuracy, using all CPU cores.
# The estimator is this section's own RBF model (svm_rbf_lbp), so the cell does not
# depend on an `svm_rbf` object left over from another section of the notebook.
# NOTE: this rebinds grid_search_lbp_rbf, replacing the coarse search results.
grid_search_lbp_rbf = GridSearchCV(estimator=svm_rbf_lbp, param_grid=param_grid, cv=5 , scoring='accuracy', n_jobs=-1)
grid_search_lbp_rbf.fit(X_train_50_lbp, y_train_50_lbp)

# Best parameters and their mean cross-validated accuracy
rbf_best_params_final_lbp = grid_search_lbp_rbf.best_params_
rbf_best_accuracy_final_lbp = grid_search_lbp_rbf.best_score_
rbf_best_params_final_lbp

In [None]:
# Build an RBF SVC with the best C/gamma from the refined grid search
# (rbf_best_params_final_lbp) and fit it on the LBP training split.
svm_rbf_tunned_f_lbp = SVC(kernel='rbf', gamma=rbf_best_params_final_lbp['gamma'], C =rbf_best_params_final_lbp['C'] )
svm_rbf_tunned_f_lbp.fit(X_train_50_lbp, y_train_50_lbp)

In [None]:
# Persist the fitted model next to the notebook.
joblib.dump(svm_rbf_tunned_f_lbp,'svm_rbf_lbp_n_tf.joblib')

In [None]:
# Predict labels for the LBP test split.
rbf_t_pred_y_50_f_lbp = svm_rbf_tunned_f_lbp.predict(X_test_50_lbp)

In [None]:
# Overall test accuracy of the final tuned RBF SVC.
rbf_t_accuracy_f_lbp = accuracy_score(y_test_50_lbp,rbf_t_pred_y_50_f_lbp)
print("Accuracy of SVM with rbf kernel with tunned hyperparameters is ",rbf_t_accuracy_f_lbp)

In [None]:
# Per-class precision / recall / F1 on the LBP test split.
print(classification_report(y_test_50_lbp,rbf_t_pred_y_50_f_lbp))

In [None]:
# Confusion matrix of the final tuned RBF SVC on the LBP test split.
# Ground truth is this split's own labels (y_test_50_lbp), the same ones used for
# the accuracy and classification report of this model.
# `labels=` pins the row/column order to the classifier's classes_ so the heatmap
# tick labels are guaranteed to line up with the matrix.
cm_svm_rbf_lbp_n_tf = confusion_matrix(y_test_50_lbp, rbf_t_pred_y_50_f_lbp, labels=svm_rbf_tunned_f_lbp.classes_)
fig, axes = plt.subplots(figsize=(5, 5))
sns.heatmap(cm_svm_rbf_lbp_n_tf, annot=True, fmt="d", cmap="Blues", xticklabels=svm_rbf_tunned_f_lbp.classes_, yticklabels=svm_rbf_tunned_f_lbp.classes_, ax=axes)
axes.set_title('SVM with RBF Kernel\n on lbp features\n on normalised data with tunned hyperparameters')
axes.set_xlabel('Predicted')
axes.set_ylabel('True')