In [1]:
import numpy as np
import scipy 
import pandas as pd
from libsvm.svmutil import *
from sklearn.preprocessing import MinMaxScaler

import SVM_functions

# Experiment 1: Apply Support Vector Machines (SVMs) for gender classification. Experiment with both polynomial and RBF kernels, as well as with different C values. Show your results for both 16x20 and 48x60 images.


### Naming convention:
#### EigenVectors_xx: eigenvectors of training images for fold xx 
#### EigenValues_xx:  eigenvalues of training images for fold xx

#### trPCA_xx: PCA projected training images for fold xx
#### TtrPCA_xx: labels of projected training images for fold xx

#### valPCA_xx: PCA projected validation images for fold xx
#### TvalPCA_xx: labels of projected validation images for fold xx

#### tsPCA_xx: PCA projected test images for fold xx
#### TtsPCA_xx: labels of projected test images for fold xx



# Fold 1

## High Resolution

In [2]:
# Fold 1, 48x60 images: PCA-projected training images and their labels.
train_eigencoefficients_f1, train_class_labels_f1 = (
    np.loadtxt('GenderDataRowOrder/48_60/trPCA_01.txt'),
    np.loadtxt('GenderDataRowOrder/48_60/TtrPCA_01.txt'),
)

# Fold 1, 48x60 images: PCA-projected validation images and their labels.
val_eigencoefficients_f1, val_class_labels_f1 = (
    np.loadtxt('GenderDataRowOrder/48_60/valPCA_01.txt'),
    np.loadtxt('GenderDataRowOrder/48_60/TvalPCA_01.txt'),
)

### Polynomial kernel

In [3]:
# Polynomial kernel, fold 1, 48x60: only the first 30 columns of each projection are passed.
# Both return values are consumed later by SVM_functions.get_optimum_params.
fold1_poly_results, fold1_poly_misclassifications = SVM_functions.train_model(
    train_eigencoefficients_f1[:, :30],
    train_class_labels_f1,
    val_eigencoefficients_f1[:, :30],
    val_class_labels_f1,
    kernel='poly',
)



### RBF kernel

In [4]:
# RBF kernel, fold 1, 48x60: only the first 30 columns of each projection are passed.
# Both return values are consumed later by SVM_functions.get_optimum_params.
fold1_RBF_results, fold1_RBF_misclassifications = SVM_functions.train_model(
    train_eigencoefficients_f1[:, :30],
    train_class_labels_f1,
    val_eigencoefficients_f1[:, :30],
    val_class_labels_f1,
    kernel='RBF',
)



## Low Resolution

In [5]:
# Fold 1, 16x20 images: PCA-projected training images and their labels.
train_eigencoefficients_f1_lowres, train_class_labels_f1_lowres = (
    np.loadtxt('GenderDataRowOrder/16_20/trPCA_01.txt'),
    np.loadtxt('GenderDataRowOrder/16_20/TtrPCA_01.txt'),
)

# Fold 1, 16x20 images: PCA-projected validation images and their labels.
val_eigencoefficients_f1_lowres, val_class_labels_f1_lowres = (
    np.loadtxt('GenderDataRowOrder/16_20/valPCA_01.txt'),
    np.loadtxt('GenderDataRowOrder/16_20/TvalPCA_01.txt'),
)

### Polynomial kernel


In [6]:
# Polynomial kernel, fold 1, 16x20: only the first 30 columns of each projection are passed.
# Both return values are consumed later by SVM_functions.get_optimum_params.
fold1_poly_results_lowres, fold1_poly_misclassifications_lowres = SVM_functions.train_model(
    train_eigencoefficients_f1_lowres[:, :30],
    train_class_labels_f1_lowres,
    val_eigencoefficients_f1_lowres[:, :30],
    val_class_labels_f1_lowres,
    kernel='poly',
)



### RBF kernel

In [7]:
# RBF kernel, fold 1, 16x20: only the first 30 columns of each projection are passed.
# Both return values are consumed later by SVM_functions.get_optimum_params.
fold1_RBF_results_lowres, fold1_RBF_misclassifications_lowres = SVM_functions.train_model(
    train_eigencoefficients_f1_lowres[:, :30],
    train_class_labels_f1_lowres,
    val_eigencoefficients_f1_lowres[:, :30],
    val_class_labels_f1_lowres,
    kernel='RBF',
)




# Fold 2

## High Resolution

In [8]:
# Fold 2, 48x60 images: PCA-projected training images and their labels.
train_eigencoefficients_f2, train_class_labels_f2 = (
    np.loadtxt('GenderDataRowOrder/48_60/trPCA_02.txt'),
    np.loadtxt('GenderDataRowOrder/48_60/TtrPCA_02.txt'),
)

# Fold 2, 48x60 images: PCA-projected validation images and their labels.
val_eigencoefficients_f2, val_class_labels_f2 = (
    np.loadtxt('GenderDataRowOrder/48_60/valPCA_02.txt'),
    np.loadtxt('GenderDataRowOrder/48_60/TvalPCA_02.txt'),
)

### Polynomial kernel

In [9]:
# Polynomial kernel, fold 2, 48x60: only the first 30 columns of each projection are passed.
# Both return values are consumed later by SVM_functions.get_optimum_params.
fold2_poly_results, fold2_poly_misclassifications = SVM_functions.train_model(
    train_eigencoefficients_f2[:, :30],
    train_class_labels_f2,
    val_eigencoefficients_f2[:, :30],
    val_class_labels_f2,
    kernel='poly',
)



### RBF kernel

In [10]:
# RBF kernel, fold 2, 48x60: only the first 30 columns of each projection are passed.
# Both return values are consumed later by SVM_functions.get_optimum_params.
fold2_RBF_results, fold2_RBF_misclassifications = SVM_functions.train_model(
    train_eigencoefficients_f2[:, :30],
    train_class_labels_f2,
    val_eigencoefficients_f2[:, :30],
    val_class_labels_f2,
    kernel='RBF',
)




## Low Resolution

In [11]:
# Fold 2, 16x20 images: PCA-projected training images and their labels.
train_eigencoefficients_f2_lowres, train_class_labels_f2_lowres = (
    np.loadtxt('GenderDataRowOrder/16_20/trPCA_02.txt'),
    np.loadtxt('GenderDataRowOrder/16_20/TtrPCA_02.txt'),
)

# Fold 2, 16x20 images: PCA-projected validation images and their labels.
val_eigencoefficients_f2_lowres, val_class_labels_f2_lowres = (
    np.loadtxt('GenderDataRowOrder/16_20/valPCA_02.txt'),
    np.loadtxt('GenderDataRowOrder/16_20/TvalPCA_02.txt'),
)

### Polynomial kernel

In [12]:
# Polynomial kernel, fold 2, 16x20: only the first 30 columns of each projection are passed.
# Both return values are consumed later by SVM_functions.get_optimum_params.
fold2_poly_results_lowres, fold2_poly_misclassifications_lowres = SVM_functions.train_model(
    train_eigencoefficients_f2_lowres[:, :30],
    train_class_labels_f2_lowres,
    val_eigencoefficients_f2_lowres[:, :30],
    val_class_labels_f2_lowres,
    kernel='poly',
)



### RBF kernel

In [13]:
# RBF kernel, fold 2, 16x20: only the first 30 columns of each projection are passed.
# Both return values are consumed later by SVM_functions.get_optimum_params.
fold2_RBF_results_lowres, fold2_RBF_misclassifications_lowres = SVM_functions.train_model(
    train_eigencoefficients_f2_lowres[:, :30],
    train_class_labels_f2_lowres,
    val_eigencoefficients_f2_lowres[:, :30],
    val_class_labels_f2_lowres,
    kernel='RBF',
)




# Fold 3

## High Resolution

In [14]:
# Fold 3, 48x60 images: PCA-projected training images and their labels.
train_eigencoefficients_f3, train_class_labels_f3 = (
    np.loadtxt('GenderDataRowOrder/48_60/trPCA_03.txt'),
    np.loadtxt('GenderDataRowOrder/48_60/TtrPCA_03.txt'),
)

# Fold 3, 48x60 images: PCA-projected validation images and their labels.
val_eigencoefficients_f3, val_class_labels_f3 = (
    np.loadtxt('GenderDataRowOrder/48_60/valPCA_03.txt'),
    np.loadtxt('GenderDataRowOrder/48_60/TvalPCA_03.txt'),
)

### Polynomial kernel

In [15]:
# Polynomial kernel, fold 3, 48x60: only the first 30 columns of each projection are passed.
# Both return values are consumed later by SVM_functions.get_optimum_params.
fold3_poly_results, fold3_poly_misclassifications = SVM_functions.train_model(
    train_eigencoefficients_f3[:, :30],
    train_class_labels_f3,
    val_eigencoefficients_f3[:, :30],
    val_class_labels_f3,
    kernel='poly',
)



### RBF kernel

In [16]:
# RBF kernel, fold 3, 48x60: only the first 30 columns of each projection are passed.
# Both return values are consumed later by SVM_functions.get_optimum_params.
fold3_RBF_results, fold3_RBF_misclassifications = SVM_functions.train_model(
    train_eigencoefficients_f3[:, :30],
    train_class_labels_f3,
    val_eigencoefficients_f3[:, :30],
    val_class_labels_f3,
    kernel='RBF',
)




## Low Resolution

In [17]:
# Fold 3, 16x20 images: PCA-projected training images and their labels.
train_eigencoefficients_f3_lowres, train_class_labels_f3_lowres = (
    np.loadtxt('GenderDataRowOrder/16_20/trPCA_03.txt'),
    np.loadtxt('GenderDataRowOrder/16_20/TtrPCA_03.txt'),
)

# Fold 3, 16x20 images: PCA-projected validation images and their labels.
val_eigencoefficients_f3_lowres, val_class_labels_f3_lowres = (
    np.loadtxt('GenderDataRowOrder/16_20/valPCA_03.txt'),
    np.loadtxt('GenderDataRowOrder/16_20/TvalPCA_03.txt'),
)

### Polynomial kernel

In [18]:
# Polynomial kernel, fold 3, 16x20: only the first 30 columns of each projection are passed.
# Both return values are consumed later by SVM_functions.get_optimum_params.
fold3_poly_results_lowres, fold3_poly_misclassifications_lowres = SVM_functions.train_model(
    train_eigencoefficients_f3_lowres[:, :30],
    train_class_labels_f3_lowres,
    val_eigencoefficients_f3_lowres[:, :30],
    val_class_labels_f3_lowres,
    kernel='poly',
)



### RBF kernel

In [19]:
# RBF kernel, fold 3, 16x20: only the first 30 columns of each projection are passed.
# Both return values are consumed later by SVM_functions.get_optimum_params.
fold3_RBF_results_lowres, fold3_RBF_misclassifications_lowres = SVM_functions.train_model(
    train_eigencoefficients_f3_lowres[:, :30],
    train_class_labels_f3_lowres,
    val_eigencoefficients_f3_lowres[:, :30],
    val_class_labels_f3_lowres,
    kernel='RBF',
)




# Find the optimum set of parameters (γopt, Copt)

### High Resolution Images

In [20]:
# One entry per fold; within each entry the polynomial-kernel values come first and the
# RBF-kernel values second, so both lists share the same ordering.
misclassifications_highres = [
    np.concatenate([poly_part, rbf_part])
    for poly_part, rbf_part in (
        (fold1_poly_misclassifications, fold1_RBF_misclassifications),
        (fold2_poly_misclassifications, fold2_RBF_misclassifications),
        (fold3_poly_misclassifications, fold3_RBF_misclassifications),
    )
]
kernel_summaries_highres = [
    np.concatenate([poly_part, rbf_part])
    for poly_part, rbf_part in (
        (fold1_poly_results, fold1_RBF_results),
        (fold2_poly_results, fold2_RBF_results),
        (fold3_poly_results, fold3_RBF_results),
    )
]

# Summarise the three 48x60 folds; the result is kept for display in the next cell.
high_res_summarydf = SVM_functions.get_optimum_params(
    misclassifications_highres,
    kernel_summaries_highres,
)


Best average error:  [9.02255639]
Best parameters:  ['RBF, gamma= 0.1, C= 1']


In [21]:
# Display the 48x60 summary table returned by get_optimum_params (one row per parameter setting).
high_res_summarydf 

Unnamed: 0,Params,Fold1,Fold2,Fold3,Average
0,"Polynomial, d= 1, C= 0.1",10.526316,4.511278,12.781955,9.273183
1,"Polynomial, d= 1, C= 1",9.022556,8.270677,23.308271,13.533835
2,"Polynomial, d= 1, C= 10",10.526316,9.022556,26.315789,15.288221
3,"Polynomial, d= 1, C= 100",10.526316,17.293233,26.315789,18.045113
4,"Polynomial, d= 2, C= 0.1",18.045113,20.300752,20.300752,19.548872
5,"Polynomial, d= 2, C= 1",16.541353,23.308271,21.052632,20.300752
6,"Polynomial, d= 2, C= 10",16.541353,23.308271,21.052632,20.300752
7,"Polynomial, d= 2, C= 100",16.541353,23.308271,21.052632,20.300752
8,"Polynomial, d= 3, C= 0.1",9.774436,6.015038,13.533835,9.774436
9,"Polynomial, d= 3, C= 1",9.774436,6.015038,13.533835,9.774436


### Low Resolution Images

In [22]:
# One entry per fold; within each entry the polynomial-kernel values come first and the
# RBF-kernel values second, so both lists share the same ordering.
misclassifications_lowres = [
    np.concatenate([poly_part, rbf_part])
    for poly_part, rbf_part in (
        (fold1_poly_misclassifications_lowres, fold1_RBF_misclassifications_lowres),
        (fold2_poly_misclassifications_lowres, fold2_RBF_misclassifications_lowres),
        (fold3_poly_misclassifications_lowres, fold3_RBF_misclassifications_lowres),
    )
]
kernel_summaries_lowres = [
    np.concatenate([poly_part, rbf_part])
    for poly_part, rbf_part in (
        (fold1_poly_results_lowres, fold1_RBF_results_lowres),
        (fold2_poly_results_lowres, fold2_RBF_results_lowres),
        (fold3_poly_results_lowres, fold3_RBF_results_lowres),
    )
]

# Summarise the three 16x20 folds; the result is kept for display in the next cell.
low_res_summarydf = SVM_functions.get_optimum_params(
    misclassifications_lowres,
    kernel_summaries_lowres,
)



Best average error:  [7.26817043 7.26817043]
Best parameters:  ['RBF, gamma= 0.1, C= 1', 'RBF, gamma= 0.1, C= 10']


In [23]:
# Display the 16x20 summary table returned by get_optimum_params (one row per parameter setting).
low_res_summarydf

Unnamed: 0,Params,Fold1,Fold2,Fold3,Average
0,"Polynomial, d= 1, C= 0.1",11.278195,6.766917,8.270677,8.77193
1,"Polynomial, d= 1, C= 1",9.022556,11.278195,8.270677,9.52381
2,"Polynomial, d= 1, C= 10",13.533835,15.037594,11.278195,13.283208
3,"Polynomial, d= 1, C= 100",9.774436,14.285714,11.278195,11.779449
4,"Polynomial, d= 2, C= 0.1",23.308271,20.300752,18.796992,20.802005
5,"Polynomial, d= 2, C= 1",22.556391,20.300752,23.308271,22.055138
6,"Polynomial, d= 2, C= 10",22.556391,20.300752,23.308271,22.055138
7,"Polynomial, d= 2, C= 100",22.556391,20.300752,23.308271,22.055138
8,"Polynomial, d= 3, C= 0.1",14.285714,7.518797,13.533835,11.779449
9,"Polynomial, d= 3, C= 1",14.285714,7.518797,13.533835,11.779449


# Using the SVM model trained on (γopt, Copt), compute the classification error on the test set. This process must be repeated for each fold separately to compute the classification error for each fold as well as the average classification error over all folds as described earlier.

## Fold 1 

### High Resolution

In [24]:
# Fold 1, 48x60 images: PCA-projected test images and their labels.
test_eigencoefficients_f1, test_class_labels_f1 = (
    np.loadtxt('GenderDataRowOrder/48_60/tsPCA_01.txt'),
    np.loadtxt('GenderDataRowOrder/48_60/TtsPCA_01.txt'),
)

In [25]:
# libsvm-style option string: -s 0 (C-SVC), -t 2 (RBF kernel), -g 0.1 (gamma), -c 1 (cost).
# These match the best validation setting reported above ('RBF, gamma= 0.1, C= 1');
# run_SVM is presumably what hands the string to libsvm -- confirm in SVM_functions.
params = '-s 0 -t 2 -g 0.1 -c 1'
e1 = SVM_functions.run_SVM(
    train_eigencoefficients_f1[:, :30],
    train_class_labels_f1,
    test_eigencoefficients_f1[:, :30],
    test_class_labels_f1,
    params,
)


*.*
optimization finished, #iter = 165
nu = 0.565958
obj = -48.349285, rho = -0.448555
nSV = 96, nBSV = 55
Total nSV = 96
Accuracy = 80.4511% (107/133) (classification)


### Low Resolution

In [26]:
# Fold 1, 16x20 images: PCA-projected test images and their labels.
test_eigencoefficients_f1_lowres, test_class_labels_f1_lowres = (
    np.loadtxt('GenderDataRowOrder/16_20/tsPCA_01.txt'),
    np.loadtxt('GenderDataRowOrder/16_20/TtsPCA_01.txt'),
)

In [27]:
# libsvm-style option string: -s 0 (C-SVC), -t 2 (RBF kernel), -g 0.1 (gamma), -c 1 (cost).
# 'RBF, gamma= 0.1, C= 1' is one of the two tied best settings reported for 16x20 above;
# run_SVM is presumably what hands the string to libsvm -- confirm in SVM_functions.
params = '-s 0 -t 2 -g 0.1 -c 1'
e1_lr = SVM_functions.run_SVM(
    train_eigencoefficients_f1_lowres[:, :30],
    train_class_labels_f1_lowres,
    test_eigencoefficients_f1_lowres[:, :30],
    test_class_labels_f1_lowres,
    params,
)



*.*
optimization finished, #iter = 168
nu = 0.610539
obj = -51.696565, rho = -0.202279
nSV = 107, nBSV = 62
Total nSV = 107
Accuracy = 90.9774% (121/133) (classification)


## Fold 2

### High Resolution

In [28]:
# Fold 2, 48x60 images: PCA-projected test images and their labels.
test_eigencoefficients_f2, test_class_labels_f2 = (
    np.loadtxt('GenderDataRowOrder/48_60/tsPCA_02.txt'),
    np.loadtxt('GenderDataRowOrder/48_60/TtsPCA_02.txt'),
)

In [29]:
# libsvm-style option string: -s 0 (C-SVC), -t 2 (RBF kernel), -g 0.1 (gamma), -c 1 (cost).
# These match the best validation setting reported above ('RBF, gamma= 0.1, C= 1');
# run_SVM is presumably what hands the string to libsvm -- confirm in SVM_functions.
params = '-s 0 -t 2 -g 0.1 -c 1'
e2 = SVM_functions.run_SVM(
    train_eigencoefficients_f2[:, :30],
    train_class_labels_f2,
    test_eigencoefficients_f2[:, :30],
    test_class_labels_f2,
    params,
)



*.*
optimization finished, #iter = 172
nu = 0.617994
obj = -52.360434, rho = -0.111316
nSV = 105, nBSV = 57
Total nSV = 105
Accuracy = 91.7293% (122/133) (classification)


### Low Resolution

In [30]:
# Fold 2, 16x20 images: PCA-projected test images and their labels.
test_eigencoefficients_f2_lowres, test_class_labels_f2_lowres = (
    np.loadtxt('GenderDataRowOrder/16_20/tsPCA_02.txt'),
    np.loadtxt('GenderDataRowOrder/16_20/TtsPCA_02.txt'),
)

In [31]:
# libsvm-style option string: -s 0 (C-SVC), -t 2 (RBF kernel), -g 0.1 (gamma), -c 1 (cost).
# 'RBF, gamma= 0.1, C= 1' is one of the two tied best settings reported for 16x20 above;
# run_SVM is presumably what hands the string to libsvm -- confirm in SVM_functions.
params = '-s 0 -t 2 -g 0.1 -c 1'
e2_lr = SVM_functions.run_SVM(
    train_eigencoefficients_f2_lowres[:, :30],
    train_class_labels_f2_lowres,
    test_eigencoefficients_f2_lowres[:, :30],
    test_class_labels_f2_lowres,
    params,
)



*.*
optimization finished, #iter = 174
nu = 0.653638
obj = -56.787041, rho = -0.268447
nSV = 108, nBSV = 63
Total nSV = 108
Accuracy = 90.9774% (121/133) (classification)


## Fold 3

### High Resolution

In [32]:
# Fold 3, 48x60 images: PCA-projected test images and their labels.
test_eigencoefficients_f3, test_class_labels_f3 = (
    np.loadtxt('GenderDataRowOrder/48_60/tsPCA_03.txt'),
    np.loadtxt('GenderDataRowOrder/48_60/TtsPCA_03.txt'),
)

In [33]:
# libsvm-style option string: -s 0 (C-SVC), -t 2 (RBF kernel), -g 0.1 (gamma), -c 1 (cost).
# These match the best validation setting reported above ('RBF, gamma= 0.1, C= 1');
# run_SVM is presumably what hands the string to libsvm -- confirm in SVM_functions.
params = '-s 0 -t 2 -g 0.1 -c 1'
e3 = SVM_functions.run_SVM(
    train_eigencoefficients_f3[:, :30],
    train_class_labels_f3,
    test_eigencoefficients_f3[:, :30],
    test_class_labels_f3,
    params,
)




*.*
optimization finished, #iter = 135
nu = 0.543411
obj = -46.176618, rho = 0.346135
nSV = 91, nBSV = 54
Total nSV = 91
Accuracy = 86.4662% (115/133) (classification)


### Low Resolution

In [34]:
# Fold 3, 16x20 images: PCA-projected test images and their labels.
test_eigencoefficients_f3_lowres, test_class_labels_f3_lowres = (
    np.loadtxt('GenderDataRowOrder/16_20/tsPCA_03.txt'),
    np.loadtxt('GenderDataRowOrder/16_20/TtsPCA_03.txt'),
)

In [35]:
# libsvm-style option string: -s 0 (C-SVC), -t 2 (RBF kernel), -g 0.1 (gamma), -c 1 (cost).
# 'RBF, gamma= 0.1, C= 1' is one of the two tied best settings reported for 16x20 above;
# run_SVM is presumably what hands the string to libsvm -- confirm in SVM_functions.
params = '-s 0 -t 2 -g 0.1 -c 1'
e3_lr = SVM_functions.run_SVM(
    train_eigencoefficients_f3_lowres[:, :30],
    train_class_labels_f3_lowres,
    test_eigencoefficients_f3_lowres[:, :30],
    test_class_labels_f3_lowres,
    params,
)




*.*
optimization finished, #iter = 167
nu = 0.614725
obj = -51.565169, rho = 0.069233
nSV = 102, nBSV = 58
Total nSV = 102
Accuracy = 90.2256% (120/133) (classification)


# Misclassification rates for all folds

In [36]:
# Report the per-fold test misclassification values for 48x60 and their mean over the three folds.
print(
    'Misclassification Error (48x60): \n \n',
    'Fold 1:', e1,
    '\n Fold 2:', e2,
    '\n Fold 3:', e3,
    '\n Average:', np.mean([e1, e2, e3]),
)

Misclassification Error (48x60): 
 
 Fold 1: [19.54887218] 
 Fold 2: [8.27067669] 
 Fold 3: [13.53383459] 
 Average: 13.78446115288221


In [37]:
# Report the per-fold test misclassification values for 16x20 and their mean over the three folds.
print(
    'Misclassification Error (16x20): \n \n',
    'Fold 1:', e1_lr,
    '\n Fold 2:', e2_lr,
    '\n Fold 3:', e3_lr,
    '\n Average:', np.mean([e1_lr, e2_lr, e3_lr]),
)



Misclassification Error (16x20): 
 
 Fold 1: [9.02255639] 
 Fold 2: [9.02255639] 
 Fold 3: [9.77443609] 
 Average: 9.273182957393482
