In [1]:
## Import libraries
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from skimage.color import rgb2gray
import pylab as pl
import pandas as pd

In [2]:
# Read the image tensor and its label column from the .npy files in the working directory.
Xtrain, Ytrain = (np.load(npy_file) for npy_file in ('train_data.npy', 'train_labels.npy'))
# Class names, one letter per class.
labels = list('ABCDEFGHI')
Xtrain.shape, Ytrain.shape

((1844, 100, 100, 3), (1844, 1))

In [3]:
# Vectorize the original data so other preprocessing techniques can be used:
# each RGB image is converted to grayscale and flattened into a single row.
Xtrain2 = np.array([rgb2gray(image).ravel() for image in Xtrain])
Xtrain2.shape, Xtrain.shape, len(Xtrain)

((1844, 10000), (1844, 100, 100, 3), 1844)

In [4]:
## Normalize and separate data into training and validation sets.
from sklearn.model_selection import train_test_split

# NOTE(review): skimage's rgb2gray returns floats in [0, 1] when given integer-typed
# images. If Xtrain is uint8, this division rescales already-normalized data to
# [0, 1/255] -- TODO confirm the dtype of Xtrain before changing the scale, since the
# gamma ranges explored further down depend on it.
X_train_norm = Xtrain2 / 255.0

# A fixed random_state makes the 80/20 split (and every score computed from it)
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X_train_norm, Ytrain, test_size=0.2, random_state=42
)
# Flatten the (n, 1) label columns to 1-D vectors, the shape sklearn estimators expect.
y_train = y_train.ravel()
y_test = y_test.ravel()
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((1475, 10000), (369, 10000), (1475,), (369,))

In [5]:
#Warning: takes long time to load 
import matplotlib.pyplot as plt 
import numpy as np 
import seaborn as sns 
from sklearn.model_selection import train_test_split 
from sklearn.svm import SVC 
%matplotlib inline 

# Hyperparameter grids for the SVC-rbf accuracy sweeps on the training set.
C_values = np.arange(5, 35, 5)  # specific C values to test
gamma_values = np.arange(100, 1100, 100)  # specific gamma values to test
# score_training = np.empty(shape=(C_values.size, gamma_values.size)) #matrix for score heatmap (training set)
# score_testing = np.empty(shape=(C_values.size, gamma_values.size)) #matrix for score heatmap (testing set)

# In case the values above are not listed in sorted order: np.sort returns a sorted
# *copy*, so the result has to be assigned back for the sort to take effect.
C_values = np.sort(C_values, axis=None)
gamma_values = np.sort(gamma_values, axis=None)

print("C_values: {}".format(C_values))
print("gamma_values: {}".format(gamma_values))

def _svc_accuracy_sweep(param_name, values, X_eval, y_eval):
    """Score an RBF-kernel SVC once per value of a single hyperparameter.

    For each entry of ``values`` a fresh ``SVC(kernel='rbf')`` is created with
    ``param_name`` set to that entry (all other parameters left at their
    defaults), fitted on ``X_train``/``y_train`` and scored on
    ``X_eval``/``y_eval``.

    Parameters
    ----------
    param_name : str
        Name of the SVC keyword argument to vary (e.g. ``'C'`` or ``'gamma'``).
    values : iterable
        Values to try for that argument, in order.
    X_eval, y_eval : array-like
        Data and labels the fitted model is scored on.

    Returns
    -------
    list
        One ``SVC.score`` result per entry of ``values``, in the same order.
    """
    scores = []
    for value in values:
        clf = SVC(kernel='rbf', **{param_name: value})
        clf.fit(X=X_train, y=y_train)
        scores.append(clf.score(X=X_eval, y=y_eval))
    return scores


# computing accuracy against C (gamma left at the SVC default); predicted on train data
scores_svc_C_train = _svc_accuracy_sweep('C', C_values, X_train, y_train)
print("scores_svc_C_train: \n{}".format(scores_svc_C_train))

# computing accuracy against C (gamma left at the SVC default); predicted on test data
# scores_svc_C_test = _svc_accuracy_sweep('C', C_values, X_test, y_test)
# print("scores_svc_C_test: \n{}".format(scores_svc_C_test))

# computing accuracy against gamma (C left at the SVC default); predicted on train data
scores_svc_gamma_train = _svc_accuracy_sweep('gamma', gamma_values, X_train, y_train)
print("scores_svc_gamma_train: \n{}".format(scores_svc_gamma_train))

# computing accuracy against gamma (C left at the SVC default); predicted on test data
# scores_svc_gamma_test = _svc_accuracy_sweep('gamma', gamma_values, X_test, y_test)
# print("scores_svc_gamma_test: \n{}".format(scores_svc_gamma_test))
              
# Side-by-side line plots of training accuracy against C (left) and gamma (right).
sns.set_style(style='darkgrid')
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(12, 12))

# plotting accuracy vs. C (predicted on train set)
sns.lineplot(x=C_values, y=scores_svc_C_train, color='blue', marker='o', ax=ax1)
ax1.set_title(label='Accuracy of SVC vs. C', fontsize=24)
ax1.set_xlabel(xlabel='C', fontsize=18)
ax1.set_ylabel(ylabel='Score', fontsize=18)

# # plotting accuracy vs. C (predicted on test set)
# sns.lineplot(x=C_values, y=scores_svc_C_test, color='green', marker='o', ax=ax1)
# ax1.legend(labels=['Train Set', 'Test Set'], loc='lower right')

# plotting accuracy vs. Gamma (predicted on train set)
sns.lineplot(x=gamma_values, y=scores_svc_gamma_train, color='blue', marker='s', ax=ax2)
ax2.set_title(label='Accuracy of SVC vs.Gamma', fontsize=24)
ax2.set_xlabel(xlabel='Gamma', fontsize=18)
ax2.set_ylabel(ylabel='Score', fontsize=18)

# # plotting accuracy vs. Gamma (predicted on test set)
# sns.lineplot(x=gamma_values, y=scores_svc_gamma_test, color='green', marker='s', ax=ax2)
# ax2.legend(labels=['Train Set', 'Test Set'], loc='lower right')

# Turn on major gridlines for both panels. The on/off flag is passed positionally
# because its keyword was renamed from `b` to `visible` in Matplotlib 3.5, and
# plt.grid would only have touched the most recently used axes.
for ax in (ax1, ax2):
    ax.grid(True, which='major', axis='both')
plt.show()

C_values: [ 5 10 15 20 25 30]
gamma_values: [ 100  200  300  400  500  600  700  800  900 1000]




KeyboardInterrupt: 

In [6]:
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline

# RBF-kernel classifier with class_weight='balanced', wrapped in a single-step
# pipeline; make_pipeline names the step after the lowercased class name.
svc = SVC(class_weight='balanced', kernel='rbf')
model = make_pipeline(svc)

In [None]:
from sklearn.model_selection import GridSearchCV
param_grid = {'svc__C': [5, 10, 15, 20, 30, 35],
              'svc__gamma': [300, 400, 500, 600, 700, 800, 900, 1000]}
grid = GridSearchCV(model, param_grid)

%time grid.fit(X_train, y_train)
print(grid.best_params_)

