In [2]:
import csv as csv 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn import svm
from sklearn import cross_validation
from scipy import linalg

# Open up the csv file in to a Python object
data = pd.read_csv('2013MT60597.csv',header = -1)
# data.describe()

In [3]:
X_train_raw = data.iloc[:,0:25]
Y_train = data.iloc[:,25]


#normalize the data
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(X_train_raw)

a,b,c = linalg.svd(X_train)
print(b)

[ 59.2730931   58.37051386  57.85374196  57.35637875  57.19679766
  57.06698198  56.27697423  56.16808756  56.11975154  55.40697927
  55.25683585  55.02877665  54.98227379  54.53023193  54.14857773
  54.09110117  53.54706172  53.28792843  52.7684147   52.4185597
  52.16889704  51.74522114  51.33870103  51.17326139  50.44524561]


# Observations:
* From the above SVD of the design matrix, we can see that all of the singular values are significant
* There is no sudden drop in singular values, which shows that all dimensions hold some significant variance of the data
* Even if we take the first 10 features, we wont get better performance




In [4]:
#Create a subset of the database 
# choose only those classes whose labels are 2 and 3
B = np.zeros(Y_train.shape[0],dtype=bool)
for i in range(0,Y_train.shape[0]):
    if(Y_train[i] == 2 or Y_train[i] == 3):
        B[i] = True

X_23 = X_train[B]
Y_23 = Y_train[B]
# X_23.iloc[:,0:10]
m,n,p = linalg.svd(X_23)
print(n)

[ 44.71217765  44.03878445  39.60494411  37.64408858  35.37886609
  33.87416604  28.8948027   28.62889521  28.36832229  27.70021537
  26.22306144  24.80111528  22.74054369  22.35806348  21.1204619
  19.83268669  18.00740132  17.26323514  14.84546306  13.73647287
  13.24691147  11.9778114   11.29207202  10.31837048   8.96458364]


In [5]:
#Taking the first 10 features
clf = svm.SVC()
# clf.fit(X_23,Y_23)
score = cross_validation.cross_val_score(clf, X_23[:,0:10],Y_23, cv=10, n_jobs=4).mean()
print(score)
C = [0.01,0.1,0.5,1,1.5,2,10]
G = [0.01,0.1,0.3,0.5,0.7,0.9,2]
scores = np.zeros([7,7])
for i in range(0,7):
    for j in range(0,7):
        clf = svm.SVC(C = C[i], gamma=G[j])
        score = cross_validation.cross_val_score(clf, X_23[:,0:10],Y_23, cv=10, n_jobs=4).mean()
        scores[i,j] = score
print(scores)
print(np.amax(scores))


0.971401369688
[[ 0.51029346  0.83047315  0.51029346  0.51029346  0.51029346  0.51029346
   0.51029346]
 [ 0.96033906  0.96666427  0.96510257  0.86856999  0.60219054  0.51190636
   0.51029346]
 [ 0.96351366  0.97140137  0.98251328  0.97147657  0.94302835  0.88447021
   0.5515417 ]
 [ 0.96507616  0.97140137  0.98568788  0.98576309  0.97628968  0.95729087
   0.79262833]
 [ 0.96348886  0.97140137  0.98732559  0.98737599  0.97470238  0.95729087
   0.80371544]
 [ 0.96031426  0.97460157  0.98573829  0.98737599  0.97470238  0.95729087
   0.80371544]
 [ 0.96661386  0.97780178  0.98415099  0.98737599  0.97470238  0.95729087
   0.80371544]]
0.987375992063


In [6]:
#for the entire 25 features


clf = svm.SVC()
# clf.fit(X_23,Y_23)
score = cross_validation.cross_val_score(clf, X_23,Y_23, cv=10, n_jobs=4).mean()
print(score)
C = [0.01,0.1,0.5,1,1.5,2,10]
G = [0.01,0.1,0.3,0.5,0.7,0.9,2]
scores = np.zeros([7,7])
for i in range(0,7):
    for j in range(0,7):
        clf = svm.SVC(C = C[i], gamma=G[j])
        score = cross_validation.cross_val_score(clf, X_23,Y_23, cv=10, n_jobs=4).mean()
        scores[i,j] = score
print(scores)
print(np.amax(scores))       
        
        

0.977826580901
[[ 0.51029346  0.51029346  0.51029346  0.51029346  0.51029346  0.51029346
   0.51029346]
 [ 0.95880136  0.8859807   0.51029346  0.51029346  0.51029346  0.51029346
   0.51029346]
 [ 0.97301267  0.97462638  0.57056532  0.53566708  0.52140617  0.51029346
   0.51029346]
 [ 0.97303827  0.97777618  0.86867079  0.67369832  0.56424091  0.54045459
   0.53566708]
 [ 0.97462558  0.97618888  0.8781698   0.69912234  0.58331493  0.553129
   0.53566708]
 [ 0.97306388  0.97618888  0.8781698   0.69912234  0.58331493  0.553129
   0.53566708]
 [ 0.97938908  0.97618888  0.8781698   0.69912234  0.58331493  0.553129
   0.53566708]]
0.979389080901


## Observation
* When we use 10 features , we can see that we get a better accuracy than when we use the entire 25 features (0.9873 vs 0.9793). This might be due to the reasons:
    * When we distinguish among only two classes(in this case 2 & 3), the most of the variation might have been captured in the first 10 dimensions only. 
    * Also, the other dimensions might be corresponding to the noise in data, and in this way , we might be reducing the load on the algorithm to discover those dimensions which contribute to noise.

* We can see that using the default settings , we get a accuracy of 0.979. Using a little bit of tuning we saw that we get the best value which is better than the default settings.

### In the next section, we vary the kernel and do the same optimization

In [7]:
#Using some custom settings to optimize and using the entire 25 features
C = [0.1,0.5,1,3,10]
G = [0.001,0.01,0.02,0.03,0.04,0.05,1]
scores = []
for c in C:
    for g in G:
        clf = svm.SVC(C = c, gamma=g,kernel='poly')
        score = cross_validation.cross_val_score(clf, X_23,Y_23, cv=10, n_jobs=4).mean()
        scores.append(score)
print(scores)
print(scores.index(max(scores)))
print(scores[scores.index(max(scores))])

[0.5102934587813619, 0.5102934587813619, 0.5102934587813619, 0.95570276497695839, 0.96835157450076781, 0.97785138248847936, 0.97460077444956461, 0.5102934587813619, 0.5102934587813619, 0.95887736815156155, 0.97785138248847936, 0.98095078084997456, 0.98092517921146949, 0.97460077444956461, 0.5102934587813619, 0.54509168586789547, 0.97311427931387606, 0.98095078084997456, 0.98092517921146949, 0.97618807603686641, 0.97460077444956461, 0.5102934587813619, 0.95731486815156153, 0.98095078084997456, 0.97933787762416791, 0.97460077444956461, 0.97460077444956461, 0.97460077444956461, 0.5102934587813619, 0.97785138248847936, 0.97933787762416791, 0.97460077444956461, 0.97460077444956461, 0.97460077444956461, 0.97460077444956461]
11
0.98095078085


In [8]:
#Using some custom settings to optimize and using the first 10 features

C = [0.1,0.5,1,3,10]
G = [0.001,0.01,0.02,0.03,0.04,0.05,1]
scores = []
for c in C:
    for g in G:
        clf = svm.SVC(C = c, gamma=g,kernel='poly')
        score = cross_validation.cross_val_score(clf, X_23[:,0:10],Y_23, cv=10, n_jobs=4).mean()
        scores.append(score)
print(scores)
print(scores.index(max(scores)))
print(scores[scores.index(max(scores))])

[0.5102934587813619, 0.5102934587813619, 0.5102934587813619, 0.51669386840757814, 0.86532098054275475, 0.95560115847414229, 0.96832757296466965, 0.5102934587813619, 0.5102934587813619, 0.62282386072708662, 0.95560115847414229, 0.96038866487455188, 0.95567716333845354, 0.95235295058883762, 0.5102934587813619, 0.5102934587813619, 0.91443052355350751, 0.9619511648745519, 0.95567716333845354, 0.96510176651305668, 0.95235295058883762, 0.5102934587813619, 0.52773057475678442, 0.96195116487455201, 0.95882696492575514, 0.96668906810035826, 0.97150217613927281, 0.95235295058883762, 0.5102934587813619, 0.94453965053763445, 0.95882696492575514, 0.96822596646185344, 0.97152697772657448, 0.96832757296466965, 0.95235295058883762]
32
0.971526977727


## Observation
* We see that the best result is obtained when C = 0.5 and gamma = 0.04. This is a maxima and we decrease our accuracy by moving in any direction. The best result is 0.98095
* Also, when we use first 10 features, we get a worse performance than using 25 features (for a polynolial kernel)
#### Result: polynomial kernel gives a better performance than rbf kernel, but for first 10 dimensions, rbf kernel is better

In [9]:
#using the first 10 features
C = [0.001,0.003,0.01,0.03,0.1,0.5,1,3,10]
scores = []
for c in C:
    clf = svm.SVC(C = c,kernel='linear')
    score = cross_validation.cross_val_score(clf, X_23[:,0:10],Y_23, cv=10, n_jobs=4).mean()
    scores.append(score)
print(scores)
print(scores.index(max(scores)))
print(scores[scores.index(max(scores))])

[0.952376952124936, 0.96192636328725034, 0.96351366487455192, 0.96510176651305668, 0.96033906169994876, 0.95872615847414233, 0.9555259536610341, 0.95716365847414231, 0.95557635688684073]
3
0.965101766513


In [10]:
C = [0.001,0.003,0.01,0.03,0.1,0.5,1,3,10]
scores = []
for c in C:
    clf = svm.SVC(C = c,kernel='linear')
    score = cross_validation.cross_val_score(clf, X_23,Y_23, cv=10, n_jobs=4).mean()
    scores.append(score)
print(scores)
print(scores.index(max(scores)))
print(scores[scores.index(max(scores))])

[0.9508640552995391, 0.95721406169994872, 0.96822596646185366, 0.96668906810035826, 0.97142697132616473, 0.96029025857654882, 0.95557795698924719, 0.95714045698924721, 0.95719086021505362]
4
0.971426971326


## Observation
* We see that the best performance we can extract by using a linear kernel is 0.9714, which is worse than both polynomial kernel and rbf kernel. 
* We get better performance when we use the entire 25 features
* Also from here we get an intuition that maybe higher order polynomials may perform better. We check that in the next section


In [11]:
# Increase the degree of the polynomial kernel
#Using some custom settings to optimize further
C = [0.03,0.1,0.5,1,3,10,15]
G = [0.001,0.01,0.02,0.03,0.04,0.05,1]
scores = np.zeros([7,7])
i = 0
for i in range(0,7):
    for j in range(0,7):
        clf = svm.SVC(C = C[i], gamma=G[j],kernel='poly', degree=4)
        score = cross_validation.cross_val_score(clf, X_23,Y_23, cv=10, n_jobs=4).mean()
#         score = clf.fit(X_23,Y_23).score(X_23,Y_23)
        scores[i,j] = score
print(scores)
print(np.amax(scores))

[[ 0.51029346  0.51029346  0.51029346  0.51029346  0.86839798  0.91606903
   0.96515217]
 [ 0.51029346  0.51029346  0.51029346  0.91599382  0.93032994  0.95255376
   0.96515217]
 [ 0.51029346  0.51029346  0.90961902  0.94620376  0.96361527  0.97467678
   0.96515217]
 [ 0.51029346  0.51029346  0.91765553  0.95572837  0.97308948  0.96986367
   0.96515217]
 [ 0.51029346  0.51029346  0.95094086  0.97308948  0.97147657  0.96512737
   0.96515217]
 [ 0.51029346  0.94781506  0.96674027  0.97147657  0.96671467  0.96515217
   0.96515217]
 [ 0.51029346  0.91455613  0.97150218  0.96668907  0.96671467  0.96515217
   0.96515217]]
0.974676779314


In [12]:
Cvalues = np.repeat(C,7)
Gvalues = np.tile(G,7)

plt.contourf(np.log(Cvalues.reshape(len(C), len(C))), np.log(Gvalues.reshape(len(C), len(C))),
             scores)
plt.colorbar()
plt.title('Train Accuracy')
plt.xlabel('log(C)')
plt.ylabel('log(gamma)')
plt.show()


## Observation

We see that the polynomial kernel decreases in accuracy when we increase the degree of the kernel.

In [13]:
# Now do the same thing for two other classes
# This time take the classes to be 5 and 6

B = np.zeros(Y_train.shape[0],dtype=bool)
for i in range(0,Y_train.shape[0]):
    if(Y_train[i] == 5 or Y_train[i] == 6):
        B[i] = True

X_56 = X_train[B]
Y_56 = Y_train[B]
m,n,p = linalg.svd(X_56)
print(n)



# X_23.iloc[:,0:10]

[ 46.72695527  41.83510941  38.97715922  37.75855601  33.62344887
  32.10558063  30.58285761  28.71829002  26.46569952  25.93141986
  22.93955859  22.46139355  21.57496245  20.59900655  19.83863977
  17.50594701  15.76717351  14.67584491  13.67052227  12.91923096
  12.53860723   9.85948918   9.16887595   8.15218324   6.19211096]


In [25]:
# Using the entire 25 features

clf = svm.SVC()
score = cross_validation.cross_val_score(clf, X_56,Y_56, cv=10, n_jobs=4).mean()
print(score)
C = [0.5,1,1.5,2,10,20,100]
G = [0.01,0.1,0.3,0.5,0.7,0.9,2]
scores = np.zeros([7,7])
for i in range(0,7):
    for j in range(0,7):
        clf = svm.SVC(C = C[i], gamma=G[j])
        score = cross_validation.cross_val_score(clf, X_56,Y_56, cv=10, n_jobs=4).mean()
#         score = clf.fit(X_56,Y_56).score(X_56,Y_56)
        scores[i,j] = score
print(scores)
print(np.amax(scores))  
Cvalues = np.repeat(C,7)
Gvalues = np.tile(G,7)

# plt.contourf(np.log(Cvalues.reshape(len(C), len(C))), np.log(Gvalues.reshape(len(C), len(C))),
#              scores)
# plt.colorbar()
# plt.title('Train Accuracy')
# plt.xlabel('log(C)')
# plt.ylabel('log(gamma)')
# plt.show()

0.988218390805
[[ 0.97482759  0.97994253  0.744549    0.65518118  0.54802455  0.53618839
   0.52940873]
 [ 0.97477011  0.98994253  0.88408533  0.79994935  0.79653224  0.64064095
   0.52432398]
 [ 0.97982759  0.98994253  0.890752    0.82006429  0.81009449  0.65745081
   0.5260189 ]
 [ 0.97816092  0.9916092   0.890752    0.82006429  0.81009449  0.65745081
   0.5260189 ]
 [ 0.98321839  0.9916092   0.890752    0.82006429  0.81009449  0.65745081
   0.5260189 ]
 [ 0.98488506  0.9916092   0.890752    0.82006429  0.81009449  0.65745081
   0.5260189 ]
 [ 0.98488506  0.9916092   0.890752    0.82006429  0.81009449  0.65745081
   0.5260189 ]]
0.991609195402


In [24]:
clf = svm.SVC()
score = cross_validation.cross_val_score(clf, X_56[:,0:10],Y_56, cv=10, n_jobs=4).mean()
print(score)
C = [0.5,1,1.5,2,10,20,100]
G = [0.01,0.1,0.3,0.5,0.7,0.9,2]
scores = np.zeros([7,7])
for i in range(0,7):
    for j in range(0,7):
        clf = svm.SVC(C = C[i], gamma=G[j])
        score = cross_validation.cross_val_score(clf, X_56[:,0:10],Y_56, cv=10, n_jobs=4).mean()
#         score = clf.fit(X_56,Y_56).score(X_56,Y_56)
        scores[i,j] = score
print(scores)
print(np.amax(scores)) 

0.981551724138
[[ 0.93804598  0.97643678  0.97821839  0.97321839  0.94790376  0.92773135
   0.77118741]
 [ 0.93801773  0.98155172  0.97821839  0.97821839  0.96974187  0.95793201
   0.88054841]
 [ 0.9496844   0.98155172  0.97988506  0.97821839  0.9680752   0.95959868
   0.88721508]
 [ 0.94801773  0.98321839  0.97988506  0.97655172  0.9680752   0.95959868
   0.88721508]
 [ 0.966466    0.97816092  0.97482759  0.97655172  0.9680752   0.95959868
   0.88721508]
 [ 0.97152348  0.97649425  0.97482759  0.97655172  0.9680752   0.95959868
   0.88721508]
 [ 0.97821839  0.971466    0.97482759  0.97655172  0.9680752   0.95959868
   0.88721508]]
0.983218390805


## Observation
* We can see that using the default settings , we get a accuracy of 0.9882. Using a little bit of tuning we saw that we get the best value of 0.9916. We see that it is in somewhere in the middle of the matrix. So it is the optimal and we have obtained the optimal value.
* We see that reducing the number of features reduces the accuracy of the model. <b>This is in contrast to the  2,3 case.</b> The reasons for this might be that the first 10 dimensions might not capture the variation present in the data set, and so we need further features.
<img src="56_1.png" style="max-width:100%; width: 50%"><img src="56_5.png" style="max-width:100%; width: 50%">
### In the next section, we vary the kernel and do the same optimization

In [26]:
C = [0.5,1,1.5,2,10,20,100]
G = [0.01,0.1,0.3,0.5,0.7,0.9,2]
scores = np.zeros([7,7])
for i in range(0,7):
    for j in range(0,7):
        clf = svm.SVC(C = C[i], gamma=G[j], kernel='poly')
        score = cross_validation.cross_val_score(clf, X_56,Y_56, cv=10, n_jobs=4).mean()
#         score = clf.fit(X_56,Y_56).score(X_56,Y_56)
        scores[i,j] = score
print(scores)
print(np.amax(scores)) 
# Cvalues = np.repeat(C,7)
# Gvalues = np.tile(G,7)

# plt.contourf(np.log(Cvalues.reshape(len(C), len(C))), np.log(Gvalues.reshape(len(C), len(C))),
#              scores)
# plt.colorbar()
# plt.title('Train Accuracy')
# plt.xlabel('log(C)')
# plt.ylabel('log(gamma)')
# plt.show()

[[ 0.53288428  0.98319014  0.98319014  0.98319014  0.98319014  0.98319014
   0.98319014]
 [ 0.68052796  0.98319014  0.98319014  0.98319014  0.98319014  0.98319014
   0.98319014]
 [ 0.872331    0.98319014  0.98319014  0.98319014  0.98319014  0.98319014
   0.98319014]
 [ 0.92936976  0.98319014  0.98319014  0.98319014  0.98319014  0.98319014
   0.98319014]
 [ 0.97319014  0.98319014  0.98319014  0.98319014  0.98319014  0.98319014
   0.98319014]
 [ 0.98319014  0.98319014  0.98319014  0.98319014  0.98319014  0.98319014
   0.98319014]
 [ 0.98488603  0.98319014  0.98319014  0.98319014  0.98319014  0.98319014
   0.98319014]]
0.98488603156


In [27]:
C = [0.5,1,1.5,2,10,20,100]
G = [0.01,0.1,0.3,0.5,0.7,0.9,2]
scores = np.zeros([7,7])
for i in range(0,7):
    for j in range(0,7):
        clf = svm.SVC(C = C[i], gamma=G[j], kernel='poly')
        score = cross_validation.cross_val_score(clf, X_56[:,0:10],Y_56, cv=10, n_jobs=4).mean()
#         score = clf.fit(X_56,Y_56).score(X_56,Y_56)
        scores[i,j] = score
print(scores)
print(np.amax(scores)) 

[[ 0.51101695  0.95985681  0.97816092  0.95977109  0.96310442  0.96310442
   0.96310442]
 [ 0.51101695  0.96824761  0.96977011  0.96310442  0.96310442  0.96310442
   0.96310442]
 [ 0.51101695  0.96485681  0.96138028  0.96310442  0.96310442  0.96310442
   0.96310442]
 [ 0.51101695  0.96494253  0.95977109  0.96310442  0.96310442  0.96310442
   0.96310442]
 [ 0.7360715   0.97149425  0.96310442  0.96310442  0.96310442  0.96310442
   0.96310442]
 [ 0.83365381  0.97310345  0.96310442  0.96310442  0.96310442  0.96310442
   0.96310442]
 [ 0.90945646  0.96143776  0.96310442  0.96310442  0.96310442  0.96310442
   0.96310442]]
0.97816091954


## Observations
* Using all of the features gives a better performance than using just the first 10 features.
* We see that in this case , the best performance obtained is 0.984 which is better than the default settings for the kernel.
<img src="56_3.png" style="max-width:100%; width: 50%">
<img src="56_7.png" style="max-width:100%; width: 50%">
### Result : For this case, we see that polynomial kernel gives a worse performance than rbf kernel

In [16]:
#using the linear kernel
C = [0.5,1,1.5,2,10,20,100]
G = [0.01,0.1,0.3,0.5,0.7,0.9,2]
scores = np.zeros([7,1])
for i in range(0,7):
    clf = svm.SVC(C = C[i], kernel='linear')
    score = cross_validation.cross_val_score(clf, X_56,Y_56, cv=10, n_jobs=4).mean()
    scores[i] = score
print(scores)
print(np.amax(scores))

[[ 0.95635106]
 [ 0.9530752 ]
 [ 0.9546844 ]
 [ 0.95635106]
 [ 0.96137931]
 [ 0.96143678]
 [ 0.95477011]]
0.961436781609


## Observation
* We see that the best performance we can extract by using a linear kernel is 0.9614, which is worse than both polynomial kernel and rbf kernel.
* Also from here we get an intuition that maybe higher order polynomials may perform better. We check that in the next section


In [28]:
#Changing the degree of the polynomial kernel and using all the features
C = [0.5,1,1.5,2,10,20,100]
G = [0.01,0.02,0.04,0.05,0.1,0.3,1]
scores = np.zeros([7,7])
for i in range(0,7):
    for j in range(0,7):
        clf = svm.SVC(C = C[i], gamma=G[j], kernel='poly', degree=4)
        score = cross_validation.cross_val_score(clf, X_56,Y_56, cv=10, n_jobs=4).mean()
#         score = clf.fit(X_56,Y_56).score(X_56,Y_56)
        scores[i,j] = score
print(scores)
print(np.amax(scores))

# plt.contourf(np.log(Cvalues.reshape(len(C), len(C))), np.log(Gvalues.reshape(len(C), len(C))),
#              scores)
# plt.colorbar()
# plt.title('Train Accuracy')
# plt.xlabel('log(C)')
# plt.ylabel('log(gamma)')
# plt.show()


[[ 0.50254237  0.81336743  0.96965615  0.98155172  0.97982759  0.97982759
   0.97982759]
 [ 0.50254237  0.9645412   0.97985681  0.9866092   0.97982759  0.97982759
   0.97982759]
 [ 0.50254237  0.97471362  0.97982759  0.98321839  0.97982759  0.97982759
   0.97982759]
 [ 0.58185272  0.97132281  0.98321839  0.98149425  0.97982759  0.97982759
   0.97982759]
 [ 0.86882525  0.97301773  0.97982759  0.97982759  0.97982759  0.97982759
   0.97982759]
 [ 0.96959868  0.98155172  0.97982759  0.97982759  0.97982759  0.97982759
   0.97982759]
 [ 0.96959868  0.98149425  0.97982759  0.97982759  0.97982759  0.97982759
   0.97982759]]
0.986609195402


In [29]:
#Changing the degree of the polynomial kernel and using the first 10 features
C = [0.5,1,1.5,2,10,20,100]
G = [0.01,0.02,0.04,0.05,0.1,0.3,1]
scores = np.zeros([7,7])
for i in range(0,7):
    for j in range(0,7):
        clf = svm.SVC(C = C[i], gamma=G[j], kernel='poly', degree=4)
        score = cross_validation.cross_val_score(clf, X_56[:,0:10],Y_56, cv=10, n_jobs=4).mean()
#         score = clf.fit(X_56,Y_56).score(X_56,Y_56)
        scores[i,j] = score
print(scores)
print(np.amax(scores))

[[ 0.5059322   0.5059322   0.6655562   0.73770894  0.92603741  0.94971459
   0.94138126]
 [ 0.5059322   0.5059322   0.73604228  0.78471849  0.94465712  0.93620884
   0.94138126]
 [ 0.5059322   0.53116209  0.7545188   0.81845315  0.95138126  0.94129456
   0.94138126]
 [ 0.5059322   0.69058738  0.77129944  0.83350964  0.95138126  0.9430187
   0.94138126]
 [ 0.5059322   0.68736606  0.88575005  0.93606565  0.95146698  0.94138126
   0.94138126]
 [ 0.50759887  0.741128    0.92770407  0.94804793  0.94638126  0.94138126
   0.94138126]
 [ 0.63538477  0.85710111  0.95143873  0.95480031  0.93959965  0.94138126
   0.94138126]]
0.954800311709


## Observation

* We see that by increasing the degree of the polynomial, we have increased the (best) accuracy of the model, so we can conclude that increasing the degree is increasing the accuracy in this case. We check for higher degree in the next section.
* Also , using all the features give better performance than first 10 features.
<img src="56_8.png" style="max-width:100%; width: 50%">
<img src="56_9.png" style="max-width:100%; width: 50%">

In [18]:
#Changing the degree of the polynomial kernel
scores = []

C = [0.5,1,1.5,2,10,20,100]
G = [0.01,0.02,0.04,0.05,0.1,0.3,1]
scores = np.zeros([7,7])
for i in range(0,7):
    for j in range(0,7):
        clf = svm.SVC(C = C[i], gamma=G[j], kernel='poly', degree=5)
        score = cross_validation.cross_val_score(clf, X_56,Y_56, cv=10, n_jobs=4).mean()
        #         score = clf.fit(X_23,Y_23).score(X_23,Y_23)
        scores[i,j] = score
print(scores)
print(np.amax(scores))

[0.97307519968829137]
[[ 0.50084746  0.58147769  0.96293201  0.97979934  0.98324761  0.98324761
   0.98324761]
 [ 0.50084746  0.77641535  0.9730752   0.98155172  0.98324761  0.98324761
   0.98324761]
 [ 0.50084746  0.8588535   0.97813267  0.98152348  0.98324761  0.98324761
   0.98324761]
 [ 0.50084746  0.88899669  0.98319014  0.98152348  0.98324761  0.98324761
   0.98324761]
 [ 0.52101987  0.95275959  0.97985681  0.98324761  0.98324761  0.98324761
   0.98324761]
 [ 0.62354568  0.96798948  0.98324761  0.98324761  0.98324761  0.98324761
   0.98324761]
 [ 0.92086402  0.98155172  0.98324761  0.98324761  0.98324761  0.98324761
   0.98324761]]
0.983247613481


## Observations
We see that for degree 5, the accuracy is less than degree 4. So we can conclude that the best accuracy is obtained for polynomial kernel of degree 4. 
### Now we repeat the same analysis for classes 1 and 8

In [19]:
B = np.zeros(Y_train.shape[0],dtype=bool)
for i in range(0,Y_train.shape[0]):
    if(Y_train[i] == 1 or Y_train[i] == 8):
        B[i] = True

X_18 = X_train[B]
Y_18 = Y_train[B]
# X_23.iloc[:,0:10]
clf = svm.SVC()
# clf.fit(X_23,Y_23)
score = cross_validation.cross_val_score(clf, X_18,Y_18, cv=10, n_jobs=4).mean()
print(score)
C = [0.5,1,1.5,2,10,20,100]
G = [0.001,0.003,0.01,0.1,0.3,0.5,0.7]
scores = np.zeros([7,7])
for i in range(0,7):
    for j in range(0,7):
        clf = svm.SVC(C = C[i], gamma=G[j])
#         score = cross_validation.cross_val_score(clf, X_18,Y_18, cv=10, n_jobs=4).mean()
        score = clf.fit(X_18,Y_18).score(X_18,Y_18)
        scores[i,j] = score
print(scores)
print(np.amax(scores))  
Cvalues = np.repeat(C,7)
Gvalues = np.tile(G,7)

plt.contourf(np.log(Cvalues.reshape(len(C), len(C))), np.log(Gvalues.reshape(len(C), len(C))),
             scores)
plt.colorbar()
plt.title('Train Accuracy')
plt.xlabel('log(C)')
plt.ylabel('log(gamma)')
plt.show()

0.987499236874
[[ 0.93730408  0.96394984  0.98589342  0.99373041  0.98119122  0.97021944
   0.96394984]
 [ 0.95297806  0.97335423  0.98902821  1.          1.          1.          1.        ]
 [ 0.95611285  0.97492163  0.98902821  1.          1.          1.          1.        ]
 [ 0.96081505  0.97648903  0.98902821  1.          1.          1.          1.        ]
 [ 0.97335423  0.98746082  0.99373041  1.          1.          1.          1.        ]
 [ 0.97805643  0.98746082  0.9968652   1.          1.          1.          1.        ]
 [ 0.98589342  0.99529781  1.          1.          1.          1.          1.        ]]
1.0


## Observations
The best accuracy obtained is 0.9874 using the rbf kernel for this case. 
<img src="18_1.png" style="max-width:100%; width: 50%">
<img src="18_2.png" style="max-width:100%; width: 50%">
 ### Now we change the kernel type.

In [20]:
#using the default settings for the polynomial kernel
scores = []

clf = svm.SVC(kernel='poly')
score = cross_validation.cross_val_score(clf, X_18,Y_18, cv=10, n_jobs=4).mean()
scores.append(score)
print(scores)

C = [0.5,1,1.5,2,10,20,100]
G = [0.009,0.03,0.05,0.01,0.1,0.3,0.5]
scores = np.zeros([7,7])
for i in range(0,7):
    for j in range(0,7):
        clf = svm.SVC(C = C[i], gamma=G[j], kernel='poly')
        score = cross_validation.cross_val_score(clf, X_18,Y_18, cv=10, n_jobs=4).mean()
#         score = clf.fit(X_18,Y_18).score(X_18,Y_18)
        scores[i,j] = score
print(scores)
print(np.amax(scores)) 
Cvalues = np.repeat(C,7)
Gvalues = np.tile(G,7)

plt.contourf(np.log(Cvalues.reshape(len(C), len(C))), np.log(Gvalues.reshape(len(C), len(C))),
             scores)
plt.colorbar()
plt.title('Train Accuracy')
plt.xlabel('log(C)')
plt.ylabel('log(gamma)')
plt.show()

[0.98898809523809528]
[[ 0.52820055  0.98276213  0.9889881   0.52820055  0.9874256   0.9889881
   0.9889881 ]
 [ 0.52820055  0.98432463  0.9889881   0.52820055  0.9889881   0.9889881
   0.9889881 ]
 [ 0.52820055  0.98588713  0.9889881   0.53916361  0.9889881   0.9889881
   0.9889881 ]
 [ 0.53757631  0.98588713  0.9889881   0.59236607  0.9889881   0.9889881
   0.9889881 ]
 [ 0.97028465  0.9889881   0.9889881   0.97653541  0.9889881   0.9889881
   0.9889881 ]
 [ 0.98432463  0.9874256   0.9889881   0.98276213  0.9889881   0.9889881
   0.9889881 ]
 [ 0.9889881   0.9889881   0.9889881   0.9889881   0.9889881   0.9889881
   0.9889881 ]]
0.988988095238


## Observation
The best accuracy obtained is 0.9889 which is better than rbf kernel.

### Result: Polynomial kernel is better than rbf kernel for this case.

Now we check for linear kernel


In [21]:
#using the linear kernel
C = [0.1,0.3,0.5,1,1.5,2,10]

scores = np.zeros([7,1])
for i in range(0,7):
    clf = svm.SVC(C = C[i], kernel='linear')
    score = cross_validation.cross_val_score(clf, X_18,Y_18, cv=10, n_jobs=4).mean()
    scores[i] = score
print(scores)
print(np.amax(scores))

[[ 0.96872215]
 [ 0.97028541]
 [ 0.97499771]
 [ 0.97026061]
 [ 0.97026061]
 [ 0.97028465]
 [ 0.96869811]]
0.974997710623


## Observation
Linear kernel performs worse than both rbf and polynomial kernel.
Now we vary the degree of the polynomial kernel.

In [22]:
#using the default settings for the polynomial kernel
scores = []

clf = svm.SVC(kernel='poly', degree = 4)
score = cross_validation.cross_val_score(clf, X_18,Y_18, cv=10, n_jobs=4).mean()
scores.append(score)
print(scores)

C = [0.5,1,1.5,2,10,20,100]
G = [0.001,0.005,0.009,0.03,0.05,0.01,0.1]
scores = np.zeros([7,7])
for i in range(0,7):
    for j in range(0,7):
        clf = svm.SVC(C = C[i], gamma=G[j], kernel='poly', degree=4)
        score = cross_validation.cross_val_score(clf, X_18,Y_18, cv=10, n_jobs=4).mean()
#         score = clf.fit(X_18,Y_18).score(X_18,Y_18)
        scores[i,j] = score
print(scores)
print(np.amax(scores)) 
# Cvalues = np.repeat(C,7)
# Gvalues = np.tile(G,7)

# plt.contourf(np.log(Cvalues.reshape(len(C), len(C))), np.log(Gvalues.reshape(len(C), len(C))),
#              scores)
# plt.colorbar()
# plt.title('Test Accuracy')
# plt.xlabel('log(C)')
# plt.ylabel('log(gamma)')
# plt.show()

[0.99213789682539688]
[[ 0.52820055  0.52820055  0.52820055  0.85264461  0.9921379   0.52820055
   0.98434867]
 [ 0.52820055  0.52820055  0.52820055  0.97340888  0.9921379   0.52820055
   0.98122367]
 [ 0.52820055  0.52820055  0.52820055  0.98437271  0.98901213  0.52820055
   0.98122367]
 [ 0.52820055  0.52820055  0.52820055  0.99059944  0.98901213  0.52820055
   0.98122367]
 [ 0.52820055  0.52820055  0.52820055  0.9905506   0.98434867  0.55327648
   0.98122367]
 [ 0.52820055  0.52820055  0.59082837  0.98744963  0.98122367  0.67394231
   0.98122367]
 [ 0.52820055  0.52820055  0.95934562  0.98122367  0.98122367  0.97965965
   0.98122367]]
0.992137896825


## Observation
We see that with degree 4 , polynomial kernel performs better than degree 3. We now check for degree 5.

In [23]:
#using the default settings for the polynomial kernel
scores = []

clf = svm.SVC(kernel='poly', degree = 5)
score = cross_validation.cross_val_score(clf, X_18,Y_18, cv=10, n_jobs=4).mean()
scores.append(score)
print(scores)

C = [0.5,1,1.5,2,10,20,100]
G = [0.001,0.005,0.009,0.03,0.05,0.01,0.1]
scores = np.zeros([7,7])
for i in range(0,7):
    for j in range(0,7):
        clf = svm.SVC(C = C[i], gamma=G[j], kernel='poly', degree=5)
        score = cross_validation.cross_val_score(clf, X_18,Y_18, cv=10, n_jobs=4).mean()
#         score = clf.fit(X_18,Y_18).score(X_18,Y_18)
        scores[i,j] = score
print(scores)
print(np.amax(scores)) 
# Cvalues = np.repeat(C,7)
# Gvalues = np.tile(G,7)

# plt.contourf(np.log(Cvalues.reshape(len(C), len(C))), np.log(Gvalues.reshape(len(C), len(C))),
#              scores)
# plt.colorbar()
# plt.title('Test Accuracy')
# plt.xlabel('log(C)')
# plt.ylabel('log(gamma)')
# plt.show()

[0.94047161172161187]
[[ 0.52820055  0.52820055  0.52820055  0.63011561  0.96088408  0.52820055
   0.98429983]
 [ 0.52820055  0.52820055  0.52820055  0.71938034  0.96557158  0.52820055
   0.98273733]
 [ 0.52820055  0.52820055  0.52820055  0.80249199  0.97025984  0.52820055
   0.98273733]
 [ 0.52820055  0.52820055  0.52820055  0.83696848  0.97338561  0.52820055
   0.98273733]
 [ 0.52820055  0.52820055  0.52820055  0.96400908  0.98117483  0.52820055
   0.98273733]
 [ 0.52820055  0.52820055  0.52820055  0.97333677  0.98429983  0.52820055
   0.98273733]
 [ 0.52820055  0.52820055  0.56421474  0.98117483  0.98273733  0.60811508
   0.98273733]]
0.984299832112


## Observation
We see that for degree 5 , performance decreases. So we can say that optimal performance is obtained at degree 4.