## Import all the necessary packages

In [1]:
import pandas as pd
from sklearn import linear_model
from sklearn.model_selection import train_test_split
import gc

### Read the train.csv file

In [2]:
gc.collect()

20

In [3]:
df=pd.read_csv("train.csv")
df.head(3)

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Check for the distribution of the classes in the dataset
#### 0-9 occur almost same number of times only
#### The distribution of the data set is even across all labels

In [4]:
counts=df.groupby(['label']).size().reset_index(name='count')
percent=counts['count']/len(df)*100
percent

0     9.838095
1    11.152381
2     9.945238
3    10.359524
4     9.695238
5     9.035714
6     9.850000
7    10.478571
8     9.673810
9     9.971429
Name: count, dtype: float64

## Prepare the data for predictions


In [5]:
# Divide the data into X and Y
# Y means the output variable -label
X=df.iloc[:,1:]
Y=df['label']

# Rescaling the features
from sklearn.preprocessing import scale
X = scale(X)

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.1,random_state=101)

print (x_train.shape)
print (x_test.shape)

  


(37800, 784)
(4200, 784)


### Fit linear model into SVM kernel

In [6]:
from sklearn import svm
from sklearn import metrics

# an initial SVM model with linear kernel   
svm_linear = svm.SVC(kernel='linear')

# fit
svm_linear.fit(x_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [7]:
## Predict the labels
predictions = svm_linear.predict(x_test)

## Get the confusion matrix
confusion = metrics.confusion_matrix(y_true = y_test, y_pred = predictions)
class_wise = metrics.classification_report(y_true=y_test, y_pred=predictions)
print(class_wise)

              precision    recall  f1-score   support

           0       0.95      0.98      0.96       409
           1       0.94      0.99      0.97       448
           2       0.91      0.90      0.91       416
           3       0.89      0.89      0.89       416
           4       0.88      0.92      0.90       411
           5       0.88      0.88      0.88       365
           6       0.96      0.97      0.96       427
           7       0.94      0.92      0.93       437
           8       0.92      0.85      0.89       416
           9       0.89      0.87      0.88       455

   micro avg       0.92      0.92      0.92      4200
   macro avg       0.92      0.92      0.92      4200
weighted avg       0.92      0.92      0.92      4200



In [None]:
gc.collect()
# an initial SVM model with linear kernel   
svm_linear = svm.SVC(kernel='rbf',gamma=0.1,C=100)

# fit
svm_linear.fit(x_train, y_train)

## Predict the labels
predictions = svm_linear.predict(x_test)

## Get the confusion matrix

confusion = metrics.confusion_matrix(y_true = y_test, y_pred = predictions)
class_wise = metrics.classification_report(y_true=y_test, y_pred=predictions)
print(class_wise)
print (metrics.accuracy_score(y_test,predictions))

0.9697619047619047


In [56]:
test=pd.read_csv("test.csv")

In [57]:
test_data=pd.read_csv("test.csv")
test_data=scale(test_data)
test_pred=pd.Series(svm_linear.predict(test_data))

submissions=pd.DataFrame({'ImageId':test.index.values+1,'Label':test_pred})
submissions.to_csv("MNIST_2.csv",index=False)

  


In [38]:
## Implement GridSearchCV
from sklearn.model_selection import GridSearchCV

parameters={'C':[1,10,100,1000],'gamma':[1e-3,1e-5,1e-7]}

# an initial SVM model with linear kernel   
svm_rbf = svm.SVC(kernel='rbf')

clf=GridSearchCV(svm_rbf,param_grid=parameters, scoring='accuracy')
clf.fit(x_train,y_train)
confusion = metrics.confusion_matrix(y_true = y_test, y_pred = predictions)
class_wise = metrics.classification_report(y_true=y_test, y_pred=predictions)
print(class_wise)
print (metrics.accuracy_score(y_test,predictions))




KeyboardInterrupt: 