In [1]:
!pip install idx2numpy

import numpy as np
import pandas as pd
import idx2numpy
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

Collecting idx2numpy
  Downloading idx2numpy-1.2.3.tar.gz (6.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: idx2numpy
  Building wheel for idx2numpy (setup.py) ... [?25l[?25hdone
  Created wheel for idx2numpy: filename=idx2numpy-1.2.3-py3-none-any.whl size=7904 sha256=406afa5ea54f84ff995cc6a643fcb08ca56b04ceaca06ba1176213648e6ed36e
  Stored in directory: /root/.cache/pip/wheels/e0/f4/e7/643fc5f932ec2ff92997f43f007660feb23f948aa8486f1107
Successfully built idx2numpy
Installing collected packages: idx2numpy
Successfully installed idx2numpy-1.2.3


**Reading Dataset**

In [27]:
X_train = idx2numpy.convert_from_file('/content/train-images.idx3-ubyte')
y_train = idx2numpy.convert_from_file('/content/train-labels.idx1-ubyte')

X_test = idx2numpy.convert_from_file('/content/t10k-images.idx3-ubyte')
y_test = idx2numpy.convert_from_file('/content/t10k-labels.idx1-ubyte')

In [28]:
print("X_train Shape : " + str(X_train.shape))
print("Y_train Shape : " + str(y_train.shape))

print("X_test Shape : " + str(X_test.shape))
print("Y_test Shape : " + str(y_test.shape))

X_train Shape : (60000, 28, 28)
Y_train Shape : (60000,)
X_test Shape : (10000, 28, 28)
Y_test Shape : (10000,)


**Reshaping Data**

In [29]:
# reshaping data
X_train = X_train.reshape(60000,-1)
X_test = X_test.reshape(10000,-1)

# it was taking too  long to train on full set
X_train = X_train[:30000, :]
y_train = y_train[:30000]

print("X_train Shape : " + str(X_train.shape))
print("Y_train Shape : " + str(y_train.shape))

print("X_test Shape : " + str(X_test.shape))
print("Y_test Shape : " + str(y_test.shape))

X_train Shape : (30000, 784)
Y_train Shape : (30000,)
X_test Shape : (10000, 784)
Y_test Shape : (10000,)


**Linear SVM**

In [30]:
linear_svm = SVC(kernel='linear', decision_function_shape='ovr')
linear_svm.fit(X_train, y_train)

print("Linear SVM's Accuracy on Training Set : ", linear_svm.score(X_train, y_train) * 100)

Linear SVM's Accuracy on Training Set :  100.0


In [31]:
# Predicting
y_pred = linear_svm.predict(X_test)

# Accuracy
accuracy = accuracy_score(y_test,y_pred)
print("Linear SVM's Accuracy on Test Set : ", accuracy * 100)


Linear SVM's Accuracy on Test Set :  91.45


**Standardizing Data**

In [32]:
# Standardization
scaler = StandardScaler()

scaler.fit(X_train)
standardized_X_train = scaler.transform(X_train)

scaler.fit(X_test)
standardized_X_test = scaler.transform(X_test)

**Linear SVM with Standardized Data**

In [33]:
linear_svm_with_standard_data = SVC(kernel='linear', decision_function_shape='ovr')
linear_svm_with_standard_data.fit(standardized_X_train, y_train)

print("Linear SVM with Standardized Data Accuracy on Training Set : ", linear_svm_with_standard_data.score(standardized_X_train, y_train) * 100)

Linear SVM with Standardized Data Accuracy on Training Set :  99.60666666666667


In [34]:
# Predicting
y_pred = linear_svm_with_standard_data.predict(standardized_X_test)

# Accuracy
accuracy = accuracy_score(y_test,y_pred)
print("Linear SVM with Standardized Data Accuracy on Test Set : ", accuracy * 100)

Linear SVM with Standardized Data Accuracy on Test Set :  92.0


**Non-Linear SVM with Standardized Data**

In [35]:
non_linear_svm = SVC(kernel='rbf', decision_function_shape='ovr')
non_linear_svm.fit(standardized_X_train, y_train)

print("Non-Linear SVM with Standardized Data Accuracy on Training Set : ", non_linear_svm.score(standardized_X_train, y_train) * 100)

Non-Linear SVM with Standardized Data Accuracy on Training Set :  98.55000000000001


In [36]:
# Predicting
y_pred = non_linear_svm.predict(standardized_X_test)

# Accuracy
accuracy = accuracy_score(y_test,y_pred)
print("Non-Linear SVM with Standardized Data Accuracy on Test Set : ", accuracy * 100)

Non-Linear SVM with Standardized Data Accuracy on Test Set :  95.78999999999999
