In [None]:
# Numerical and plotting libraries.
import numpy as np
import matplotlib.pyplot as plt

# Image loading for the hand-drawn test digits.
from PIL import Image

# scikit-learn: scaling, the SVM classifier, pipelines and grid search.
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

print("Imported libraries.")

In [None]:
# Load the MNIST subsets from comma-separated text files.
# The 8000-row training file takes longer to fit but gives a more accurate
# model; swap in 'mnist_train_1200.csv' for a quicker, less accurate run.
mnist_train = np.loadtxt('mnist_train_8000.csv', delimiter=',')
mnist_test = np.loadtxt('mnist_test_1000.csv', delimiter=',')

# Column 0 of every row is the digit label; the remaining columns are the
# flattened pixel values of that sample.
train_digit, train_data = mnist_train[:, 0], mnist_train[:, 1:]
test_digit, test_data = mnist_test[:, 0], mnist_test[:, 1:]

print("Imported training/testing MNIST datasets.")
print("The training dataset has ", len(train_digit), " entries.", sep="")
print("The testing dataset has ", len(test_digit), " entries.", sep="")

In [None]:
# Two-stage model: standardise each feature, then classify with a
# polynomial-kernel SVM. The step names ('scaler', 'SVM') are the prefixes
# used for hyper-parameter names such as 'SVM__C' in the grid search.
steps = [
    ('scaler', StandardScaler()),
    ('SVM', SVC(kernel='poly')),
]
pipeline = Pipeline(steps=steps)

print("Defined transformer/estimator objects.")

In [None]:
# Hyper-parameter grid for the 'SVM' pipeline step. Only one candidate
# combination is listed, so the search evaluates a single configuration;
# add more values to either list to actually tune the model.
parameters = {
    'SVM__C': [0.001],
    'SVM__gamma': [10],
}

# fit() returns the fitted search object, so construction and training chain.
grid = GridSearchCV(pipeline, param_grid=parameters).fit(train_data, train_digit)

# Keep the test-set predictions around: later cells compare them to the labels.
test_pred_digit = grid.predict(test_data)

accuracy_percent = 100 * grid.score(test_data, test_digit)
print('Trained the model! Accuracy score = ' + str(accuracy_percent) + '%.')

In [None]:
# Pick one random test sample, report whether the model classified it
# correctly, and display the digit image with predicted/actual labels.
n = np.random.randint(len(test_data))

# Rebuild the 28x28 image from the flat pixel row.
# NOTE(review): the pixel range of the CSV is not visible in this notebook.
# The handmade PNGs are passed to the same model as raw 8-bit grayscale values,
# which suggests the CSV pixels are already 0-255. Unconditionally multiplying
# such values by 255 and casting to uint8 overflows and corrupts the preview.
# Scale only when the values look normalised to [0, 1], and clip before the
# cast so the conversion can never wrap around.
pixels = np.reshape(test_data[n], (28, 28))
if pixels.max() <= 1.0:
    pixels = pixels * 255
img = np.clip(pixels, 0, 255).astype(np.uint8)

print('Testing the model...')
if test_pred_digit[n] == test_digit[n]:
    print('Prediction correct!')
else:
    print('Prediction failed!')

# Labels are stored as floats (loaded via np.loadtxt); show them as integers.
plt.title('Predicted: ' + str(int(test_pred_digit[n])) + ', Actual: ' + str(int(test_digit[n])))
plt.imshow(img, interpolation='nearest', cmap='gray')
plt.show()

In [None]:
# Load the hand-drawn digit images '1test.png' ... '9test.png'.
#   img_arr   - 2-D grayscale pixel arrays, kept for plotting
#   self_test - the same images flattened to 1-D rows, used as model input
# NOTE(review): presumably each PNG is 28x28 so the flattened row matches the
# feature count the model was trained on - verify the image files.
self_test = []
img_arr = []

for n in range(1, 10):
    file_name = str(n) + 'test.png'
    # Image.open is lazy and keeps the file open; the context manager makes
    # sure the handle is released once the pixels have been copied out.
    with Image.open(file_name) as img:
        # 'L' converts to single-channel 8-bit grayscale.
        img_data = np.array(img.convert('L'))
    img_arr.append(img_data)
    self_test.append(img_data.flatten())

print("Imported handmade test data.")

In [None]:
# Classify the handmade digits and show each image in a 3x3 grid, titled
# with the digit the model predicted for it.
test_pred = grid.predict(self_test)
fig, axs = plt.subplots(3, 3, figsize=(10, 10))

for idx, predicted in enumerate(test_pred):
    # Fill the grid row by row: index 0-2 -> first row, 3-5 -> second, ...
    row, col = divmod(idx, 3)
    panel = axs[row, col]
    panel.imshow(img_arr[idx], interpolation='nearest', cmap='gray')
    panel.set_title('Predicted digit: ' + str(int(predicted)))