## Imports

In [1]:
import numpy as np
import pandas as pd
import torch
from scipy.stats import randint
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from torch.utils.data import DataLoader
from torchvision import transforms, datasets

## Download Resources

In [2]:
# MNIST
# Seed for the training-loader shuffle. The loaders are only used to
# materialise NumPy arrays, so fixing the shuffle fixes the row order of
# MNIST_train_images / MNIST_train_labels across Restart & Run All, which
# keeps order-dependent downstream steps (e.g. cross-validation splits)
# reproducible.
MNIST_SHUFFLE_SEED = 0


def _loader_to_arrays(loader):
    """Drain a DataLoader of (images, labels) batches into two NumPy arrays.

    Every image batch is flattened to one row per sample before stacking.

    Args:
        loader: iterable yielding ``(images, labels)`` tensor pairs.

    Returns:
        tuple: ``(images, labels)`` where ``images`` is the row-wise stack of
        the flattened image batches and ``labels`` is the concatenation of
        the per-batch label arrays, both in iteration order.
    """
    image_batches = []
    label_batches = []
    for images, labels in loader:
        # Keep the batch dimension, collapse everything else into one axis.
        image_batches.append(images.view(images.shape[0], -1).numpy())
        label_batches.append(labels.numpy())
    return np.vstack(image_batches), np.concatenate(label_batches)


mnist_train_transform = transforms.Compose([transforms.ToTensor()])
mnist_test_transform = transforms.Compose([transforms.ToTensor()])

trainset_mnist = datasets.MNIST(root='./data', train=True, download=True, transform=mnist_train_transform)
testset_mnist = datasets.MNIST(root='./data', train=False, download=True, transform=mnist_test_transform)

MNIST_train = DataLoader(
    trainset_mnist,
    batch_size=32,
    shuffle=True,
    num_workers=2,
    # Dedicated generator so the shuffle does not depend on global RNG state.
    generator=torch.Generator().manual_seed(MNIST_SHUFFLE_SEED),
)
MNIST_test = DataLoader(testset_mnist, batch_size=32, shuffle=False, num_workers=2)

MNIST_train_images, MNIST_train_labels = _loader_to_arrays(MNIST_train)
MNIST_test_images, MNIST_test_labels = _loader_to_arrays(MNIST_test)

## Decision Tree

### Training

In [3]:
# Fixed seed for the trees: scikit-learn's DecisionTreeClassifier randomly
# permutes the candidate features at each split, so without a random_state
# ties between equally good splits can be broken differently on every run.
TREE_RANDOM_STATE = 0

# One fitted GridSearchCV per depth, in the order of the depths below; the
# scoring cell reads this list.
grid_searchs = []
for depth in [3, 6, 9, 12]:
    # max_depth is pinned to a single value per search so that each depth
    # gets its own best cross-validated score instead of one global winner.
    params_grid = {
        'max_depth': [depth],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'max_leaf_nodes': [5, 10, None]
    }
    model = DecisionTreeClassifier(random_state=TREE_RANDOM_STATE)
    grid_search = GridSearchCV(model, param_grid=params_grid, cv=5)
    grid_search.fit(MNIST_train_images, MNIST_train_labels)
    grid_searchs.append(grid_search)
    print('depth %d best score : %f' % (depth, grid_search.best_score_))

depth 3 best score : 0.490250
depth 6 best score : 0.737383
depth 9 best score : 0.839467
depth 12 best score : 0.870583


### Score

In [4]:
# One row per fitted grid search: the depth it was restricted to, followed by
# the accuracy reported by .score() on the training and on the test arrays.
scores = [
    [
        fitted_search.best_params_['max_depth'],
        fitted_search.score(MNIST_train_images, MNIST_train_labels),
        fitted_search.score(MNIST_test_images, MNIST_test_labels),
    ]
    for fitted_search in grid_searchs
]

score_columns = ["Depth", "Accuracy(Training Set)", "Accuracy(Test Set)"]
pd.DataFrame(scores, columns=score_columns)

Unnamed: 0,Depth,Accuracy(Training Set),Accuracy(Test Set)
0,3,0.491517,0.4953
1,6,0.73825,0.7415
2,9,0.866517,0.8499
3,12,0.949167,0.8769


## SVM (linear kernel)

### Training

In [5]:
# Support vector classifier with a linear kernel, fitted on
# MNIST_train_images / MNIST_train_labels. The fit call is left as the last
# expression so the notebook still displays the fitted estimator.
clf_linear = svm.SVC(
    kernel='linear',
)
clf_linear.fit(
    X=MNIST_train_images,
    y=MNIST_train_labels,
)

### Score

In [6]:
# Score the linear SVM on the training arrays first, then on the test arrays,
# and show both values as a one-row table.
accuracy_training, accuracy_test = (
    clf_linear.score(features, targets)
    for features, targets in (
        (MNIST_train_images, MNIST_train_labels),
        (MNIST_test_images, MNIST_test_labels),
    )
)
result = [[accuracy_training, accuracy_test]]

pd.DataFrame(
    result,
    columns=["Accuracy(Training Set)", "Accuracy(Test Set)"],
)

Unnamed: 0,Accuracy(Training Set),Accuracy(Test Set)
0,0.970733,0.9404


## SVM (RBF kernel)

### Training

In [7]:
# Support vector classifier with an RBF kernel, fitted on
# MNIST_train_images / MNIST_train_labels. The fit call is left as the last
# expression so the notebook still displays the fitted estimator.
clf_kernel = svm.SVC(
    kernel='rbf',
)
clf_kernel.fit(
    X=MNIST_train_images,
    y=MNIST_train_labels,
)

### Score

In [8]:
# Score the RBF SVM on the training arrays first, then on the test arrays,
# and show both values as a one-row table.
accuracy_training, accuracy_test = (
    clf_kernel.score(features, targets)
    for features, targets in (
        (MNIST_train_images, MNIST_train_labels),
        (MNIST_test_images, MNIST_test_labels),
    )
)
result = [[accuracy_training, accuracy_test]]

pd.DataFrame(
    result,
    columns=["Accuracy(Training Set)", "Accuracy(Test Set)"],
)

Unnamed: 0,Accuracy(Training Set),Accuracy(Test Set)
0,0.989917,0.9792
