## Import

In [1]:
import numpy as np
import pandas as pd
import torch
from scipy.stats import randint
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_validate
from sklearn.tree import DecisionTreeClassifier
from torch.utils.data import DataLoader
from torchvision import transforms, datasets

## Download Resources

In [2]:
# MNIST
# Download MNIST through torchvision and materialise both splits as flat NumPy
# arrays (one row per image) so they can be passed to scikit-learn estimators.
mnist_train_transform = transforms.Compose([transforms.ToTensor()])
mnist_test_transform = transforms.Compose([transforms.ToTensor()])

trainset_mnist = datasets.MNIST(root='./data', train=True, download=True, transform=mnist_train_transform)
testset_mnist = datasets.MNIST(root='./data', train=False, download=True, transform=mnist_test_transform)

# Seed the shuffle so the training rows come out in the same order after every
# kernel restart; unshuffled K-fold splits downstream depend on that row order.
mnist_shuffle_rng = torch.Generator().manual_seed(0)

MNIST_train = DataLoader(trainset_mnist, batch_size=32, shuffle=True, num_workers=2, generator=mnist_shuffle_rng)
MNIST_test = DataLoader(testset_mnist, batch_size=32, shuffle=False, num_workers=2)


def _loader_to_numpy(loader):
    """Drain a DataLoader of (images, labels) batches into two NumPy arrays.

    Each image batch is reshaped to (batch_size, -1), i.e. one flat row per
    image, before being stacked.

    Args:
        loader: iterable yielding ``(images, labels)`` tensor pairs.

    Returns:
        Tuple ``(images, labels)``: the row-stacked flattened images and the
        concatenated labels, in the order the loader produced them.
    """
    image_batches = []
    label_batches = []
    for images, labels in loader:
        image_batches.append(images.view(images.shape[0], -1).numpy())
        label_batches.append(labels.numpy())
    return np.vstack(image_batches), np.concatenate(label_batches)


MNIST_train_images, MNIST_train_labels = _loader_to_numpy(MNIST_train)
MNIST_test_images, MNIST_test_labels = _loader_to_numpy(MNIST_test)

## Decision Tree Test

In [None]:
# Baseline: fit one depth-limited decision tree on the flattened training
# images and report its accuracy on the test split.
# random_state is fixed because the tree's split search is randomised; without
# a seed the printed score can change between runs.
model = DecisionTreeClassifier(max_depth=12, random_state=0)
model.fit(MNIST_train_images, MNIST_train_labels)
score = model.score(MNIST_test_images, MNIST_test_labels)
print("Model %d score : %f\nParameters : %s" % (model.max_depth, score, model.get_params()))

## Decision Tree Assignment

In [None]:
# For each depth limit, grid-search the remaining tree hyper-parameters with
# 5-fold cross-validation, then record training and test accuracy of the
# search's selected model.
params_grid = {
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_leaf_nodes': [5, 10, None],
}

grid_searchs = []  # fitted GridSearchCV objects, one per depth, kept for later inspection
result = []        # rows of [depth, training accuracy, test accuracy]
for depth in [3, 6, 9, 12]:
    # Fixed seed so repeated runs evaluate identical trees for each candidate.
    model = DecisionTreeClassifier(max_depth=depth, random_state=0)
    grid_search = GridSearchCV(model, param_grid=params_grid, cv=5)
    grid_search.fit(MNIST_train_images, MNIST_train_labels)

    accuracy_training = grid_search.score(MNIST_train_images, MNIST_train_labels)
    accuracy_test = grid_search.score(MNIST_test_images, MNIST_test_labels)
    result.append([depth, accuracy_training, accuracy_test])
    grid_searchs.append(grid_search)
    # Both placeholders need a value: the original passed only best_score_,
    # which raised TypeError after the first fit.
    print('depth %d best score : %f' % (depth, grid_search.best_score_))

pd.DataFrame(result, columns=["Depth", "Accuracy\n(Training Set)", "Accuracy\n(Test Set)"])

## SVM Test

## SVM Assignment