In [2]:
import random
import numpy as np
from data_process import get_FASHION_data, get_MUSHROOM_data
from scipy.spatial import distance
from models import Perceptron, SVM, Softmax, Logistic
from kaggle_submission import output_submission_csv
%matplotlib inline

# For auto-reloading external modules
# See http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

# Loading Fashion-MNIST

In the following cells we determine the number of images for each split and load the images.
<br /> 
TRAIN_IMAGES + VAL_IMAGES = (0, 60000]
, TEST_IMAGES = 10000

In [5]:
# You can change these numbers for experimentation
# For submission we will use the default values 
TRAIN_IMAGES = 50000
VAL_IMAGES = 10000
normalize = True

In [6]:
data = get_FASHION_data(TRAIN_IMAGES, VAL_IMAGES, normalize=normalize)
X_train_fashion, y_train_fashion = data['X_train'], data['y_train']
X_val_fashion, y_val_fashion = data['X_val'], data['y_val']
X_test_fashion, y_test_fashion = data['X_test'], data['y_test']
n_class_fashion = len(np.unique(y_test_fashion))

# Loading Mushroom

In the following cells we determine the splitting of the mushroom dataset.
<br /> TRAINING + VALIDATION = 0.8, TESTING = 0.2

In [7]:
# TRAINING = 0.6 indicates 60% of the data is used as the training dataset.
VALIDATION = 0.2

In [8]:
data = get_MUSHROOM_data(VALIDATION)
X_train_MR, y_train_MR = data['X_train'], data['y_train']
X_val_MR, y_val_MR = data['X_val'], data['y_val']
X_test_MR, y_test_MR = data['X_test'], data['y_test']
n_class_MR = len(np.unique(y_test_MR))

print("Number of train samples: ", X_train_MR.shape[0])
print("Number of val samples: ", X_val_MR.shape[0])
print("Number of test samples: ", X_test_MR.shape[0])

Number of train samples:  4874
Number of val samples:  1625
Number of test samples:  1625


### Get Accuracy

This function computes how well your model performs using accuracy as a metric.

In [9]:
def get_acc(pred, y_test):
    return np.sum(y_test == pred) / len(y_test) * 100

# Perceptron

Perceptron has 2 hyperparameters that you can experiment with:
- **Learning rate** - controls how much we change the current weights of the classifier during each update. We set it at a default value of 0.5, but you should experiment with different values. We recommend changing the learning rate by factors of 10 and observing how the performance of the classifier changes. You should also try adding a **decay** which slowly reduces the learning rate over each epoch.
- **Number of Epochs** - An epoch is a complete iterative pass over all of the data in the dataset. During an epoch we predict a label using the classifier and then update the weights of the classifier according to the perceptron update rule for each sample in the training set. You should try different values for the number of training epochs and report your results.

You will implement the Perceptron classifier in the **models/perceptron.py**

The following code: 
- Creates an instance of the Perceptron classifier class 
- The train function of the Perceptron class is trained on the training data
- We use the predict function to find the training accuracy as well as the testing accuracy


## Train Perceptron on Fashion-MNIST

In [None]:
lr = 0.5
n_epochs = 10

percept_fashion = Perceptron(n_class_fashion, lr, n_epochs)
percept_fashion.train(X_train_fashion, y_train_fashion)

In [None]:
pred_percept = percept_fashion.predict(X_train_fashion)
print('The training accuracy is given by: %f' % (get_acc(pred_percept, y_train_fashion)))

### Validate Perceptron on Fashion-MNIST

In [None]:
pred_percept = percept_fashion.predict(X_val_fashion)
print('The validation accuracy is given by: %f' % (get_acc(pred_percept, y_val_fashion)))

### Test Perceptron on Fashion-MNIST

In [None]:
pred_percept = percept_fashion.predict(X_test_fashion)
print('The testing accuracy is given by: %f' % (get_acc(pred_percept, y_test_fashion)))

### Perceptron_Fashion-MNIST Kaggle Submission

Once you are satisfied with your solution and test accuracy, output a file to submit your test set predictions to the Kaggle for Assignment 1 Fashion-MNIST. Use the following code to do so:

In [None]:
output_submission_csv('kaggle/perceptron_submission_fashion.csv', percept_fashion.predict(X_test_fashion))

## Train Perceptron on Mushroom

In [None]:
lr = 0.5
n_epochs = 10

percept_MR = Perceptron(n_class_MR, lr, n_epochs)
percept_MR.train(X_train_MR, y_train_MR)

In [None]:
pred_percept = percept_MR.predict(X_train_MR)
print('The training accuracy is given by: %f' % (get_acc(pred_percept, y_train_MR)))

### Validate Perceptron on Mushroom

In [None]:
pred_percept = percept_MR.predict(X_val_MR)
print('The validation accuracy is given by: %f' % (get_acc(pred_percept, y_val_MR)))

### Test Perceptron on Mushroom

In [None]:
pred_percept = percept_MR.predict(X_test_MR)
print('The testing accuracy is given by: %f' % (get_acc(pred_percept, y_test_MR)))

# Support Vector Machines (with SGD)

Next, you will implement a "soft margin" SVM. In this formulation you will maximize the margin between positive and negative training examples and penalize margin violations using a hinge loss.

We will optimize the SVM loss using SGD. This means you must compute the loss function with respect to model weights. You will use this gradient to update the model weights.

SVM optimized with SGD has 3 hyperparameters that you can experiment with:
- **Learning rate** - similar to as defined above in Perceptron, this parameter scales by how much the weights are changed according to the calculated gradient update. 
- **Epochs** - similar to as defined above in Perceptron.
- **Regularization constant** - Hyperparameter to determine the strength of regularization. In this case it is a coefficient on the term which maximizes the margin. You could try different values. The default value is set to 0.05.

You will implement the SVM using SGD in the **models/svm.py**

The following code: 
- Creates an instance of the SVM classifier class 
- The train function of the SVM class is trained on the training data
- We use the predict function to find the training accuracy as well as the testing accuracy

## Train SVM on Fashion-MNIST

In [None]:
lr = 0.5
n_epochs = 10
reg_const = 0.05

svm_fashion = SVM(n_class_fashion, lr, n_epochs, reg_const)
svm_fashion.train(X_train_fashion, y_train_fashion)

In [None]:
pred_svm = svm_fashion.predict(X_train_fashion)
print('The training accuracy is given by: %f' % (get_acc(pred_svm, y_train_fashion)))

### Validate SVM on Fashion-MNIST

In [None]:
pred_svm = svm_fashion.predict(X_val_fashion)
print('The validation accuracy is given by: %f' % (get_acc(pred_svm, y_val_fashion)))

### Test SVM on Fashion-MNIST

In [None]:
pred_svm = svm_fashion.predict(X_test_fashion)
print('The testing accuracy is given by: %f' % (get_acc(pred_svm, y_test_fashion)))

### SVM_Fashion-MNIST Kaggle Submission

Once you are satisfied with your solution and test accuracy output a file to submit your test set predictions to the Kaggle for Assignment 1 Fashion-MNIST. Use the following code to do so:

In [None]:
output_submission_csv('kaggle/svm_submission_fashion.csv', svm_fashion.predict(X_test_fashion))

## Train SVM on Mushroom

In [None]:
lr = 0.5
n_epochs = 10
reg_const = 0.05

svm_MR = SVM(n_class_MR, lr, n_epochs, reg_const)
svm_MR.train(X_train_MR, y_train_MR)

In [None]:
pred_svm = svm_MR.predict(X_train_MR)
print('The training accuracy is given by: %f' % (get_acc(pred_svm, y_train_MR)))

### Validate SVM on Mushroom

In [None]:
pred_svm = svm_MR.predict(X_val_MR)
print('The validation accuracy is given by: %f' % (get_acc(pred_svm, y_val_MR)))

## Test SVM on Mushroom

In [None]:
pred_svm = svm_MR.predict(X_test_MR)
print('The testing accuracy is given by: %f' % (get_acc(pred_svm, y_test_MR)))

# Softmax Classifier (with SGD)

Next, you will train a Softmax classifier. This classifier consists of a linear function of the input data followed by a softmax function which outputs a vector of dimension C (number of classes) for each data point. Each entry of the softmax output vector corresponds to a confidence in one of the C classes, and like a probability distribution, the entries of the output vector sum to 1. We use a cross-entropy loss on this sotmax output to train the model. 

Check the following link as an additional resource on softmax classification: http://cs231n.github.io/linear-classify/#softmax

Once again we will train the classifier with SGD. This means you need to compute the gradients of the softmax cross-entropy loss function according to the weights and update the weights using this gradient. Check the following link to help with implementing the gradient updates: https://deepnotes.io/softmax-crossentropy

The softmax classifier has 3 hyperparameters that you can experiment with:
- **Learning rate** - As above, this controls how much the model weights are updated with respect to their gradient.
- **Number of Epochs** - As described for perceptron.
- **Regularization constant** - Hyperparameter to determine the strength of regularization. In this case, we minimize the L2 norm of the model weights as regularization, so the regularization constant is a coefficient on the L2 norm in the combined cross-entropy and regularization objective.

You will implement a softmax classifier using SGD in the **models/softmax.py**

The following code: 
- Creates an instance of the Softmax classifier class 
- The train function of the Softmax class is trained on the training data
- We use the predict function to find the training accuracy as well as the testing accuracy

## Train Softmax on Fashion-MNIST

In [29]:
lr = 0.001
n_epochs = 1
reg_const = 0
from models.neural_net import NeuralNetwork

num_layers = 3
input_size = X_train_fashion.shape[1]
hidden_size = 10
num_classes = n_class_fashion
num_inputs = X_train_fashion.shape[0]
np.random.seed(0)
hidden_sizes = [hidden_size] * (num_layers - 1)
regularization = 0

net = NeuralNetwork(input_size, hidden_sizes, num_classes, num_layers)
epochs = 10
train_loss = np.zeros(epochs)
train_accuracy = np.zeros(epochs)
learning_rate = 0.01
# For each epoch...
for epoch in range(epochs):        
    # Run the forward pass of the model to get a prediction and compute the accuracy
    scores = net.forward(X_train_fashion)
    pred = np.argmax(scores, axis=1)
    train_accuracy[epoch] += (pred == y_train_fashion).sum()
    
    # Run the backward pass of the model to compute the loss, and update the weights
    train_loss[epoch] += net.backward(y_train_fashion, regularization)
    print(f"Epoch{epoch+1}\t, acc:{train_accuracy[epoch]/num_inputs:.4f}, loss:{train_loss[epoch]:.4f}")
    net.update(learning_rate)
    
    
# softmax_fashion = Softmax(n_class_fashion, lr, n_epochs, reg_const)
# softmax_fashion.train(X_train_fashion, y_train_fashion)

--------Output------------
X0 
 [[-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 ...
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]]
A0 
 [[-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 ...
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]]
X1 
 [[ 163.03   52.89   -3.6  ...  -80.22  -27.27  230.57]
 [  -8.36  -22.83  102.94 ...  -36.2   -79.69   96.5 ]
 [ -35.28 -135.44   11.05 ...   

  eps: float = 1e-8,


Epoch3	, acc:0.0996, loss:inf
***********params***********
--------Output------------
X0 
 [[-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 ...
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]]
A0 
 [[-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 ...
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]]
X1 
 [[-3.76e+137 -1.23e+161  1.75e+164 ...  3.37e+164  3.93e+138  3.51e+138]
 [-1.13e+1

Epoch6	, acc:0.0996, loss:inf
***********params***********
--------Output------------
X0 
 [[-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 ...
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]]
A0 
 [[-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 ...
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]]
X1 
 [[-3.76e+137 -1.23e+161  1.75e+164 ...  3.37e+164  3.93e+138  3.51e+138]
 [-1.13e+1

Epoch9	, acc:0.0996, loss:inf
***********params***********
--------Output------------
X0 
 [[-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 ...
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]]
A0 
 [[-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 ...
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]
 [-9.00e-04 -6.16e-03 -3.09e-02 ... -2.68e+00 -8.04e-01 -7.32e-02]]
X1 
 [[-3.76e+137 -1.23e+161  1.75e+164 ...  3.37e+164  3.93e+138  3.51e+138]
 [-1.13e+1

In [286]:
pred_softmax = softmax_fashion.predict(X_train_fashion)
print('The training accuracy is given by: %f' % (get_acc(pred_softmax, y_train_fashion)))

The training accuracy is given by: 81.448000


### Validate Softmax on Fashion-MNIST

In [287]:
pred_softmax = softmax_fashion.predict(X_val_fashion)
print('The validation accuracy is given by: %f' % (get_acc(pred_softmax, y_val_fashion)))

The validation accuracy is given by: 78.760000


### Testing Softmax on Fashion-MNIST

In [288]:
pred_softmax = softmax_fashion.predict(X_test_fashion)
print('The testing accuracy is given by: %f' % (get_acc(pred_softmax, y_test_fashion)))

The testing accuracy is given by: 77.710000


### Softmax_Fashion-MNIST Kaggle Submission

Once you are satisfied with your solution and test accuracy output a file to submit your test set predictions to the Kaggle for Assignment 1 Fashion-MNIST. Use the following code to do so:

In [289]:
output_submission_csv('kaggle/softmax_submission_fashion.csv', softmax_fashion.predict(X_test_fashion))

## Train Softmax on Mushroom

In [290]:
lr = 0.05
n_epochs = 1000
reg_const = 0.0

softmax_MR = Softmax(n_class_MR, lr, n_epochs, reg_const)
softmax_MR.train(X_train_MR, y_train_MR)

Epoch1	, acc:48.9536, loss:0.8236
Epoch11	, acc:82.1912, loss:0.4911
Epoch21	, acc:84.3865, loss:0.4691
Epoch31	, acc:60.0328, loss:0.7129
Epoch41	, acc:84.9610, loss:0.4636
Epoch51	, acc:52.9955, loss:0.7833
Epoch61	, acc:89.0029, loss:0.4233
Epoch71	, acc:92.2856, loss:0.3903
Epoch81	, acc:90.5827, loss:0.4075
Epoch91	, acc:88.5925, loss:0.4273
Epoch101	, acc:91.3623, loss:0.3996
Epoch111	, acc:85.6996, loss:0.4562
Epoch121	, acc:55.5191, loss:0.7581
Epoch131	, acc:85.9664, loss:0.4536
Epoch141	, acc:56.3192, loss:0.7500
Epoch151	, acc:88.5310, loss:0.4280
Epoch161	, acc:93.3114, loss:0.3802
Epoch171	, acc:92.7165, loss:0.3861
Epoch181	, acc:88.6130, loss:0.4271
Epoch191	, acc:92.5318, loss:0.3879
Epoch201	, acc:86.5819, loss:0.4475
Epoch211	, acc:91.4444, loss:0.3988
Epoch221	, acc:90.9110, loss:0.4041
Epoch231	, acc:91.7932, loss:0.3953
Epoch241	, acc:92.5728, loss:0.3874
Epoch251	, acc:91.8547, loss:0.3948
Epoch261	, acc:92.7780, loss:0.3855
Epoch271	, acc:90.6853, loss:0.4064
Epo

In [291]:
pred_softmax = softmax_MR.predict(X_train_MR)
print('The training accuracy is given by: %f' % (get_acc(pred_softmax, y_train_MR)))

The training accuracy is given by: 94.686089


### Validate Softmax on Mushroom

In [292]:
pred_softmax = softmax_MR.predict(X_val_MR)
print('The validation accuracy is given by: %f' % (get_acc(pred_softmax, y_val_MR)))

The validation accuracy is given by: 94.153846


### Testing Softmax on Mushroom

In [293]:
pred_softmax = softmax_MR.predict(X_test_MR)
print('The testing accuracy is given by: %f' % (get_acc(pred_softmax, y_test_MR)))

The testing accuracy is given by: 94.338462


# Logistic Classifier

The Logistic Classifier has 2 hyperparameters that you can experiment with:
- **Learning rate** - similar to as defined above in Perceptron, this parameter scales by how much the weights are changed according to the calculated gradient update. 
- **Number of Epochs** - As described for perceptron.
- **Threshold** - The decision boundary of the classifier.


You will implement the Logistic Classifier in the **models/logistic.py**

The following code: 
- Creates an instance of the Logistic classifier class 
- The train function of the Logistic class is trained on the training data
- We use the predict function to find the training accuracy as well as the testing accuracy

### Training Logistic Classifer

In [122]:
learning_rate = 0.01
n_epochs = 1000
threshold = 0.5

lr = Logistic(learning_rate, n_epochs, threshold)
lr.train(X_train_MR, y_train_MR)

Epoch1, acc:88.79770209273697
Epoch11, acc:93.37299958965943
Epoch21, acc:94.00902749281904
Epoch31, acc:94.11161263848994
Epoch41, acc:94.27574887156341
Epoch51, acc:94.62453836684448
Epoch61, acc:94.82970865818629
Epoch71, acc:94.99384489125976
Epoch81, acc:95.19901518260156
Epoch91, acc:95.2605662700041
Epoch101, acc:95.40418547394337
Epoch111, acc:95.56832170701682
Epoch121, acc:95.60935576528519
Epoch131, acc:95.60935576528519
Epoch141, acc:95.71194091095609
Epoch151, acc:95.75297496922445
Epoch161, acc:95.79400902749282
Epoch171, acc:95.814526056627
Epoch181, acc:95.87607714402955
Epoch191, acc:95.87607714402955
Epoch201, acc:95.89659417316373
Epoch211, acc:95.89659417316373
Epoch221, acc:95.93762823143209
Epoch231, acc:95.91711120229792
Epoch241, acc:95.91711120229792
Epoch251, acc:95.91711120229792
Epoch261, acc:95.95814526056627
Epoch271, acc:95.95814526056627
Epoch281, acc:95.93762823143209
Epoch291, acc:95.99917931883463
Epoch301, acc:96.040213377103
Epoch311, acc:96.0196963

In [118]:
pred_lr = lr.predict(X_train_MR)
print('The training accuracy is given by: %f' % (get_acc(pred_lr, y_train_MR)))

The training accuracy is given by: 96.183833


### Validate Logistic Classifer

In [120]:
pred_lr = lr.predict(X_val_MR)
print('The validation accuracy is given by: %f' % (get_acc(pred_lr, y_val_MR)))

The validation accuracy is given by: 96.184615


### Test Logistic Classifier

In [121]:
pred_lr = lr.predict(X_test_MR)
print('The testing accuracy is given by: %f' % (get_acc(pred_lr, y_test_MR)))

The testing accuracy is given by: 96.000000
