In [2]:
import random
import numpy as np
from data_process import get_FASHION_data, get_MUSHROOM_data
from scipy.spatial import distance
from models import Perceptron, SVM, Softmax, Logistic
from kaggle_submission import output_submission_csv
%matplotlib inline

# For auto-reloading external modules
# See http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2



# Loading Fashion-MNIST

In the following cells we determine the number of images for each split and load the images.
<br /> 
TRAIN_IMAGES + VAL_IMAGES = (0, 60000]
, TEST_IMAGES = 10000

In [28]:
# You can change these numbers for experimentation
# For submission we will use the default values 
TRAIN_IMAGES = 50000
VAL_IMAGES = 10000
normalize = True

In [29]:
data = get_FASHION_data(TRAIN_IMAGES, VAL_IMAGES, normalize=normalize)
X_train_fashion, y_train_fashion = data['X_train'], data['y_train']
X_val_fashion, y_val_fashion = data['X_val'], data['y_val']
X_test_fashion, y_test_fashion = data['X_test'], data['y_test']
n_class_fashion = len(np.unique(y_test_fashion))

# Loading Mushroom

In the following cells we determine the splitting of the mushroom dataset.
<br /> TRAINING + VALIDATION = 0.8, TESTING = 0.2

In [30]:
# TRAINING = 0.6 indicates 60% of the data is used as the training dataset.
VALIDATION = 0.2

In [31]:
data = get_MUSHROOM_data(VALIDATION)
X_train_MR, y_train_MR = data['X_train'], data['y_train']
X_val_MR, y_val_MR = data['X_val'], data['y_val']
X_test_MR, y_test_MR = data['X_test'], data['y_test']
n_class_MR = len(np.unique(y_test_MR))

print("Number of train samples: ", X_train_MR.shape[0])
print("Number of val samples: ", X_val_MR.shape[0])
print("Number of test samples: ", X_test_MR.shape[0])

Number of train samples:  4874
Number of val samples:  1625
Number of test samples:  1625


In [32]:
X_train_MR.shape

(4874, 22)

### Get Accuracy

This function computes how well your model performs using accuracy as a metric.

In [33]:
def get_acc(pred, y_test):
    return np.sum(y_test == pred) / len(y_test) * 100

# Perceptron

Perceptron has 2 hyperparameters that you can experiment with:
- **Learning rate** - controls how much we change the current weights of the classifier during each update. We set it at a default value of 0.5, but you should experiment with different values. We recommend changing the learning rate by factors of 10 and observing how the performance of the classifier changes. You should also try adding a **decay** which slowly reduces the learning rate over each epoch.
- **Number of Epochs** - An epoch is a complete iterative pass over all of the data in the dataset. During an epoch we predict a label using the classifier and then update the weights of the classifier according to the perceptron update rule for each sample in the training set. You should try different values for the number of training epochs and report your results.

You will implement the Perceptron classifier in the **models/perceptron.py**

The following code: 
- Creates an instance of the Perceptron classifier class 
- The train function of the Perceptron class is trained on the training data
- We use the predict function to find the training accuracy as well as the testing accuracy


## Train Perceptron on Fashion-MNIST

### perceptron code

In [332]:
# multiclass perceptron

"""Perceptron model."""

import numpy as np


class Perceptron:
    def __init__(self, n_class: int, lr: float, epochs: int):
        """Initialize a new classifier.

        Parameters:
            n_class: the number of classes
            lr: the learning rate
            epochs: the number of epochs to train for
        """
        self.w = None  # TODO: change this W
        self.lr = lr
        self.epochs = epochs
        self.n_class = n_class

    def train(self, X_train: np.ndarray, y_train: np.ndarray, X_val=X_train , y_val=y_train):
        """Train the classifier.

        Use the perceptron update rule as introduced in the Lecture.

        Parameters:
            X_train: a number array of shape (N, D) containing training data;
                N examples with D dimensions
            y_train: a numpy array of shape (N,) containing training labels
        """
        
        # TODO: implement me
        N,D = X_train.shape
        random.seed(5)
        upper_b = np.min(X_train)
        lower_b = np.max(X_train)
        self.w = np.array([[random.uniform(lower_b,upper_b) for j in range(X_train.shape[1])] for i in range(self.n_class)])

        
        pred_svm_t = self.predict(X_train)
        t_acc = self.get_acc(pred_svm_t, y_train)
        
        for epoch in range(self.epochs):
            # for each of the training data
            for i in range(X_train.shape[0]):
                for c in range(len(self.w)):
                    if np.dot(self.w[c],X_train[i]) > np.dot(self.w[y_train[i]],X_train[i]):
                        self.w[y_train[i]] = self.w[y_train[i]] + self.lr*X_train[i]
                        self.w[c] = self.w[c] - self.lr*X_train[i]
                if i%100 == 0:
                    ret = self.predict(X_train)
                    t_cur_acc = self.get_acc(ret, y_train)
                    pred_svm_v = self.predict(X_val)
                    cur_v_acc = self.get_acc(pred_svm_v, y_val)

                    print("\tBatch",i,"of",N,"training acc",t_cur_acc,"val acc",cur_v_acc)
                    # early stop
                    if cur_v_acc >= 83: # found 83
                            return

        
    def get_acc(self, pred, y_test):
        return np.sum(y_test == pred) / len(y_test) * 100
    
                        
    def predict(self, X_test: np.ndarray) -> np.ndarray:
        """Use the trained weights to predict labels for test data points.

        Parameters:
            X_test: a numpy array of shape (N, D) containing testing data;
                N examples with D dimensions

        Returns:
            predicted labels for the data in X_test; a 1-dimensional array of
                length N, where each element is an integer giving the predicted
                class.
        """
        ret = []
        for i in range(X_test.shape[0]):
            scores = np.dot(self.w, X_test[i])
            max_score = float("-inf")
            max_class = -1
            for i in range(len(scores)):
                if scores[i] > max_score:
                    max_score = scores[i]
                    max_class = i
            ret.append(max_class)
        return np.array(ret)


NameError: name 'X_train' is not defined

In [293]:
lr = 0.2
n_epochs = 10

percept_fashion = Perceptron(n_class_fashion, lr, n_epochs)
percept_fashion.train(X_train_fashion, y_train_fashion, X_val_fashion, y_val_fashion)

	Batch 0 of 50000 training acc 16.724 val acc 16.38
	Batch 100 of 50000 training acc 55.198 val acc 55.60000000000001
	Batch 200 of 50000 training acc 65.446 val acc 65.71000000000001
	Batch 300 of 50000 training acc 62.73 val acc 61.8
	Batch 400 of 50000 training acc 62.138000000000005 val acc 61.9
	Batch 500 of 50000 training acc 62.024 val acc 62.36000000000001
	Batch 600 of 50000 training acc 68.714 val acc 68.14
	Batch 700 of 50000 training acc 67.048 val acc 67.72
	Batch 800 of 50000 training acc 70.35 val acc 69.91000000000001
	Batch 900 of 50000 training acc 73.882 val acc 73.52
	Batch 1000 of 50000 training acc 68.58999999999999 val acc 68.33
	Batch 1100 of 50000 training acc 64.25999999999999 val acc 64.4
	Batch 1200 of 50000 training acc 68.76400000000001 val acc 68.21000000000001
	Batch 1300 of 50000 training acc 72.244 val acc 71.94
	Batch 1400 of 50000 training acc 72.87 val acc 71.88
	Batch 1500 of 50000 training acc 73.68 val acc 73.72999999999999
	Batch 1600 of 50000 t

	Batch 13800 of 50000 training acc 76.67399999999999 val acc 75.99000000000001
	Batch 13900 of 50000 training acc 77.73400000000001 val acc 76.94
	Batch 14000 of 50000 training acc 78.316 val acc 78.12
	Batch 14100 of 50000 training acc 78.846 val acc 78.06
	Batch 14200 of 50000 training acc 76.03999999999999 val acc 75.42
	Batch 14300 of 50000 training acc 75.89200000000001 val acc 75.79
	Batch 14400 of 50000 training acc 78.098 val acc 77.79
	Batch 14500 of 50000 training acc 78.708 val acc 78.62
	Batch 14600 of 50000 training acc 76.16000000000001 val acc 76.11
	Batch 14700 of 50000 training acc 81.21199999999999 val acc 80.71000000000001
	Batch 14800 of 50000 training acc 79.31200000000001 val acc 79.01
	Batch 14900 of 50000 training acc 79.684 val acc 79.10000000000001
	Batch 15000 of 50000 training acc 80.502 val acc 80.08999999999999
	Batch 15100 of 50000 training acc 75.128 val acc 74.47
	Batch 15200 of 50000 training acc 76.298 val acc 75.78
	Batch 15300 of 50000 training acc 

	Batch 27500 of 50000 training acc 80.08999999999999 val acc 79.05
	Batch 27600 of 50000 training acc 75.49 val acc 74.9
	Batch 27700 of 50000 training acc 72.456 val acc 71.21
	Batch 27800 of 50000 training acc 78.104 val acc 76.98
	Batch 27900 of 50000 training acc 77.578 val acc 76.99000000000001
	Batch 28000 of 50000 training acc 76.218 val acc 75.77000000000001
	Batch 28100 of 50000 training acc 80.46600000000001 val acc 79.78
	Batch 28200 of 50000 training acc 77.422 val acc 76.66
	Batch 28300 of 50000 training acc 79.166 val acc 77.89
	Batch 28400 of 50000 training acc 75.244 val acc 74.25
	Batch 28500 of 50000 training acc 75.588 val acc 74.5
	Batch 28600 of 50000 training acc 76.94800000000001 val acc 75.71
	Batch 28700 of 50000 training acc 80.054 val acc 79.42
	Batch 28800 of 50000 training acc 74.968 val acc 73.50999999999999
	Batch 28900 of 50000 training acc 79.008 val acc 78.21000000000001
	Batch 29000 of 50000 training acc 76.358 val acc 75.22
	Batch 29100 of 50000 trai

	Batch 41000 of 50000 training acc 79.24 val acc 78.21000000000001
	Batch 41100 of 50000 training acc 78.056 val acc 77.46
	Batch 41200 of 50000 training acc 76.51400000000001 val acc 75.69
	Batch 41300 of 50000 training acc 77.446 val acc 75.97
	Batch 41400 of 50000 training acc 74.75 val acc 73.39
	Batch 41500 of 50000 training acc 82.242 val acc 81.27
	Batch 41600 of 50000 training acc 81.27600000000001 val acc 80.33
	Batch 41700 of 50000 training acc 80.916 val acc 79.94
	Batch 41800 of 50000 training acc 79.188 val acc 77.7
	Batch 41900 of 50000 training acc 74.616 val acc 73.41
	Batch 42000 of 50000 training acc 77.68599999999999 val acc 76.36
	Batch 42100 of 50000 training acc 78.28399999999999 val acc 77.19
	Batch 42200 of 50000 training acc 78.572 val acc 77.72
	Batch 42300 of 50000 training acc 78.988 val acc 77.61
	Batch 42400 of 50000 training acc 78.818 val acc 77.66999999999999
	Batch 42500 of 50000 training acc 78.444 val acc 77.57
	Batch 42600 of 50000 training acc 79.3

	Batch 4600 of 50000 training acc 80.142 val acc 79.25
	Batch 4700 of 50000 training acc 79.918 val acc 78.64
	Batch 4800 of 50000 training acc 81.256 val acc 79.85
	Batch 4900 of 50000 training acc 80.852 val acc 80.21000000000001
	Batch 5000 of 50000 training acc 82.518 val acc 81.2
	Batch 5100 of 50000 training acc 78.72200000000001 val acc 76.99000000000001
	Batch 5200 of 50000 training acc 79.606 val acc 77.94
	Batch 5300 of 50000 training acc 80.69800000000001 val acc 79.83
	Batch 5400 of 50000 training acc 78.348 val acc 77.56
	Batch 5500 of 50000 training acc 79.972 val acc 78.75999999999999
	Batch 5600 of 50000 training acc 77.91 val acc 76.03999999999999
	Batch 5700 of 50000 training acc 75.47 val acc 74.08
	Batch 5800 of 50000 training acc 81.23599999999999 val acc 80.54
	Batch 5900 of 50000 training acc 78.59 val acc 77.91
	Batch 6000 of 50000 training acc 80.542 val acc 79.43
	Batch 6100 of 50000 training acc 72.586 val acc 71.41
	Batch 6200 of 50000 training acc 76.854 va

In [294]:
pred_percept = percept_fashion.predict(X_train_fashion)
print('The training accuracy is given by: %f' % (get_acc(pred_percept, y_train_fashion)))

The training accuracy is given by: 82.796000


### Validate Perceptron on Fashion-MNIST

In [302]:
pred_percept = percept_fashion.predict(X_val_fashion)
print('The validation accuracy is given by: %f' % (get_acc(pred_percept, y_val_fashion)))

The validation accuracy is given by: 81.960000


### Test Perceptron on Fashion-MNIST

In [296]:
pred_percept = percept_fashion.predict(X_test_fashion)
print('The testing accuracy is given by: %f' % (get_acc(pred_percept, y_test_fashion)))

The testing accuracy is given by: 80.840000


### Perceptron_Fashion-MNIST Kaggle Submission

Once you are satisfied with your solution and test accuracy, output a file to submit your test set predictions to the Kaggle for Assignment 1 Fashion-MNIST. Use the following code to do so:

In [297]:
output_submission_csv('kaggle/perceptron_submission_fashion.csv', percept_fashion.predict(X_test_fashion))

## Train Perceptron on Mushroom

In [None]:
lr = 0.5
n_epochs = 10

percept_MR = Perceptron(n_class_MR, lr, n_epochs)
percept_MR.train(X_train_MR, y_train_MR, X_val_MR, y_val_MR)

[autoreload of models.perceptron failed: Traceback (most recent call last):
  File "C:\Program Files\Anaconda3\lib\site-packages\IPython\extensions\autoreload.py", line 244, in check
    superreload(m, reload, self.old_objects)
  File "C:\Program Files\Anaconda3\lib\site-packages\IPython\extensions\autoreload.py", line 378, in superreload
    module = reload(module)
  File "C:\Program Files\Anaconda3\lib\imp.py", line 314, in reload
    return importlib.reload(module)
  File "C:\Program Files\Anaconda3\lib\importlib\__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 630, in _exec
  File "<frozen importlib._bootstrap_external>", line 728, in exec_module
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "C:\Users\yunxi\Desktop\CS444Vision\assignment1\models\perceptron.py", line 10, in <module>
    class Perceptron:
  File "C:\Users\yunxi\Desktop\CS444Vision\assignment1\models\perceptron.py"

	Batch 0 of 4874 training acc 48.317603610997125 val acc 49.16923076923077
	Batch 100 of 4874 training acc 72.28149363972098 val acc 72.18461538461538
	Batch 200 of 4874 training acc 83.72999589659418 val acc 81.78461538461539
	Batch 300 of 4874 training acc 48.11243331965531 val acc 48.55384615384615
	Batch 400 of 4874 training acc 63.52072219942553 val acc 63.44615384615384
	Batch 500 of 4874 training acc 87.85391875256462 val acc 87.56923076923077
	Batch 600 of 4874 training acc 72.11735740664751 val acc 69.96923076923078
	Batch 700 of 4874 training acc 75.89249076733688 val acc 76.36923076923077
	Batch 800 of 4874 training acc 50.84119819450144 val acc 51.44615384615384
	Batch 900 of 4874 training acc 88.83873615100534 val acc 87.44615384615385
	Batch 1000 of 4874 training acc 70.51702913418137 val acc 70.21538461538461
	Batch 1100 of 4874 training acc 78.47763643824375 val acc 75.6923076923077
	Batch 1200 of 4874 training acc 83.032416906032 val acc 82.33846153846154
	Batch 1300 o

	Batch 1200 of 4874 training acc 92.44973327862127 val acc 92.06153846153846
	Batch 1300 of 4874 training acc 90.33647927780058 val acc 88.8
	Batch 1400 of 4874 training acc 93.80385720147723 val acc 93.04615384615384
	Batch 1500 of 4874 training acc 86.72548215018465 val acc 85.16923076923077
	Batch 1600 of 4874 training acc 92.75748871563397 val acc 91.87692307692308
	Batch 1700 of 4874 training acc 74.47681575707837 val acc 74.33846153846154
	Batch 1800 of 4874 training acc 94.54247025030776 val acc 94.15384615384616
	Batch 1900 of 4874 training acc 93.08576118178088 val acc 92.18461538461538
	Batch 2000 of 4874 training acc 70.5990972507181 val acc 70.33846153846154
	Batch 2100 of 4874 training acc 83.1555190808371 val acc 83.13846153846154
	Batch 2200 of 4874 training acc 83.36068937217891 val acc 80.92307692307692
	Batch 2300 of 4874 training acc 90.52113254000821 val acc 89.53846153846153
	Batch 2400 of 4874 training acc 92.1830118998769 val acc 91.63076923076923
	Batch 2500 of 

In [None]:
pred_percept = percept_MR.predict(X_train_MR)
print('The training accuracy is given by: %f' % (get_acc(pred_percept, y_train_MR)))

### Validate Perceptron on Mushroom

In [307]:
pred_percept = percept_MR.predict(X_val_MR)
print('The validation accuracy is given by: %f' % (get_acc(pred_percept, y_val_MR)))

The validation accuracy is given by: 95.630769


### Test Perceptron on Mushroom

In [308]:
pred_percept = percept_MR.predict(X_test_MR)
print('The testing accuracy is given by: %f' % (get_acc(pred_percept, y_test_MR)))

The testing accuracy is given by: 95.200000


# Support Vector Machines (with SGD)

Next, you will implement a "soft margin" SVM. In this formulation you will maximize the margin between positive and negative training examples and penalize margin violations using a hinge loss.

We will optimize the SVM loss using SGD. This means you must compute the loss function with respect to model weights. You will use this gradient to update the model weights.

SVM optimized with SGD has 3 hyperparameters that you can experiment with:
- **Learning rate** - similar to as defined above in Perceptron, this parameter scales by how much the weights are changed according to the calculated gradient update. 
- **Epochs** - similar to as defined above in Perceptron.
- **Regularization constant** - Hyperparameter to determine the strength of regularization. In this case it is a coefficient on the term which maximizes the margin. You could try different values. The default value is set to 0.05.

You will implement the SVM using SGD in the **models/svm.py**

The following code: 
- Creates an instance of the SVM classifier class 
- The train function of the SVM class is trained on the training data
- We use the predict function to find the training accuracy as well as the testing accuracy

## Supportive Vector Machine Code

In [317]:
"""Support Vector Machine (SVM) model."""

import numpy as np
import copy


class SVM:
    def __init__(self, n_class: int, lr: float, epochs: int, reg_const: float):
        """Initialize a new classifier.

        Parameters:
            n_class: the number of classes
            lr: the learning rate
            epochs: the number of epochs to train for
            reg_const: the regularization constant
        """
        self.w = None  # TODO: change this
        self.lr = lr
        self.epochs = epochs
        self.reg_const = reg_const
        self.n_class = n_class

    def calc_gradient(self, X_train: np.ndarray, y_train: np.ndarray) -> np.ndarray:
        """Calculate gradient of the svm hinge loss.

        Inputs have dimension D, there are C classes, and we operate on
        mini-batches of N examples.

        Parameters:
            X_train: a numpy array of shape (N, D) containing a mini-batch
                of data
            y_train: a numpy array of shape (N,) containing training labels;
                y[i] = c means that X[i] has label c, where 0 <= c < C

        Returns:
            the gradient with respect to weights w; an array of the same shape
                as w
        """
        # TODO: implement me we don't do reg_term here
        grad_w = np.array([[0 for j in range(X_train.shape[1])] for i in range(self.n_class)]).astype("float")
        N,D = X_train.shape
        
        for i in range(N):
            tmp_grad = np.array([[0 for j in range(X_train.shape[1])] for i in range(self.n_class)]).astype("float")
            sum_xi = np.array([0 for i in range(D)]).astype("float")
            for c in range(len(tmp_grad)):
                if c != y_train[i]: # 
                    if  np.dot(self.w[y_train[i]],X_train[i]) - np.dot(self.w[c],X_train[i]) < 1:
                        tmp_grad[c] = X_train[i]
                        tmp_grad[y_train[i]] -= X_train[i]
            grad_w += tmp_grad
        return grad_w
        
        
            
    def train(self, X_train: np.ndarray, y_train: np.ndarray, X_val: np.ndarray, y_val: np.ndarray):
        """Train the classifier.

        Hint: operate on mini-batches of data for SGD.

        Parameters:
            X_train: a numpy array of shape (N, D) containing training data;
                N examples with D dimensions
            y_train: a numpy array of shape (N,) containing training labels
        """
        # TODO: implement me
        BATCH_SIZE = 128
        
        # start with random weights
        random.seed(666)
        b_up = np.min(X_train)
        b_low = np.max(X_train)
        self.w = np.array([[random.uniform(b_low,b_up) for j in range(X_train.shape[1])] for i in range(self.n_class)])
        
        N,D = X_train.shape
        it = N//BATCH_SIZE
        
        pred_svm_t = self.predict(X_train)
        t_acc = self.get_acc(pred_svm_t, y_train)
        
        for epoch in range(self.epochs):
            print("epoch",epoch)
            for i in range(N//BATCH_SIZE): # feed in training data batch-wise
                X_train_batch = X_train[BATCH_SIZE*i:BATCH_SIZE*(i+1)]
                y_train_batch = y_train[BATCH_SIZE*i:BATCH_SIZE*(i+1)]
        
                grad_w = self.calc_gradient(X_train_batch, y_train_batch)
                old_w = copy.deepcopy(self.w)
                for c in range(len(self.w)):
                    # TODO: update w
                    self.w[c] = (1-self.lr*self.reg_const/it)*old_w[c] - self.lr*grad_w[c]
                
                ret = self.predict(X_train)
                t_cur_acc = self.get_acc(ret, y_train)
                pred_svm_v = self.predict(X_val)
                cur_v_acc = self.get_acc(pred_svm_v, y_val)
                if i%1 == 0:
                    print("\tBatch",i,"of",it,"training acc",t_cur_acc,"val acc",cur_v_acc)
                    # early stop
                if cur_v_acc >= 83: # found 83
                        return
                    
    def get_acc(self, pred, y_test):
        return np.sum(y_test == pred) / len(y_test) * 100

    def predict(self, X_test: np.ndarray) -> np.ndarray:
        """Use the trained weights to predict labels for test data points.

        Parameters:
            X_test: a numpy array of shape (N, D) containing testing data;
                N examples with D dimensions

        Returns:
            predicted labels for the data in X_test; a 1-dimensional array of
                length N, where each element is an integer giving the predicted
                class.
        """
        # TODO: implement me
        ret = []
        for i in range(X_test.shape[0]):
            scores = np.dot(self.w, X_test[i])
#             print(scores)
            max_score = float("-inf")
            max_class = -1
            for j in range(len(scores)):
                if scores[j] > max_score:
                    max_score = scores[j]
                    max_class = j
            ret.append(max_class)
        return np.array(ret)


## Train SVM on Fashion-MNIST

In [1231]:
lr = 0.5
n_epochs = 5
reg_const = 0.05

svm_fashion = SVM(n_class_fashion, lr, n_epochs, reg_const)
svm_fashion.train(X_train_fashion, y_train_fashion,X_val_fashion,y_val_fashion)

epoch 0
	Batch 0 of 390 training acc 43.632 val acc 44.019999999999996
	Batch 1 of 390 training acc 52.028 val acc 52.290000000000006
	Batch 2 of 390 training acc 54.647999999999996 val acc 55.15
	Batch 3 of 390 training acc 62.726000000000006 val acc 62.55
	Batch 4 of 390 training acc 64.66 val acc 64.03999999999999
	Batch 5 of 390 training acc 67.628 val acc 67.56
	Batch 6 of 390 training acc 70.368 val acc 70.0
	Batch 7 of 390 training acc 69.45400000000001 val acc 69.0
	Batch 8 of 390 training acc 72.906 val acc 72.64
	Batch 9 of 390 training acc 72.372 val acc 72.26
	Batch 10 of 390 training acc 71.454 val acc 71.00999999999999
	Batch 11 of 390 training acc 70.658 val acc 70.48
	Batch 12 of 390 training acc 72.174 val acc 71.63000000000001
	Batch 13 of 390 training acc 73.10799999999999 val acc 73.21
	Batch 14 of 390 training acc 74.606 val acc 74.42999999999999
	Batch 15 of 390 training acc 74.442 val acc 74.44
	Batch 16 of 390 training acc 73.188 val acc 72.87
	Batch 17 of 390 t

	Batch 147 of 390 training acc 75.31400000000001 val acc 74.64
	Batch 148 of 390 training acc 70.842 val acc 70.19999999999999
	Batch 149 of 390 training acc 73.874 val acc 73.19
	Batch 150 of 390 training acc 79.72 val acc 79.35
	Batch 151 of 390 training acc 80.268 val acc 79.56
	Batch 152 of 390 training acc 76.974 val acc 76.38000000000001
	Batch 153 of 390 training acc 81.922 val acc 81.35
	Batch 154 of 390 training acc 77.61 val acc 77.32
	Batch 155 of 390 training acc 78.08 val acc 77.38000000000001
	Batch 156 of 390 training acc 79.188 val acc 78.44
	Batch 157 of 390 training acc 76.744 val acc 76.73
	Batch 158 of 390 training acc 78.474 val acc 77.88000000000001
	Batch 159 of 390 training acc 80.292 val acc 79.65
	Batch 160 of 390 training acc 75.062 val acc 73.95
	Batch 161 of 390 training acc 72.386 val acc 71.82
	Batch 162 of 390 training acc 67.95400000000001 val acc 67.82000000000001
	Batch 163 of 390 training acc 73.556 val acc 72.77
	Batch 164 of 390 training acc 77.212

	Batch 294 of 390 training acc 76.02799999999999 val acc 75.39
	Batch 295 of 390 training acc 80.33 val acc 80.07
	Batch 296 of 390 training acc 78.208 val acc 77.52
	Batch 297 of 390 training acc 77.324 val acc 76.91
	Batch 298 of 390 training acc 79.11399999999999 val acc 78.62
	Batch 299 of 390 training acc 75.71799999999999 val acc 75.14
	Batch 300 of 390 training acc 79.56400000000001 val acc 79.14999999999999
	Batch 301 of 390 training acc 81.978 val acc 81.26
	Batch 302 of 390 training acc 80.994 val acc 80.25
	Batch 303 of 390 training acc 78.774 val acc 78.18
	Batch 304 of 390 training acc 73.482 val acc 72.78
	Batch 305 of 390 training acc 76.088 val acc 75.77000000000001
	Batch 306 of 390 training acc 75.022 val acc 74.06
	Batch 307 of 390 training acc 77.704 val acc 77.45
	Batch 308 of 390 training acc 66.988 val acc 66.72
	Batch 309 of 390 training acc 68.726 val acc 68.4
	Batch 310 of 390 training acc 77.58800000000001 val acc 76.52
	Batch 311 of 390 training acc 77.672 v

	Batch 52 of 390 training acc 83.016 val acc 82.11
	Batch 53 of 390 training acc 81.66 val acc 80.60000000000001
	Batch 54 of 390 training acc 76.386 val acc 75.36
	Batch 55 of 390 training acc 78.192 val acc 77.17
	Batch 56 of 390 training acc 67.976 val acc 66.52
	Batch 57 of 390 training acc 69.726 val acc 68.91000000000001
	Batch 58 of 390 training acc 64.872 val acc 63.63999999999999
	Batch 59 of 390 training acc 73.136 val acc 72.02
	Batch 60 of 390 training acc 76.084 val acc 74.88
	Batch 61 of 390 training acc 75.884 val acc 75.17
	Batch 62 of 390 training acc 77.618 val acc 77.07000000000001
	Batch 63 of 390 training acc 81.186 val acc 80.38
	Batch 64 of 390 training acc 81.696 val acc 80.74
	Batch 65 of 390 training acc 81.042 val acc 80.47999999999999
	Batch 66 of 390 training acc 81.44 val acc 80.71000000000001
	Batch 67 of 390 training acc 80.972 val acc 79.91
	Batch 68 of 390 training acc 81.356 val acc 80.52
	Batch 69 of 390 training acc 78.704 val acc 77.51
	Batch 70 of

	Batch 200 of 390 training acc 76.336 val acc 75.98
	Batch 201 of 390 training acc 76.108 val acc 74.85000000000001
	Batch 202 of 390 training acc 80.032 val acc 78.91
	Batch 203 of 390 training acc 82.222 val acc 81.17999999999999
	Batch 204 of 390 training acc 81.64200000000001 val acc 80.36999999999999
	Batch 205 of 390 training acc 81.374 val acc 80.2
	Batch 206 of 390 training acc 81.116 val acc 79.96
	Batch 207 of 390 training acc 79.892 val acc 79.33
	Batch 208 of 390 training acc 76.05199999999999 val acc 75.24
	Batch 209 of 390 training acc 82.018 val acc 81.07
	Batch 210 of 390 training acc 83.964 val acc 83.00999999999999
	Batch 211 of 390 training acc 83.304 val acc 82.16
	Batch 212 of 390 training acc 79.264 val acc 78.08
	Batch 213 of 390 training acc 81.372 val acc 80.58
	Batch 214 of 390 training acc 74.124 val acc 72.88
	Batch 215 of 390 training acc 72.056 val acc 71.84
	Batch 216 of 390 training acc 76.068 val acc 75.42
	Batch 217 of 390 training acc 81.038 val acc 8

	Batch 348 of 390 training acc 77.01599999999999 val acc 75.7
	Batch 349 of 390 training acc 75.576 val acc 74.91
	Batch 350 of 390 training acc 71.37400000000001 val acc 69.97
	Batch 351 of 390 training acc 67.204 val acc 66.3
	Batch 352 of 390 training acc 79.652 val acc 78.95
	Batch 353 of 390 training acc 74.32 val acc 73.35000000000001
	Batch 354 of 390 training acc 72.15 val acc 72.05
	Batch 355 of 390 training acc 78.45400000000001 val acc 77.42
	Batch 356 of 390 training acc 79.426 val acc 78.64
	Batch 357 of 390 training acc 80.16 val acc 78.71000000000001
	Batch 358 of 390 training acc 81.152 val acc 80.04
	Batch 359 of 390 training acc 78.94 val acc 77.64999999999999
	Batch 360 of 390 training acc 77.59 val acc 76.32
	Batch 361 of 390 training acc 72.584 val acc 71.37
	Batch 362 of 390 training acc 78.096 val acc 77.08
	Batch 363 of 390 training acc 73.292 val acc 72.61
	Batch 364 of 390 training acc 81.994 val acc 81.17999999999999
	Batch 365 of 390 training acc 82.652 val 

	Batch 107 of 390 training acc 79.33 val acc 78.27
	Batch 108 of 390 training acc 82.554 val acc 81.57
	Batch 109 of 390 training acc 81.53200000000001 val acc 80.43
	Batch 110 of 390 training acc 81.52199999999999 val acc 80.72
	Batch 111 of 390 training acc 75.432 val acc 74.7
	Batch 112 of 390 training acc 77.05 val acc 76.13
	Batch 113 of 390 training acc 82.99600000000001 val acc 82.42
	Batch 114 of 390 training acc 80.86 val acc 79.88
	Batch 115 of 390 training acc 79.412 val acc 78.53999999999999
	Batch 116 of 390 training acc 82.536 val acc 81.62
	Batch 117 of 390 training acc 81.826 val acc 80.78
	Batch 118 of 390 training acc 79.448 val acc 78.83
	Batch 119 of 390 training acc 81.596 val acc 80.25
	Batch 120 of 390 training acc 78.352 val acc 77.27000000000001
	Batch 121 of 390 training acc 74.876 val acc 74.81
	Batch 122 of 390 training acc 79.208 val acc 78.31
	Batch 123 of 390 training acc 83.592 val acc 82.73
	Batch 124 of 390 training acc 78.794 val acc 78.21000000000001

In [1237]:
pred_svm = svm_fashion.predict(X_train_fashion)
print('The training accuracy is given by: %f' % (get_acc(pred_svm, y_train_fashion)))

The training accuracy is given by: 83.884000


In [1238]:
print(pred_svm[0:33])
print(y_train_fashion[0:33])

[9 0 4 6 3 2 7 2 5 5 0 9 5 7 7 9 1 0 2 4 3 1 4 8 2 3 0 2 4 4 5 3 6]
[9 0 0 3 0 2 7 2 5 5 0 9 5 5 7 9 1 0 6 4 3 1 4 8 4 3 0 2 4 4 5 3 6]


### Validate SVM on Fashion-MNIST

In [1239]:
pred_svm = svm_fashion.predict(X_val_fashion)
print('The validation accuracy is given by: %f' % (get_acc(pred_svm, y_val_fashion)))

The validation accuracy is given by: 83.440000


### Test SVM on Fashion-MNIST

In [1240]:
pred_svm = svm_fashion.predict(X_test_fashion)
print('The testing accuracy is given by: %f' % (get_acc(pred_svm, y_test_fashion)))

The testing accuracy is given by: 81.990000


In [1241]:
print(pred_svm[0:33])
print(y_test_fashion[0:33])

[9 2 1 1 6 1 4 6 5 7 4 5 7 3 4 1 2 2 8 0 0 7 7 7 1 2 6 3 9 3 8 8 3]
[9 2 1 1 6 1 4 6 5 7 4 5 7 3 4 1 2 4 8 0 2 5 7 9 1 4 6 0 9 3 8 8 3]


### SVM_Fashion-MNIST Kaggle Submission

Once you are satisfied with your solution and test accuracy output a file to submit your test set predictions to the Kaggle for Assignment 1 Fashion-MNIST. Use the following code to do so:

In [1242]:
output_submission_csv('kaggle/svm_submission_fashion.csv', svm_fashion.predict(X_test_fashion))

## Train SVM on Mushroom

In [318]:
lr = 0.5
n_epochs = 10
reg_const = 0.05

svm_MR = SVM(n_class_MR, lr, n_epochs, reg_const)
svm_MR.train(X_train_MR, y_train_MR, X_val_MR, y_val_MR)

epoch 0
	Batch 0 of 38 training acc 51.682396389002875 val acc 50.830769230769235
	Batch 1 of 38 training acc 51.682396389002875 val acc 50.830769230769235
	Batch 2 of 38 training acc 48.317603610997125 val acc 49.16923076923077
	Batch 3 of 38 training acc 51.682396389002875 val acc 50.830769230769235
	Batch 4 of 38 training acc 48.317603610997125 val acc 49.16923076923077
	Batch 5 of 38 training acc 51.682396389002875 val acc 50.830769230769235
	Batch 6 of 38 training acc 60.99712761592121 val acc 58.27692307692308
	Batch 7 of 38 training acc 53.610997127615924 val acc 52.61538461538462
	Batch 8 of 38 training acc 63.828477636438244 val acc 61.23076923076923
	Batch 9 of 38 training acc 74.86663931062782 val acc 73.53846153846155
	Batch 10 of 38 training acc 70.94788674599918 val acc 68.55384615384615
	Batch 11 of 38 training acc 79.4009027492819 val acc 77.78461538461539
	Batch 12 of 38 training acc 65.08001641362331 val acc 62.76923076923077
	Batch 13 of 38 training acc 74.1280262617

	Batch 4 of 38 training acc 66.53672548215019 val acc 63.87692307692308
	Batch 5 of 38 training acc 62.51538777185064 val acc 63.38461538461539
	Batch 6 of 38 training acc 66.88551497743127 val acc 64.49230769230769
	Batch 7 of 38 training acc 74.02544111612639 val acc 72.36923076923077
	Batch 8 of 38 training acc 84.38654082888797 val acc 81.78461538461539
	Batch 9 of 38 training acc 83.54534263438654 val acc 83.01538461538462
	Batch 10 of 38 training acc 88.79770209273697 val acc 87.87692307692308
	Batch 11 of 38 training acc 67.00861715223635 val acc 68.18461538461538
	Batch 12 of 38 training acc 67.11120229790725 val acc 64.73846153846154
	Batch 13 of 38 training acc 62.37176856791137 val acc 63.38461538461539
	Batch 14 of 38 training acc 61.61263848994666 val acc 59.815384615384616
	Batch 15 of 38 training acc 66.86499794829709 val acc 67.50769230769231
	Batch 16 of 38 training acc 85.61756257693885 val acc 83.13846153846154
	Batch 17 of 38 training acc 86.39720968403776 val acc 8

	Batch 7 of 38 training acc 90.21337710299548 val acc 89.78461538461538
	Batch 8 of 38 training acc 82.29380385720148 val acc 82.46153846153847
	Batch 9 of 38 training acc 89.02338941321297 val acc 87.2
	Batch 10 of 38 training acc 91.21871153057037 val acc 90.64615384615384


In [319]:
pred_svm = svm_MR.predict(X_train_MR)
print('The training accuracy is given by: %f' % (get_acc(pred_svm, y_train_MR)))

The training accuracy is given by: 91.218712


### Validate SVM on Mushroom

In [320]:
pred_svm = svm_MR.predict(X_val_MR)
print('The validation accuracy is given by: %f' % (get_acc(pred_svm, y_val_MR)))

The validation accuracy is given by: 90.646154


## Test SVM on Mushroom

In [321]:
pred_svm = svm_MR.predict(X_test_MR)
print('The testing accuracy is given by: %f' % (get_acc(pred_svm, y_test_MR)))

The testing accuracy is given by: 90.523077


# Softmax Classifier (with SGD)

Next, you will train a Softmax classifier. This classifier consists of a linear function of the input data followed by a softmax function which outputs a vector of dimension C (number of classes) for each data point. Each entry of the softmax output vector corresponds to a confidence in one of the C classes, and like a probability distribution, the entries of the output vector sum to 1. We use a cross-entropy loss on this sotmax output to train the model. 

Check the following link as an additional resource on softmax classification: http://cs231n.github.io/linear-classify/#softmax

Once again we will train the classifier with SGD. This means you need to compute the gradients of the softmax cross-entropy loss function according to the weights and update the weights using this gradient. Check the following link to help with implementing the gradient updates: https://deepnotes.io/softmax-crossentropy

The softmax classifier has 3 hyperparameters that you can experiment with:
- **Learning rate** - As above, this controls how much the model weights are updated with respect to their gradient.
- **Number of Epochs** - As described for perceptron.
- **Regularization constant** - Hyperparameter to determine the strength of regularization. In this case, we minimize the L2 norm of the model weights as regularization, so the regularization constant is a coefficient on the L2 norm in the combined cross-entropy and regularization objective.

You will implement a softmax classifier using SGD in the **models/softmax.py**

The following code: 
- Creates an instance of the Softmax classifier class 
- The train function of the Softmax class is trained on the training data
- We use the predict function to find the training accuracy as well as the testing accuracy

## Softmax

In [327]:
"""Softmax model."""

import numpy as np
import copy
from scipy.special import softmax


class Softmax:
    def __init__(self, n_class: int, lr: float, epochs: int, reg_const: float):
        """Initialize a new classifier.

        Parameters:
            n_class: the number of classes
            lr: the learning rate
            epochs: the number of epochs to train for
            reg_const: the regularization constant
        """
        self.w = None  # TODO: change this
        self.lr = lr
        self.epochs = epochs
        self.reg_const = reg_const
        self.n_class = n_class

    def calc_gradient(self, X_train: np.ndarray, y_train: np.ndarray) -> np.ndarray:
        """Calculate gradient of the softmax loss.

        Inputs have dimension D, there are C classes, and we operate on
        mini-batches of N examples.

        Parameters:
            X_train: a numpy array of shape (N, D) containing a mini-batch
                of data
            y_train: a numpy array of shape (N,) containing training labels;
                y[i] = c means that X[i] has label c, where 0 <= c < C

        Returns:
            gradient with respect to weights w; an array of same shape as w
        """
        # TODO: implement me

        N,D = X_train.shape
        grad_w = np.zeros((self.n_class,D))
        
        scores = softmax((self.w@X_train.T).T,axis = 1)
        for i in range(N): # for each of the data
            for j in range(self.n_class):
                if j == y_train[i]:
                    grad_w[j] += (scores[i][j] - 1)*X_train[i]
                else:
                    grad_w[j] += scores[i][j]*X_train[i]
        return grad_w
    
        

    def train(self, X_train: np.ndarray, y_train: np.ndarray, X_val: np.ndarray, y_val: np.ndarray):
        """Train the classifier.

        Hint: operate on mini-batches of data for SGD.

        Parameters:
            X_train: a numpy array of shape (N, D) containing training data;
                N examples with D dimensions
            y_train: a numpy array of shape (N,) containing training labels
        """
        # TODO: implement me
                # TODO: implement me
        BATCH_SIZE = 128
        
        # start with random weights
        random.seed(666)
        b_up = np.min(X_train)
        b_low = np.max(X_train)
        self.w = np.array([[random.uniform(b_low,b_up) for j in range(X_train.shape[1])] for i in range(self.n_class)])
        
        N,D = X_train.shape
        it = N//BATCH_SIZE
        
        pred_svm_t = self.predict(X_train)
        t_acc = self.get_acc(pred_svm_t, y_train)
        
        for epoch in range(self.epochs):
            print("epoch",epoch)
            for i in range(N//BATCH_SIZE): # feed in training data batch-wise
                    
                X_train_batch = X_train[BATCH_SIZE*i:BATCH_SIZE*(i+1)]
                y_train_batch = y_train[BATCH_SIZE*i:BATCH_SIZE*(i+1)]
        
                grad_w = self.calc_gradient(X_train_batch, y_train_batch)

                for c in range(len(self.w)):
                    self.w[c] = self.w[c] - self.lr*grad_w[c]
#                 return
            
                ret = self.predict(X_train)
                t_cur_acc = self.get_acc(ret, y_train)
                pred_svm_v = self.predict(X_val)
                cur_v_acc = self.get_acc(pred_svm_v, y_val)
                if i%1 == 0:
                    print("\tBatch",i,"of",it,"training acc",t_cur_acc,"val acc",cur_v_acc)
                    
                # early stop
                if cur_v_acc >= 90: # found 83
                        return
    
    
    
    def get_acc(self, pred, y_test):
        return np.sum(y_test == pred) / len(y_test) * 100

    
    
    def predict(self, X_test: np.ndarray) -> np.ndarray:
        """Use the trained weights to predict labels for test data points.

        Parameters:
            X_test: a numpy array of shape (N, D) containing testing data;
                N examples with D dimensions

        Returns:
            predicted labels for the data in X_test; a 1-dimensional array of
                length N, where each element is an integer giving the predicted
                class.
        """
        # TODO: implement me
        ret = []
        for i in range(X_test.shape[0]):
            scores = np.dot(self.w, X_test[i])
#             print(scores)
            max_score = float("-inf")
            max_class = -1
            for j in range(len(scores)):
                if scores[j] > max_score:
                    max_score = scores[j]
                    max_class = j
            ret.append(max_class)
        return np.array(ret)

## Train Softmax on Fashion-MNIST

In [110]:
lr = 0.5
n_epochs = 10
reg_const = 0.5

softmax_fashion = Softmax(n_class_fashion, lr, n_epochs, reg_const)
softmax_fashion.train(X_train_fashion, y_train_fashion,X_val_fashion,y_val_fashion)

epoch 0
	Batch 0 of 390 training acc 42.918 val acc 42.89
	Batch 1 of 390 training acc 50.334 val acc 50.370000000000005
	Batch 2 of 390 training acc 54.790000000000006 val acc 54.53
	Batch 3 of 390 training acc 56.716 val acc 56.58
	Batch 4 of 390 training acc 59.5 val acc 58.160000000000004
	Batch 5 of 390 training acc 63.376 val acc 62.81
	Batch 6 of 390 training acc 68.95599999999999 val acc 68.89
	Batch 7 of 390 training acc 68.49199999999999 val acc 68.44
	Batch 8 of 390 training acc 71.718 val acc 71.32
	Batch 9 of 390 training acc 68.274 val acc 67.25999999999999
	Batch 10 of 390 training acc 65.56 val acc 65.77
	Batch 11 of 390 training acc 61.842 val acc 61.47
	Batch 12 of 390 training acc 65.95400000000001 val acc 65.56
	Batch 13 of 390 training acc 62.49400000000001 val acc 62.11
	Batch 14 of 390 training acc 69.85 val acc 69.59
	Batch 15 of 390 training acc 61.022 val acc 60.38
	Batch 16 of 390 training acc 65.172 val acc 64.1
	Batch 17 of 390 training acc 72.762 val acc 7

	Batch 148 of 390 training acc 75.53999999999999 val acc 74.94
	Batch 149 of 390 training acc 74.388 val acc 73.83999999999999
	Batch 150 of 390 training acc 79.82000000000001 val acc 79.67999999999999
	Batch 151 of 390 training acc 79.022 val acc 78.67
	Batch 152 of 390 training acc 73.698 val acc 73.3
	Batch 153 of 390 training acc 75.888 val acc 75.7
	Batch 154 of 390 training acc 78.12 val acc 77.49000000000001
	Batch 155 of 390 training acc 77.41 val acc 77.05
	Batch 156 of 390 training acc 75.21600000000001 val acc 74.19
	Batch 157 of 390 training acc 77.95 val acc 77.42999999999999
	Batch 158 of 390 training acc 75.76 val acc 75.22
	Batch 159 of 390 training acc 77.58 val acc 77.37
	Batch 160 of 390 training acc 75.726 val acc 74.6
	Batch 161 of 390 training acc 74.966 val acc 74.55000000000001
	Batch 162 of 390 training acc 71.936 val acc 70.89
	Batch 163 of 390 training acc 74.05199999999999 val acc 73.75
	Batch 164 of 390 training acc 81.33200000000001 val acc 80.66
	Batch 16

	Batch 293 of 390 training acc 80.644 val acc 79.97999999999999
	Batch 294 of 390 training acc 78.228 val acc 77.24
	Batch 295 of 390 training acc 82.35 val acc 81.38
	Batch 296 of 390 training acc 80.522 val acc 79.63
	Batch 297 of 390 training acc 73.458 val acc 72.48
	Batch 298 of 390 training acc 77.27199999999999 val acc 76.64999999999999
	Batch 299 of 390 training acc 79.292 val acc 78.45
	Batch 300 of 390 training acc 79.252 val acc 78.59
	Batch 301 of 390 training acc 79.174 val acc 78.07
	Batch 302 of 390 training acc 83.204 val acc 82.44
	Batch 303 of 390 training acc 82.272 val acc 81.0
	Batch 304 of 390 training acc 80.11 val acc 78.97999999999999
	Batch 305 of 390 training acc 82.814 val acc 81.52000000000001
	Batch 306 of 390 training acc 77.678 val acc 76.52
	Batch 307 of 390 training acc 81.66799999999999 val acc 80.84
	Batch 308 of 390 training acc 80.182 val acc 79.36
	Batch 309 of 390 training acc 74.532 val acc 74.17
	Batch 310 of 390 training acc 73.13199999999999 

	Batch 48 of 390 training acc 80.38 val acc 79.21000000000001
	Batch 49 of 390 training acc 82.65599999999999 val acc 81.81
	Batch 50 of 390 training acc 78.61399999999999 val acc 77.01
	Batch 51 of 390 training acc 77.718 val acc 76.71
	Batch 52 of 390 training acc 76.16000000000001 val acc 75.68
	Batch 53 of 390 training acc 76.25800000000001 val acc 75.3
	Batch 54 of 390 training acc 75.484 val acc 74.82
	Batch 55 of 390 training acc 74.854 val acc 74.09
	Batch 56 of 390 training acc 78.24 val acc 77.18
	Batch 57 of 390 training acc 78.28 val acc 77.27000000000001
	Batch 58 of 390 training acc 78.264 val acc 77.36
	Batch 59 of 390 training acc 74.24199999999999 val acc 73.53
	Batch 60 of 390 training acc 75.054 val acc 74.22999999999999
	Batch 61 of 390 training acc 76.606 val acc 75.76
	Batch 62 of 390 training acc 78.604 val acc 77.66
	Batch 63 of 390 training acc 83.46000000000001 val acc 82.39999999999999
	Batch 64 of 390 training acc 79.566 val acc 78.96
	Batch 65 of 390 traini

	Batch 194 of 390 training acc 83.206 val acc 82.45
	Batch 195 of 390 training acc 81.626 val acc 80.63
	Batch 196 of 390 training acc 83.428 val acc 82.57
	Batch 197 of 390 training acc 82.428 val acc 81.17999999999999
	Batch 198 of 390 training acc 82.284 val acc 80.96
	Batch 199 of 390 training acc 74.574 val acc 74.14
	Batch 200 of 390 training acc 79.278 val acc 78.48
	Batch 201 of 390 training acc 74.09 val acc 73.38
	Batch 202 of 390 training acc 78.656 val acc 77.82
	Batch 203 of 390 training acc 81.95599999999999 val acc 81.28999999999999
	Batch 204 of 390 training acc 82.72800000000001 val acc 81.5
	Batch 205 of 390 training acc 82.16799999999999 val acc 81.16
	Batch 206 of 390 training acc 81.744 val acc 80.25
	Batch 207 of 390 training acc 79.998 val acc 78.89
	Batch 208 of 390 training acc 75.91 val acc 74.83999999999999
	Batch 209 of 390 training acc 82.17999999999999 val acc 81.15
	Batch 210 of 390 training acc 81.194 val acc 79.86
	Batch 211 of 390 training acc 80.006 v

	Batch 340 of 390 training acc 77.178 val acc 77.10000000000001
	Batch 341 of 390 training acc 79.554 val acc 78.67
	Batch 342 of 390 training acc 79.294 val acc 77.95
	Batch 343 of 390 training acc 79.892 val acc 78.73
	Batch 344 of 390 training acc 78.392 val acc 77.18
	Batch 345 of 390 training acc 81.322 val acc 80.30000000000001
	Batch 346 of 390 training acc 74.59400000000001 val acc 73.22999999999999
	Batch 347 of 390 training acc 79.366 val acc 78.35
	Batch 348 of 390 training acc 80.608 val acc 79.73
	Batch 349 of 390 training acc 83.684 val acc 82.25
	Batch 350 of 390 training acc 83.148 val acc 81.46
	Batch 351 of 390 training acc 77.99199999999999 val acc 76.41
	Batch 352 of 390 training acc 77.456 val acc 76.55999999999999
	Batch 353 of 390 training acc 73.24000000000001 val acc 71.81
	Batch 354 of 390 training acc 75.424 val acc 74.57000000000001
	Batch 355 of 390 training acc 75.81599999999999 val acc 74.68
	Batch 356 of 390 training acc 72.1 val acc 71.25
	Batch 357 of 

In [112]:
pred_softmax = softmax_fashion.predict(X_train_fashion)
print('The training accuracy is given by: %f' % (get_acc(pred_softmax, y_train_fashion)))

The training accuracy is given by: 84.950000


### Validate Softmax on Fashion-MNIST

In [113]:
pred_softmax = softmax_fashion.predict(X_val_fashion)
print('The validation accuracy is given by: %f' % (get_acc(pred_softmax, y_val_fashion)))

The validation accuracy is given by: 83.410000


### Testing Softmax on Fashion-MNIST

In [114]:
pred_softmax = softmax_fashion.predict(X_test_fashion)
print('The testing accuracy is given by: %f' % (get_acc(pred_softmax, y_test_fashion)))

The testing accuracy is given by: 82.850000


### Softmax_Fashion-MNIST Kaggle Submission

Once you are satisfied with your solution and test accuracy output a file to submit your test set predictions to the Kaggle for Assignment 1 Fashion-MNIST. Use the following code to do so:

In [115]:
output_submission_csv('kaggle/softmax_submission_fashion.csv', softmax_fashion.predict(X_test_fashion))

## Train Softmax on Mushroom

In [328]:
lr = 0.5
n_epochs = 10
reg_const = 0.05

softmax_MR = Softmax(n_class_MR, lr, n_epochs, reg_const)
softmax_MR.train(X_train_MR, y_train_MR,X_val_MR,y_val_MR)

epoch 0
	Batch 0 of 38 training acc 51.682396389002875 val acc 50.830769230769235
	Batch 1 of 38 training acc 51.682396389002875 val acc 50.830769230769235
	Batch 2 of 38 training acc 48.317603610997125 val acc 49.16923076923077
	Batch 3 of 38 training acc 51.682396389002875 val acc 50.830769230769235
	Batch 4 of 38 training acc 48.317603610997125 val acc 49.16923076923077
	Batch 5 of 38 training acc 51.682396389002875 val acc 50.830769230769235
	Batch 6 of 38 training acc 61.05867870332376 val acc 58.4
	Batch 7 of 38 training acc 53.959786622897 val acc 52.86153846153846
	Batch 8 of 38 training acc 63.8695116947066 val acc 61.41538461538462
	Batch 9 of 38 training acc 74.82560525235947 val acc 73.47692307692307
	Batch 10 of 38 training acc 71.00943783340172 val acc 68.73846153846154
	Batch 11 of 38 training acc 79.38038572014773 val acc 77.72307692307693
	Batch 12 of 38 training acc 65.12105047189168 val acc 63.01538461538462
	Batch 13 of 38 training acc 74.12802626179729 val acc 72.6

	Batch 3 of 38 training acc 87.79236766516209 val acc 86.09230769230768
	Batch 4 of 38 training acc 83.42224045958146 val acc 83.50769230769231
	Batch 5 of 38 training acc 87.81288469429627 val acc 86.21538461538462
	Batch 6 of 38 training acc 82.86828067295856 val acc 82.7076923076923
	Batch 7 of 38 training acc 88.63356585966352 val acc 86.64615384615385
	Batch 8 of 38 training acc 87.89495281083299 val acc 87.6923076923077
	Batch 9 of 38 training acc 83.09396799343455 val acc 83.2
	Batch 10 of 38 training acc 89.4337299958966 val acc 88.0
	Batch 11 of 38 training acc 75.42059909725072 val acc 74.95384615384614
	Batch 12 of 38 training acc 77.55437012720559 val acc 75.07692307692308
	Batch 13 of 38 training acc 68.97825194911776 val acc 69.9076923076923
	Batch 14 of 38 training acc 66.68034468608946 val acc 63.87692307692308
	Batch 15 of 38 training acc 59.3557652851867 val acc 60.184615384615384
	Batch 16 of 38 training acc 68.46532622076323 val acc 66.21538461538462
	Batch 17 of 38

In [329]:
pred_softmax = softmax_MR.predict(X_train_MR)
print('The training accuracy is given by: %f' % (get_acc(pred_softmax, y_train_MR)))

The training accuracy is given by: 90.418547


### Validate Softmax on Mushroom

In [330]:
pred_softmax = softmax_MR.predict(X_val_MR)
print('The validation accuracy is given by: %f' % (get_acc(pred_softmax, y_val_MR)))

The validation accuracy is given by: 90.338462


### Testing Softmax on Mushroom

In [331]:
pred_softmax = softmax_MR.predict(X_test_MR)
print('The testing accuracy is given by: %f' % (get_acc(pred_softmax, y_test_MR)))

The testing accuracy is given by: 89.723077


# Logistic Classifier

The Logistic Classifier has 2 hyperparameters that you can experiment with:
- **Learning rate** - similar to as defined above in Perceptron, this parameter scales by how much the weights are changed according to the calculated gradient update. 
- **Number of Epochs** - As described for perceptron.
- **Threshold** - The decision boundary of the classifier.


You will implement the Logistic Classifier in the **models/logistic.py**

The following code: 
- Creates an instance of the Logistic classifier class 
- The train function of the Logistic class is trained on the training data
- We use the predict function to find the training accuracy as well as the testing accuracy

In [1244]:
## Codes for logistic Regressor

In [322]:
"""Logistic regression model."""

import numpy as np


class Logistic:
    def __init__(self, lr: float, epochs: int, threshold: float):
        """Initialize a new classifier.

        Parameters:
            lr: the learning rate
            epochs: the number of epochs to train for
        """
        self.w = None  # TODO: change this
        self.lr = lr
        self.epochs = epochs
        self.threshold = threshold

    def sigmoid(self, z: np.ndarray) -> np.ndarray:
        """Sigmoid function.

        Parameters:
            z: the input

        Returns:
            the sigmoid of the input
        """
        # TODO: implement me
        return 1/(1+ np.exp(-z))

    def train(self, X_train: np.ndarray, y_train: np.ndarray):
        """Train the classifier.

        Use the logistic regression update rule as introduced in lecture.

        Parameters:
            X_train: a numpy array of shape (N, D) containing training data;
                N examples with D dimensions
            y_train: a numpy array of shape (N,) containing training labels
        """
        # TODO: implement me
        print(X_train.shape)
        random.seed(5)
        upper_b = np.min(X_train)
        lower_b = np.max(X_train)
        self.w = np.zeros(X_train.shape[1])
        print("w",self.w)
        
        for epoch in range(self.epochs):
            cnt = 0
            # for each of the training data
            for i in range(X_train.shape[0]):
                  grad_w = X_train[i] * (y_train[i] - self.sigmoid(np.dot(self.w, X_train[i])))
                  self.w += self.lr*grad_w
            print("\t","epoch",epoch,"training")
        print(self.w)
            
    def predict(self, X_test: np.ndarray) -> np.ndarray:
        """Use the trained weights to predict labels for test data points.

        Parameters:
            X_test: a numpy array of shape (N, D) containing testing data;
                N examples with D dimensions

        Returns:
            predicted labels for the data in X_test; a 1-dimensional array of
                length N, where each element is an integer giving the predicted
                class.
        """
        # TODO: implement me
        ret = []
        for i in range(len(X_test)):
            score = np.dot(self.w,X_test[i])
#             print("!!!",score,)
#             print(X_test[i])
#             print("score",score)
#             print(self.sigmoid(score))
            if self.sigmoid(score) >= threshold:
                ret.append(1)
            else:
                ret.append(0)
#         print(ret)
        return np.array(ret)

### Training Logistic Classifer

In [323]:
learning_rate = 0.01
n_epochs = 10
threshold = 0.5

lr = Logistic(learning_rate, n_epochs, threshold)
lr.train(X_train_MR, y_train_MR)

(4874, 22)
w [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
	 epoch 0 training
	 epoch 1 training
	 epoch 2 training
	 epoch 3 training
	 epoch 4 training
	 epoch 5 training
	 epoch 6 training
	 epoch 7 training
	 epoch 8 training
	 epoch 9 training
[-0.05059437  0.6329352  -0.04606689 -1.01571977 -0.28967443  1.26200838
 -3.26412716  5.22235985 -0.23821752 -1.0674978  -1.40697891 -2.70207919
 -0.59391416 -0.12607478 -0.09474378  0.          3.25956399 -0.59241467
  0.21116122 -0.45914512  0.26218896  0.20340367]


In [324]:
pred_lr = lr.predict(X_train_MR)
print('The training accuracy is given by: %f' % (get_acc(pred_lr, y_train_MR)))

The training accuracy is given by: 90.951990


### Validate Logistic Classifer

In [325]:
pred_lr = lr.predict(X_val_MR)
print('The validation accuracy is given by: %f' % (get_acc(pred_lr, y_val_MR)))

The validation accuracy is given by: 89.784615


### Test Logistic Classifier

In [326]:
pred_lr = lr.predict(X_test_MR)
print('The testing accuracy is given by: %f' % (get_acc(pred_lr, y_test_MR)))

The testing accuracy is given by: 90.646154
