In [1]:
import random
import numpy as np
from data_process import get_CIFAR10_data
from scipy.spatial import distance
%matplotlib inline
from save_submission import output_submission_csv

# Loading CIFAR-10

In the following cells we determine the number of images for each split and load the images.
<br /> 
TRAIN_IMAGES + VAL_IMAGES = (0, 50000]
, TEST_IMAGES = 10000

In [2]:
# You can change these numbers for experimentation
# For submission we will use the default values 
TRAIN_IMAGES = 4000
VAL_IMAGES = 1000

In [3]:
data = get_CIFAR10_data(TRAIN_IMAGES, VAL_IMAGES)
X_train_CIFAR, y_train_CIFAR = data['X_train'], data['y_train']
X_val_CIFAR, y_val_CIFAR = data['X_val'], data['y_val']
X_test_CIFAR, y_test_CIFAR = data['X_test'], data['y_test']
n_class_CIFAR = len(np.unique(y_test_CIFAR))

Convert the sets of images from dimensions of **(N, 3, 32, 32) -> (N, 3072)** where N is the number of images so that each **3x32x32** image is represented by a single vector.

In [4]:
X_train_CIFAR = np.reshape(X_train_CIFAR, (X_train_CIFAR.shape[0], -1))
X_val_CIFAR = np.reshape(X_val_CIFAR, (X_val_CIFAR.shape[0], -1))
X_test_CIFAR = np.reshape(X_test_CIFAR, (X_test_CIFAR.shape[0], -1))

# Perceptron

Perceptron has 2 hyperparameters that you can experiment with:
- **Learning rate** - controls how much we change the current weights of the classifier during each update. We set it at a default value of 0.5, but you should experiment with different values. We recommend changing the learning rate by factors of 10 and observing how the performance of the classifier changes. You should also try adding a **decay** which slowly reduces the learning rate over each epoch.
- **Number of Epochs** - An epoch is a complete iterative pass over all of the data in the dataset. During an epoch we predict a label using the classifier and then update the weights of the classifier according the perceptron update rule for each sample in the training set. You should try different values for the number of training epochs and report your results.

You will implement the Perceptron classifier in the **models/Perceptron.py**

The following code: 
- Creates an instance of the Perceptron classifier class 
- The train function of the Perceptron class is trained on the training data
- We use the predict function to find the training accuracy as well as the testing accuracy


# Model Perceptron

In [7]:
import numpy as np


class Perceptron:
    def __init__(self, n_class: int, lr: float, epochs: int):
        """Initialize a new classifier.
        Parameters:
            n_class: the number of classes
            lr: the learning rate
            epochs: the number of epochs to train for
        """
        self.w1 = None
        self.w2 = None
        self.b1 = None
        self.b2 = None
        self.lr = lr
        self.epochs = epochs
        self.n_class = n_class
        
    def sigmoid(self, s):
        
        # Normalize due to a warning 
        s = s / np.linalg.norm(s, axis=-1)[:, np.newaxis]
        return 1 / (1 + np.exp(-s))
    
    def softmax(self, s):
        
        # Normalize due to a warning 
        s = s / np.linalg.norm(s, axis=-1)[:, np.newaxis]
        return np.exp(s) / np.sum(np.exp(s), axis = 0)

    def train(self, X_train: np.ndarray, y_train: np.ndarray):
        
        """Train the classifier.
        Use the perceptron update rule as introduced in Lecture 3.
        Parameters:
            X_train: a number array of shape (N, D) containing training data;
                N examples with D dimensions
            y_train: a numpy array of shape (N,) containing training labels
        """
        
        N, D = X_train.shape
        self.w = np.random.randn(self.n_class, D)

        ###### YOUR CODE STARTS HERE ######
        
        
#       initalize params

        self.w1 = np.random.rand(self.n_class, D)
        self.w2 = np.random.rand(self.n_class, self.n_class) 
        self.b1 = np.random.rand(self.n_class, 1) 
        self.b2 = np.random.rand(self.n_class, 1)
        
        for i in range(self.epochs):
            
#           forward propergation

            linear_output1 = np.dot(self.w1,X_train.T) + self.b1
            output1 = self.sigmoid(linear_output1)
            linear_output2 = np.dot(self.w2,output1) + self.b2
            output2 = self.softmax(linear_output2)
        
#           backward propergation
            
            actual_y = np.zeros((y_train.size, y_train.max() + 1))
            actual_y[np.arange(y_train.size), y_train] = 1
            print('Acyi', actual_y.shape)
            print('out2', output2.shape)
            diff = output2 - actual_y.T
            update_W2 = 1 / N * np.dot(diff,output1.T)
            update_b2 = 1 / N * np.sum(diff)
            s = self.sigmoid(linear_output1)
            update_1 = np.dot(self.w2.T,diff) * s * (1 - s)
            update_W1 = 1 / N * update_1.dot(X_train)
            update_b1 = 1 / N * np.sum(update_1)
            
#           update the parameters

            self.w1 = self.w1 - self.lr * update_W1
            self.w2 = self.w2 - self.lr * update_W2
            self.b1 = self.b1 - self.lr * update_b1    
            self.b2 = self.b2 - self.lr * update_b2 
        
            
            ###### YOUR CODE ENDS HERE ######
            


    def predict(self, X_test: np.ndarray) -> np.ndarray:
        
        """Use the trained weights to predict labels for test data points.
        Parameters:
            X_test: a numpy array of shape (N, D) containing testing data;
                N examples with D dimensions
        Returns:
            predicted labels for the data in X_test; a 1-dimensional array of
                length N, where each element is an integer giving the predicted
                class.
        """
        
        N, D = X_test.shape
        y_test = np.zeros(N)
        
        ###### YOUR CODE STARTS HERE ######
        
#       forward_prop

        linear_output1 = np.dot(self.w1,X_test.T) + self.b1
        output1 = self.sigmoid(linear_output1)
        linear_output2 = np.dot(self.w2,output1) + self.b2
        output2 = self.softmax(linear_output2)
            
        y_test = np.argmax(output2, 0)
        
        ###### YOUR CODE ENDS HERE ######
        
        return y_test

## Train Perceptron on CIFAR

In [8]:
lr = 0.2
n_epochs = 25

percept_CIFAR = Perceptron(n_class_CIFAR, lr, n_epochs)
percept_CIFAR.train(X_train_CIFAR, y_train_CIFAR.T)

Acyi (4000, 10)
out2 (10, 4000)
Acyi (4000, 10)
out2 (10, 4000)
Acyi (4000, 10)
out2 (10, 4000)
Acyi (4000, 10)
out2 (10, 4000)
Acyi (4000, 10)
out2 (10, 4000)
Acyi (4000, 10)
out2 (10, 4000)
Acyi (4000, 10)
out2 (10, 4000)
Acyi (4000, 10)
out2 (10, 4000)
Acyi (4000, 10)
out2 (10, 4000)
Acyi (4000, 10)
out2 (10, 4000)
Acyi (4000, 10)
out2 (10, 4000)
Acyi (4000, 10)
out2 (10, 4000)
Acyi (4000, 10)
out2 (10, 4000)
Acyi (4000, 10)
out2 (10, 4000)
Acyi (4000, 10)
out2 (10, 4000)
Acyi (4000, 10)
out2 (10, 4000)
Acyi (4000, 10)
out2 (10, 4000)
Acyi (4000, 10)
out2 (10, 4000)
Acyi (4000, 10)
out2 (10, 4000)
Acyi (4000, 10)
out2 (10, 4000)
Acyi (4000, 10)
out2 (10, 4000)
Acyi (4000, 10)
out2 (10, 4000)
Acyi (4000, 10)
out2 (10, 4000)
Acyi (4000, 10)
out2 (10, 4000)
Acyi (4000, 10)
out2 (10, 4000)


In [195]:
pred_percept = percept_CIFAR.predict(X_test_CIFAR)
print('The training accuracy is given by: %f' % (get_acc(pred_percept, y_test_CIFAR)))

[3 8 8 ... 3 3 4] [3 8 8 ... 5 1 7]
The training accuracy is given by: 0.202700


### Validate Perceptron on CIFAR

In [196]:
pred_percept = percept_CIFAR.predict(X_val_CIFAR)
print('The validation accuracy is given by: %f' % (get_acc(pred_percept, y_val_CIFAR)))

[8 0 8 9 3 8 1 3 9 8 3 3 3 1 3 3 9 9 8 3 1 8 3 9 3 3 3 8 9 8 8 3 5 3 4 5 8
 9 3 3 9 9 3 3 3 0 8 3 8 0 9 8 3 9 4 3 8 3 9 8 8 8 5 3 3 9 9 9 8 9 3 9 3 9
 3 3 9 9 9 8 3 3 8 9 3 8 3 3 3 3 3 3 0 8 9 3 8 3 3 9 3 3 3 3 3 3 3 8 3 3 8
 9 3 8 3 1 3 9 5 9 3 1 3 9 3 3 3 3 9 8 3 1 3 9 9 9 9 3 8 3 8 8 9 4 3 5 3 3
 3 3 9 3 9 3 3 8 8 8 9 8 3 8 9 0 9 8 3 3 3 8 8 8 9 3 8 0 9 9 3 3 1 3 9 8 3
 1 3 7 3 3 3 8 9 3 3 0 1 3 8 3 3 3 0 9 3 5 9 8 9 3 8 3 3 3 8 9 9 3 5 3 5 8
 8 9 3 8 8 4 8 8 3 3 3 8 9 3 9 8 8 8 8 8 0 3 3 8 8 7 8 8 3 8 3 3 9 3 8 0 9
 4 3 9 3 3 9 3 3 3 3 8 8 8 3 0 8 9 0 8 6 3 3 8 3 5 3 7 9 8 9 3 3 9 3 3 3 0
 8 3 8 3 8 3 3 9 9 3 0 3 3 3 3 8 3 9 3 8 8 3 3 9 8 3 8 3 3 3 9 3 3 0 9 9 9
 3 8 9 3 3 8 9 9 6 3 9 8 3 0 0 8 4 3 9 3 0 9 8 9 9 9 3 9 3 3 5 8 3 3 3 8 9
 8 9 8 8 9 8 3 3 3 3 8 3 3 8 3 8 9 8 8 3 8 3 5 3 4 3 8 8 3 9 3 8 8 3 3 9 3
 9 1 9 5 3 9 3 3 3 8 3 8 8 8 3 3 3 9 4 3 9 5 3 3 8 8 3 3 0 9 3 3 3 3 3 3 1
 9 9 3 3 0 9 3 3 3 8 8 8 3 8 9 3 5 9 5 5 3 3 8 3 8 8 8 8 0 9 8 3 9 8 4 3 8
 3 3 3 3 9 9 9 3 8 8 4 9 

### Test Perceptron on CIFAR

In [197]:
pred_percept = percept_CIFAR.predict(X_test_CIFAR)
print('The testing accuracy is given by: %f' % (get_acc(pred_percept, y_test_CIFAR)))

[3 8 8 ... 3 3 4] [3 8 8 ... 5 1 7]
The testing accuracy is given by: 0.202700


In [198]:
output_submission_csv('output/Perceptron_submission_CIFAR.csv', percept_CIFAR.predict(X_test_CIFAR))

# Logistic Classifier

The Logistic Classifier has 2 hyperparameters that you can experiment with:
- **Learning rate** - similar to as defined above in Perceptron, this parameter scales by how much the weights are changed according to the calculated gradient update. 
- **Number of Epochs** - As described for perceptron.



You will implement the Logistic Classifier in the **models/Logistic.py**

The following code: 
- Creates an instance of the Logistic classifier class 
- The train function of the Logistic class is trained on the training data
- We use the predict function to find the training accuracy as well as the testing accuracy

In [298]:
"""Logistic regression model."""

import numpy as np
import math


class Logistic:
    def __init__(self, lr: float, epochs: int):
        """Initialize a new classifier.
        Parameters:
            lr: the learning rate
            epochs: the number of epochs to train for
        """
        self.w = None
        self.lr = lr
        self.epochs = epochs
        self.threshold = 0.5
        
    def sigmoid(self, Z: np.ndarray) -> np.ndarray:
        
        """Sigmoid function.
        Parameters:
            z: the input
        Returns:
            the sigmoid of the input
        """        
        Z = Z / np.linalg.norm(Z, axis=-1)[:, np.newaxis]
        return 1 / (1 + np.exp(-Z))
        
        
    def train(self, X_train: np.ndarray, y_train: np.ndarray):
        
        """Train the classifier.
        Use the logistic regression update rule as introduced in lecture.
        Parameters:
            X_train: a numpy array of shape (N, D) containing training data;
                N examples with D dimensions
            y_train: a numpy array of shape (N,) containing training labels
        """
        D, N = X_train.shape
        self.w = np.random.randn(1, D)
        self.bias = 0
        
        ##### YOUR CODE STARTS HERE #####
        
        for _ in range(self.epochs):
            linear_model = np.dot(self.w, X_train) + self.bias
            y_predicted = self.sigmoid(linear_model)
            
            update_w = np.dot((y_predicted - y_train), X_train.T)
            update_b = np.sum(y_predicted - y_train)
            
            self.w -= self.lr * update_w
            self.bias -= self.lr * update_b
        
        ##### YOUR CODE ENDS HERE #####
        
        
    def predict(self, X_test: np.ndarray, y_train) -> np.ndarray:
        
        """Use the trained weights to predict labels for test data points.
        Parameters:
            X_test: a numpy array of shape (N, D) containing testing data;
                N examples with D dimensions
        Returns:
            predicted labels for the data in X_test; a 1-dimensional array of
                length N, where each element is an integer giving the predicted
                class.
        """
        N, D = X_test.shape
        y_test = np.zeros(N)
        
        ##### YOUR CODE STARTS HERE #####
        
        linear_model = np.dot(X_test, self.w.T) + self.bias
        y_predicted = self.sigmoid(linear_model)
        y_predicted = y_predicted
        y_predicted_class = [1 if i > self.threshold else 0 for i in y_predicted]  
        
        ##### YOUR CODE ENDS HERE #####
        
        return y_predicted_class
    
    def get_accuracy(self,predictions, Y):
        
        return np.sum(predictions == Y) / Y.size

### Training Logistic Classifer

In [299]:
learning_rate = 0.2
n_epochs = 30

lr = Logistic(learning_rate, n_epochs)
lr.train(X_train_CIFAR.T, y_train_CIFAR.T)

In [300]:
pred_lr = lr.predict(X_train_CIFAR, y_train_CIFAR)
print('The training accuracy is given by: %f' % (get_acc(pred_lr, y_train_CIFAR)))

[0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 

### Validate Logistic Classifer

In [301]:
pred_lr = lr.predict(X_val_CIFAR,y_val_CIFAR)
print('The validation accuracy is given by: %f' % (get_acc(pred_lr, y_val_CIFAR)))

[0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 

### Test Logistic Classifier

In [288]:
pred_lr = lr.predict(X_test_CIFAR, y_test_CIFAR)
print('The testing accuracy is given by: %f' % (get_acc(pred_lr, y_test_CIFAR)))
output_submission_csv('output/Logistic_submission_CIFAR.csv', lr.predict(X_test_CIFAR, y_test_CIFAR))

[0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 