# COMP47590: Advanced Machine Learning
# Assignment 1: Implementing Perceptrons

- Student 1 Name: Carl Fabian Winkler
- Student 1 Number: 20207528
- Student 2 Name: David Moreno Boras
- Student 2 Number: 21200646

## Import Packages

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import math
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import train_test_split

"""
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split

from sklearn.utils.validation import check_X_y, check_array, check_is_fitted, check_random_state
from sklearn.utils.multiclass import unique_labels
from sklearn import preprocessing
from sklearn import metrics
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.utils import resample"""

'\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.model_selection import train_test_split\n\nfrom sklearn.utils.validation import check_X_y, check_array, check_is_fitted, check_random_state\nfrom sklearn.utils.multiclass import unique_labels\nfrom sklearn import preprocessing\nfrom sklearn import metrics\nfrom sklearn.model_selection import GridSearchCV\nfrom sklearn.model_selection import cross_val_score\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.utils import resample'

## Task 1: The Perceptron Classifier

Define the PerceptronClassifier class

In [89]:
class Layer():
    def __init__(self, n_in, n_out, activation = 'Sigmoid', init = 'Xavier'):
        self.activation = activation
        # XW + b = y ; We input more than one sample per pass...
        
        self.weights = np.zeros((n_in, n_out))
        self.biases = np.zeros((n_out))
        if init == 'Xavier':
            var = np.sqrt(6.0 / (n_in + n_out))
            for i in range(n_in):
                for j in range(n_out):
                      self.weights[i,j] = np.float32(np.random.uniform(-var, var))
        
        self.d_w = np.zeros(weights.shape)
        self.d_b = np.zeros(biases.shape)
        #print("Weights:", self.weights.shape)
        #print(self.weights) 
        
    def getWeights(self):
        return self.weights

    def forward(self, x):
        """print("X:", x.shape)
        print(x)
        print("Weights:", self.weights.shape)
        print(self.weights)"""
        z = x @ self.weights + self.biases
        
        if self.activation == 'Sigmoid':
            out = 1 / (1 + np.exp(-z))
        elif self.activation == 'ReLu':
            out = np.maximum(z, 0)
        elif self.activation == 'TanH':
            out = np.tanh(z)
        else:
            out = z
        
        self.cache = (x, z)
        
        return out    
    
    def backward(self, d_out):
        inputs, z = self.cache
        weight = self.weights
        bias = self.biases
        
        if self.activation == 'Sigmoid':
            d_act = d_out * (1 / (1 + np.exp(-z))) * (1 - 1 / (1 + np.exp(-z)))
        elif self.activation == 'ReLu':
            d_act = d_out * (z > 0)
            
        elif self.activation == 'TanH':
            d_act = d_out * np.square(z)
        else:
            d_act = z
            
        d_inputs = d_act @ weight.T
        self.d_w = inputs.T @ d_act
        self.d_b = d_act.sum(axis=0) 
        
        return d_inputs, self.d_w, self.d_b
    
    def update_gd_params(self, lr):
        self.weights = self.weights - lr * self.d_w
        self.biases = self.biases - lr * self.d_b

class PerceptronClassifier(BaseEstimator, ClassifierMixin, ):
    
    """
    Parameters
    ----------
    Attributes
    ----------
    Notes
    -----
    See also
    --------
    Examples
    --------
    """
    # Constructor for the classifier object
    def __init__(self, in_dim, out_dim, hidden_units, n_layers, activation = 'Sigmoid', 
                 learning_rate = 0.01, weight_decay = 0, epochs = -1, regularisation = 'L2'):

        """Setup a Perceptron classifier .
        Parameters
        ----------
        Returns
        -------
        """     
        
        self.layers = []
        self.lr = learning_rate
        self.regularisation = regularisation
        
        self.layers.append(Layer(in_dim, hidden_units, activation, 'Xavier'))
        for l in range(n_layers):
            self.layers.append(Layer(hidden_units, hidden_units, activation, 'Xavier'))
        self.layers.append(Layer(hidden_units, out_dim, activation, 'Xavier'))
        
        print("Layers:", len(self.layers))
        
        # Initialise class variabels
    def forward(self, X):
        out = self.layers[0].forward(X)
        for layer in self.layers[1:]:
            out = layer.forward(out)
        return out
                
    def backward(self, in_grad):
        i = len(self.layers) - 2 
        # d_inputs, _, _ = lay.backward(in_grad)
        next_grad = self.layers[i+1].backward(in_grad)
        while i >= 1:
            next_grad = self.layers[i].backward(next_grad)
            i -= 1
        
    def l2_loss(self, y_hat, pred):
        # totalSum = 0
        # for layer in self.layers:
        #     totalSum = totalSum + np.sum(np.sum(layer.getWeights())
        return -y_hat-np.squeeze(pred)

    def loss(self, y_hat, pred):
        return -np.expand_dims(y_hat-np.squeeze(pred),axis=1)
        
    # The fit function to train a classifier
    def fit(self, X, y, epochs = 30):
        # WRITE CODE HERE
        for i in range(epochs):
            out = self.forward(X)
            print("Prediction:",out)
            if (self.regularisation == 'L2'):
                grad = self.l2_loss(y, out)
            else:
                grad = self.loss(y, out)
            
            # Backpropagation
            self.backward(grad)
            
            # Update weights and biases
            for layer in self.layers:
                layer.update_gd_params(self.lr)
        return
    
    # The predict function to make a set of predictions for a set of query instances
    def predict(self, X):
        return self.forward(X)
    
    # The predict_proba function to make a set of predictions for a set of query instances. This returns a set of class distributions.
    def predict_proba(self, X):
        tmp = self.forward(X)
        sum1 = tmp.sum(axis = 1)
        out = X.T / sum1
        out = out.T
        return out

## Task 2: Evaluation

Load the Diabethic Retinopathy dataset

In [3]:
x = np.array([[1,1],[2,2]])


D = 2
N = 2
H = 1


weights = np.ones((2,1))
biases = np.ones((1))  
              


In [115]:
#(self, in_dim, out_dim, hidden_units, n_layers,
x = np.array([[0,0]])
y = np.array([1])
clf = PerceptronClassifier(2, 1, 2, 0, regularisation='None', learning_rate = 10,activation='Sigmoid')
clf.fit(x, y,epochs=700)

Layers: 2
Prediction: [[0.37464357]]
Prediction: [[0.84360968]]
Prediction: [[0.88025311]]
Prediction: [[0.89881827]]
Prediction: [[0.91069826]]
Prediction: [[0.91917055]]
Prediction: [[0.9256154]]
Prediction: [[0.93073483]]
Prediction: [[0.93493028]]
Prediction: [[0.93845065]]
Prediction: [[0.94145982]]
Prediction: [[0.94407074]]
Prediction: [[0.94636418]]
Prediction: [[0.94839967]]
Prediction: [[0.95022216]]
Prediction: [[0.95186636]]
Prediction: [[0.95335952]]
Prediction: [[0.9547234]]
Prediction: [[0.95597561]]
Prediction: [[0.95713058]]
Prediction: [[0.95820023]]
Prediction: [[0.95919457]]
Prediction: [[0.96012201]]
Prediction: [[0.96098972]]
Prediction: [[0.96180382]]
Prediction: [[0.96256961]]
Prediction: [[0.96329166]]
Prediction: [[0.96397398]]
Prediction: [[0.96462007]]
Prediction: [[0.96523302]]
Prediction: [[0.96581555]]
Prediction: [[0.96637011]]
Prediction: [[0.96689884]]
Prediction: [[0.96740369]]
Prediction: [[0.9678864]]
Prediction: [[0.96834853]]
Prediction: [[0.96879

Prediction: [[0.99270501]]
Prediction: [[0.99271075]]
Prediction: [[0.99271647]]
Prediction: [[0.99272218]]
Prediction: [[0.99272787]]
Prediction: [[0.99273356]]
Prediction: [[0.99273923]]
Prediction: [[0.99274488]]
Prediction: [[0.99275053]]
Prediction: [[0.99275616]]
Prediction: [[0.99276177]]
Prediction: [[0.99276738]]
Prediction: [[0.99277297]]
Prediction: [[0.99277855]]
Prediction: [[0.99278411]]
Prediction: [[0.99278967]]
Prediction: [[0.99279521]]
Prediction: [[0.99280073]]
Prediction: [[0.99280625]]
Prediction: [[0.99281175]]
Prediction: [[0.99281724]]
Prediction: [[0.99282272]]
Prediction: [[0.99282818]]
Prediction: [[0.99283363]]
Prediction: [[0.99283907]]
Prediction: [[0.9928445]]
Prediction: [[0.99284991]]
Prediction: [[0.99285532]]
Prediction: [[0.99286071]]
Prediction: [[0.99286609]]
Prediction: [[0.99287145]]
Prediction: [[0.99287681]]
Prediction: [[0.99288215]]
Prediction: [[0.99288748]]
Prediction: [[0.9928928]]
Prediction: [[0.99289811]]
Prediction: [[0.9929034]]
Pred

In [82]:

x = np.array([[0.74552196],
 [0.76334402]])
y = np.ones(2)

In [83]:
np.expand_dims(np.squeeze(x)-y,axis=1)

array([[-0.25447804],
       [-0.23665598]])

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 2 is different from 1)

Evaluate the perfomrance of the perceptron classifier on the daibetic retinopathy dataset.

In [40]:
x = np.array([1,1])
clf.predict(x)


array([0.47069059])

In [None]:
plt.plot(outpur)

## Task 3 & 4: Add Different Activations & Regularisation

Reimplement the PerceptronClassifier class adding an activation function option and L2 regularisation. 

In [None]:
class PerceptronClassifier2(BaseEstimator, ClassifierMixin):
    """
    """
    # Constructor for the classifier object
    def __init__(self):

        """Setup a Perceptron classifier .
        Parameters
        ----------

        
        Returns
        -------

        """     

        # Initialise ranomd state if set
        self.random_state = random_state
        
        # Initialise class variabels

        
    # The fit function to train a classifier
    def fit(self, X, y):
        
        # WRITE CODE HERE
        

    # The predict function to make a set of predictions for a set of query instances
    def predict(self, X):

        # WRITE CODE HERE
    
    # The predict_proba function to make a set of predictions for a set of query instances. This returns a set of class distributions.
    def predict_proba(self, X):
        
        # WRITE CODE HERE

Load the dataset and explore it.

## Task 5: Reflect on the Performance of the Different Models Evaluated

Perform hyper-parameter tuning and evaluate models. 

## Test the Diabetic Retiniphaty dataset

In [None]:
diabetic_af = pd.read_csv('messidor_features.csv', na_values='?', index_col = 0)
diabetic_af.head()
y = diabetic_af.pop('Class').values
x_raw = diabetic_af.values
print("Features: ", x_raw[0:2])
print("Class: ", y[0:10])

### Train and predict using our classifier

With a single split

In [None]:
x_train, x_test, y_train, y_test = train_test_split(x_raw, y, shuffle=True, train_size = 0.7)
clf = PerceptronClassifier(len(x_train[0]), 1, )
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
accuracy = metrics.accuracy_score(y_pred, y_test)

### Grid search

Do grid search with the train set, use the test set for evaluation

In [None]:
cv_folds = 5
param_grid ={'activation': ['Sigmoid', 'ReLu', 'TanH'], 'regularisation':['None', 'L2']}

# Perform the search
tuned_perceptron = GridSearchCV(PerceptronClassifier(), \
                            param_grid, cv=cv_folds, verbose = 2, \
                            n_jobs = -1)
cross_val_score(clf, x_train, y_train, cv=10)