This notebook references Lecture 7 from the Deep Learning course by Proffessor Bryce.

This notebook implements a feed forward neural network with the following activation functions:
1. Linear 
2. Logistic
3. Tanh
4. ReLu

Then, Gradient Descent Technique is used to train the model.

Regression Dataset -> https://archive.ics.uci.edu/dataset/360/air+quality \
Classification Dataset -> https://archive.ics.uci.edu/dataset/728/toxicity-2 

A single neuron is divided as the following in the larger picture.

neuron = weighted average + activation function

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Sample Synthetic data

In [2]:
# Synthetic data for regression and classification

def synthetic_regression_data(N, # number of datapoints to generate
                              n_input, # number of input variables 
                              ): 
    
    # 1. determine weights and bias randomly for the data
    W = np.random.randn(n_input)
    b = np.random.randn()
    
    # 2. Generate data
    X = np.random.randint(10, size=(N, n_input))
    y = X.dot(W) + b + np.random.rand(N)
    
    
    # 4. return the W, b and data
    return X, y
    
    
def synthetic_classification_data(N, n_input):
    
    # 1. determine weights and bias randomly for the data
    W = np.random.randn(n_input)
    b = np.random.randn()
    
    # 2. Generate data
    X = np.random.randint(10, size=(N, n_input))
    y_ = X.dot(W) + b + np.random.rand()
    threshold = np.random.random()
    y = (y_ >= threshold).astype(int)
    
    
    # 4. return the W, b and data
    return threshold, X, y
    

# Skeleton of Feed Forward Neural Network

1. Input Layer \
 => (N, M) where N is the number of data points & M is the number of input neurons \
 => attribute = "input" (1-to-1 input mapping) \
 => activation = None 

 2. Hidden Layers \
 => (N, Hi) where Hi is the number of neurons in the ith layer \
 => attribute = "hidden" \
 => activation = linear/tanh/sigmoid/ReLu 

 3. Output Layer \
 => (N, O) where O is the number of output neurons \
 => attribute = "output" \
 => activation = linear/sigmoid depending on the task type

In [122]:
class Neuron():
    
    def __init__(self, n_features, activation = None):
        self.activation = activation
        self.W = np.random.randn(n_features)
        self.b = np.random.randn()
            
    
    def predict(self, X, train=None):
        
        y = self.linear_(X, self.W, self.b)
        
        if self.activation == "linear":
            return y
        elif self.activation == "sigmoid":
            return self.sigmoid_(y)
        elif self.activation == "tanh":
            return self.tanh_(y)
        elif self.activation == "ReLu":
            return self.relu_(y)
        
    
    def sigmoid_(self, X):
        return 1/ (1 + np.exp(-X))
    
    def tanh_(self, X):
        return np.tanh(X)

    def relu_(self, X):
        return np.maximum(0, X, axis=1)

    def linear_(self, X, W, b):
        return X.dot(W) + b
    
    def update_(self, W_grad, b_grad,alpha):
        self.W -= (alpha * W_grad)
        self.b -= (alpha * b_grad)
    
        
class Layer():
    
    def __init__(self, size = (None, None, None), activation = None):
        N, n_features, n_neurons = size
        
        self.size = size
        self.activation = activation
        self.neurons = {}
        
        for i in range(n_neurons):
            self.neurons[f'neuron_{i+1}'] = Neuron(n_features, self.activation)
                
    def predict(self, X, train = None):
        outputs = {}
        i=0
        for neuron_name, neuron in self.neurons.items():
            
            outputs[neuron_name] = np.array(neuron.predict(X, train)).reshape(-1, 1)
            #print(outputs[neuron_name].shape)
            
        return np.hstack([*outputs.values()])
    
    def update(self, W_grad, b_grad, alpha):
        for _, neuron in self.neurons.items():
            neuron.update_(W_grad, b_grad, alpha)
            
            
class RegressionModel:
    
    def __init__(self, input_size):
        
        N, _ = input_size
        
        self.layer1 = Layer((*input_size, 3), activation="tanh")
        self.layer2 = Layer((N, 3, 2), activation="tanh")
        self.output_layer = Layer((N, 2, 1), activation="linear")
        
    def predict(self, X):
        y = self.layer1.predict(X)
        #print(y.shape)
        y = self.layer2.predict(y)
        #print(y.shape)
        yo = self.output_layer.predict(y)
        #print(yo.shape)
        
        return y, yo
    
    def update(self, W_grad, b_grad, alpha):
        self.output_layer.update(W_grad, b_grad, alpha)
        
    def fit(self, X, y, alpha = 0.01, epochs = 100):
        
        m, n = X.shape
        self.alpha = alpha
        
        for epoch in range(epochs):
            # 1. Compute Weighted Sum
            yo, z = self.predict(X)
            
            error = y - z.reshape(-1, )

            W_grad = (-2/m) * yo.T.dot(error)
            b_grad = (-2/m) * np.sum(error)
            
            self.update(W_grad, b_grad, alpha)

            #print(f"Epoch {epoch+1} => ", np.mean(error ** 2))
            
            
class ClassificationModel:
    
    def __init__(self, input_size, threshold):
        
        N, _ = input_size
        self.threshold = threshold
        self.layer1 = Layer((*input_size, 3), activation="tanh")
        self.layer2 = Layer((N, 3, 2), activation="tanh")
        self.output_layer = Layer((N, 2, 1), activation="sigmoid")
        
    def predict(self, X):
        y = self.layer1.predict(X)
        #print(y.shape)
        y = self.layer2.predict(y)
        #print(y.shape)
        yo = self.output_layer.predict(y)
        #print(yo.shape)
        
        return y, yo 

    def predict_(self, X):
        y = self.layer1.predict(X)
        print(y.shape)
        y = self.layer2.predict(y)
        print(y.shape)
        y = self.output_layer.predict(y)
        print(y.shape)
        
        return (y >= self.threshold).astype(int)
    
    def update(self, W_grad, b_grad, alpha):
        self.output_layer.update(W_grad, b_grad, alpha)
        
    def fit(self, X, y, alpha = 0.01, epochs = 100):
        
        m, n = X.shape
        self.alpha = alpha
        
        for epoch in range(epochs):
            # 1. Compute Weighted Sum
            yo, z = self.predict(X)
            
            error = y - z.reshape(-1, )

            W_grad = (-2/m) * yo.T.dot(error)
            b_grad = (-2/m) * np.sum(error)
            
            self.update(W_grad, b_grad, alpha)

            #print(f"Epoch {epoch+1} => ", np.mean(error ** 2))
                    
                    

Testing on synthetic regression data

In [123]:
X_reg, y_reg = synthetic_regression_data(100, 5)

reg_model = RegressionModel((100, 5))

reg_model.fit(X_reg, y_reg)
print("Fit complete")

_, y_pred = reg_model.predict(X_reg)

Fit complete


Testing on synthetic classification data

In [124]:
cls_threshold, X_cls, y_cls = synthetic_classification_data(100, 5)

cls_model = ClassificationModel((100, 5), 0.5)

cls_model.fit(X_cls, y_cls)
print("Fit complete")

y_pred = cls_model.predict_(X_cls)

Fit complete
(100, 3)
(100, 2)
(100, 1)


Testing on real datasets mentioned earlier

In [141]:
from ucimlrepo import fetch_ucirepo 
  
# fetching classification dataset 
toxicity = fetch_ucirepo(id=728) 
  
# data (as pandas dataframes) 
X_toxic = toxicity.data.features 
y_toxic = toxicity.data.targets 


In [142]:
# for simplicity sake we are skipping all the categorical features and considering only around 10 random features

toxicity_features = ['MATS3v', 'MATS3s', 'MATS3p', 'nHBDon_Lipinski', 'minHBint8', 'MATS3e', 'MATS3c', 'MATS3m'] 
X_toxic = X_toxic[toxicity_features]

y_toxic = (np.array(y_toxic) != "NonToxic").astype(int)

In [44]:
from sklearn.metrics import accuracy_score as accsc, mean_squared_error as mse, r2_score as r2, precision_score as presc

In [144]:
cls_model_real = ClassificationModel(X_toxic.shape, 0.6)
cls_model_real.fit(X_toxic, np.asarray(y_toxic).reshape(-1, ))
y_cls_model_real = cls_model_real.predict_(X_toxic)

print("Accuracy score : ", accsc(y_toxic, y_cls_model_real))
print("Precision score : ", presc(y_toxic, y_cls_model_real))


(171, 3)
(171, 2)
(171, 1)
Accuracy score :  0.5087719298245614
Precision score :  0.35714285714285715


In [145]:
# fetching classification dataset https://archive.ics.uci.edu/dataset/265/physicochemical+properties+of+protein+tertiary+structure

reg_data = pd.read_csv('./datasets/CASP.csv')
reg_data.info()

X_casp, y_casp = reg_data.drop(['RMSD'], axis=1), reg_data['RMSD']

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45730 entries, 0 to 45729
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   RMSD    45730 non-null  float64
 1   F1      45730 non-null  float64
 2   F2      45730 non-null  float64
 3   F3      45730 non-null  float64
 4   F4      45730 non-null  float64
 5   F5      45730 non-null  float64
 6   F6      45730 non-null  float64
 7   F7      45730 non-null  float64
 8   F8      45730 non-null  int64  
 9   F9      45730 non-null  float64
dtypes: float64(9), int64(1)
memory usage: 3.5 MB


In [150]:
reg_model_real = RegressionModel(X_casp.shape)
reg_model_real.fit(X_casp, np.asarray(y_casp), epochs=100)
_, y_reg_model_real = reg_model_real.predict(X_casp)

print("R2 score : ", r2(y_casp, y_reg_model_real))
print("MSE : ", mse(y_casp, y_reg_model_real))


R2 score :  -2.5108894369907375e-05
MSE :  37.43385907519307


Important learning here:


the version implemented here uses exponential to compute tanh.
But for larger values, tanh tends to infinty resulting Nan values and its propagation.

Thus it is important to keep this in mind while building this activation function.

The manual implementatio is replaced with the numpy version for stability.