# Deep Learning - Assignment 1

#### Submitted By: Kalyani Prashant Kawale
#### Student ID: 21237189

## Solution:

In [1]:
# Package imports
import matplotlib
import matplotlib.pyplot as plt
import sklearn
import sklearn.datasets
import pandas as pd
import numpy as np
import itertools
from tabulate import tabulate
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

np.random.seed(200)
# Display plots inline and change default figure size
%matplotlib inline

### Task 1: Implement Logistic Regression

In [2]:
def LogisticRegressor(X, y, alpha, max_iters):
    (nsamples, nattributes) = np.shape(X)
    threshold = 1e-6
    w = np.random.rand(nattributes)
    b = np.random.rand()
    J_prev = 0
    i = 0
    for i in range(max_iters):   
        idx = np.random.choice(nsamples, 1, replace=False)
        random_X = X[idx[0]]
        random_y = y[idx[0]]
        y_hat = 1 / (1 + np.exp(-1 * (np.dot(w, random_X) + b)))
        J_curr = -1 * ((random_y * np.log(y_hat)) + ((1 - random_y) * np.log(1 - y_hat)))
        if np.absolute(J_curr - J_prev) < threshold:
            break
        else:
            J_prev = J_curr        
        delta_w = []
        for j in range(len(w)):
            delta_w.append((y_hat - random_y) * random_X[j])
        delta_b = (y_hat - random_y)
        for j in range(len(w)):
            w[j] -= alpha * delta_w[j]
        b -= alpha * delta_b
        
    return w, b

def predict(data, weights, bias):
    predictions = []
    for sample in data:
        prediction = 1 / (1 + np.exp(-1 * (np.dot(weights, sample) + bias))) 
        if prediction >= 0.5:
            predictions.append(1)
        else:
            predictions.append(0)
    return predictions

### Task 2: Training, Validating, Testing on blobs300 and circles600 Datasets

In [3]:
files = ["blobs300.csv", "circles600.csv"]
alpha_s = [0.1, 0.001, 0.0001]
iterations = [1000, 10000, 20000]
best_alpha = 0.1
best_iterations = 1000
best_accuracy = 0
X_train = {}
y_train = {}
X_valid = {}
y_valid = {}
X_test = {}
y_test = {}
results = pd.DataFrame(columns=['File Name', 'Learning Rate', 'Iterations', 'Accuracy'])
for file in files:
    # Use pandas to read the CSV file as a dataframe
    df = pd.read_csv(file).sample(frac=1).reset_index(drop=True)
    
    # The y values are those labelled 'Class': extract their values
    y = df['Class'].values

    # The x values are all other columns
    del df['Class']   # drop the 'Class' column from the dataframe
    X = df.values     # convert the remaining columns to a numpy array
    
    X_train[file], X_validate_test, y_train[file], y_validate_test = train_test_split(X, y, train_size=0.7)

    X_valid[file], X_test[file], y_valid[file], y_test[file] = train_test_split(X_validate_test, y_validate_test, test_size=0.5)
    
    for alpha, iters in itertools.product(alpha_s, iterations):
        row = {'File Name': file, 'Learning Rate': alpha, 'Iterations': iters}
        w, b = LogisticRegressor(X_train[file], y_train[file], alpha, iters)
        predictions = predict(X_valid[file], w, b)
        accuracy = accuracy_score(y_valid[file], predictions)
        row['Accuracy'] = accuracy
        results = results.append(row, ignore_index=True)
        if accuracy > best_accuracy:
            best_alpha = alpha
            best_iterations = iters
            best_accuracy = accuracy


In [4]:
print("Training and Validation Results:\n")
print(results.to_markdown())

Training and Validation Results:

|    | File Name      |   Learning Rate |   Iterations |   Accuracy |
|---:|:---------------|----------------:|-------------:|-----------:|
|  0 | blobs300.csv   |          0.1    |         1000 |   1        |
|  1 | blobs300.csv   |          0.1    |        10000 |   1        |
|  2 | blobs300.csv   |          0.1    |        20000 |   1        |
|  3 | blobs300.csv   |          0.001  |         1000 |   0.933333 |
|  4 | blobs300.csv   |          0.001  |        10000 |   1        |
|  5 | blobs300.csv   |          0.001  |        20000 |   0.977778 |
|  6 | blobs300.csv   |          0.0001 |         1000 |   0.577778 |
|  7 | blobs300.csv   |          0.0001 |        10000 |   0.577778 |
|  8 | blobs300.csv   |          0.0001 |        20000 |   0.911111 |
|  9 | circles600.csv |          0.1    |         1000 |   0.655556 |
| 10 | circles600.csv |          0.1    |        10000 |   0.477778 |
| 11 | circles600.csv |          0.1    |        20000 |

In [5]:
print("Testing Results:")
for file in files:
    print(file)
    w, b = LogisticRegressor(X_train[file], y_train[file], best_alpha, best_iterations)
    predictions = predict(X_test[file], w, b)
    print(f"Accuracy: {accuracy_score(y_test[file], predictions)}")

Testing Results:
blobs300.csv
Accuracy: 1.0
circles600.csv
Accuracy: 0.6777777777777778


### Task 3: Shallow Neural Network:

In [6]:
# def f(z):
#     return 1 / (1 + np.exp(-1 * z))

# def LogisticRegressorNN(X, y, alpha, max_iters):
#     (nsamples, nattributes) = np.shape(X)
# #     hidden_layer_nodes = int(nattributes/2)
#     hidden_layer_nodes = 2
#     threshold = 1e-6
#     # Initialising weights and biases for input to hidden layer
#     w_H = np.random.rand(hidden_layer_nodes, nattributes)
#     b_H = np.random.rand(hidden_layer_nodes)
#     # Initialising weights and biases for hidden to output layer
#     w_L = np.random.rand(hidden_layer_nodes)
#     b_L = np.random.rand()
#     J_prev = 0
#     # Iterating until convergence
#     for x in range(max_iters):
#         # Selecting a random sample from dataset
#         idx = np.random.choice(nsamples, 1, replace=False)
#         random_X = X[idx[0]]
#         random_y = y[idx[0]]
        
#         # Forward Propogation
#         z_H = []
#         a_H = []
#         for i in range(hidden_layer_nodes):
#             temp = 0
#             for j in range(nattributes):
#                 temp += (w_H[i][j] * random_X[j]) + b_H[i]
#             z_H.append(temp)
#             a_H.append(f(z_H[i]))
        
#         z_L = 0
#         for i in range(hidden_layer_nodes):
#             z_L += (w_L[i] * a_H[i]) + b_L
#         y_hat = f(z_L)
#         J_curr = -1 * ((random_y * np.log(y_hat)) + ((1 - random_y) * np.log(1 - y_hat)))
        
#         # Backward Propogation        
#         # Propogating error back from output to hidden layer
#         delta_z_L = y_hat - random_y
#         delta_w_L = []
#         for i in range(hidden_layer_nodes):
#             delta_w_L.append(delta_z_L * a_H[i])
#         delta_b_L = delta_z_L
        
#         # Propogating error back from output to hidden layer
#         delta_z_H = []        
#         for i in range(hidden_layer_nodes):
#             f_dash = f(z_H[i]) * (1 - f(z_H[i]))
#             delta_z_H.append(f_dash * (delta_z_L * w_L[i]))

#         delta_w_H = []
#         for i in range(hidden_layer_nodes):
#             for j in range(nattributes):
#                 delta_w_H.append(delta_z_H[i] * random_X[j])
#         delta_b_H = delta_z_H
        
#         # Gradient Descent
#         if np.absolute(J_curr - J_prev) < threshold:
#             break
#         else:
#             J_prev = J_curr
            
#         for i in range(hidden_layer_nodes):
#             w_L[i] -= alpha * delta_w_L[i]
#         b_L -= alpha * delta_b_L
        
#         for i in range(hidden_layer_nodes):
# #             for j in range(nattributes):                
#             w_H[i] -= alpha * delta_w_H[i]
#             b_H[i] -= alpha * delta_b_H[i]
        
#     return (w_H, b_H, w_L, b_L)

# def predictNN(data, w_H, b_H, w_L, b_L):
#     (nsamples, nattributes) = np.shape(X)
#     hidden_layer_nodes = int(nattributes/2)
#     probabilities = []
#     for sample in data:
#         z_H = []
#         a_H = []
#         for i in range(hidden_layer_nodes):
#             temp = 0
#             for j in range(nattributes):
#                 temp += (w_H[i][j] * sample[j]) + b_H[i]
#             z_H.append(temp)
#             a_H.append(f(z_H[i]))

#         z_L = 0
#         for i in range(hidden_layer_nodes):
#             z_L += (w_L[i] * a_H[i]) + b_L
#         probabilities.append(f(z_L))
#     predictions = []
#     for probability in probabilities:
#         if probability > 0.5:
#             predictions.append(1)
#         else:
#             predictions.append(0)
#     return predictions    

In [7]:
df = pd.read_csv("blobs300.csv").sample(frac=1).reset_index(drop=True)
# df = pd.read_csv("circles600.csv").sample(frac=1).reset_index(drop=True)

y = df['Class'].values

del df['Class']   # drop the 'Class' column from the dataframe
X = df.values     # convert the remaining columns to a numpy array

X_train, X_validate_test, y_train, y_validate_test = train_test_split(X, y, train_size=0.7)

X_valid, X_test, y_valid, y_test = train_test_split(X_validate_test, y_validate_test, test_size=0.5)

# w_H, b_H, w_L, b_L = LogisticRegressorNN(X_train, y_train, 0.1, 1000)

# predictions = predictNN(X_valid, w_H, b_H, w_L, b_L)
# print(f"Accuracy: {accuracy_score(y_valid, predictions)}")


In [19]:
def Initialise(nattributes=0, n_layers=2):
    # Initialising the threshold
    threshold = 1e-6
    # Setting number of layers, including output layer
    layers = n_layers
    # Setting the number of nodes in each hidden layer
    layer_nodes = [int(nattributes/2) for layer in range(layers - 1)]
    # Setting the number of nodes in output layer
    layer_nodes.append(1)
    # Initialising weights dictionary
    weights = {}
    biases = {}
    # Initialising the weights matrix with the weights from input layer to first hidden layer
    weights['l0'] = np.random.rand(layer_nodes[0], nattributes)
    # Initialising the bias for each node of the first hidden layer
    biases['l0'] = np.random.rand(layer_nodes[0])    
    # Initialising weights and biases in each layer
    for layer in range(layers - 1):
        weights['l'+str(layer+1)] = np.squeeze(np.random.rand(layer_nodes[layer + 1], layer_nodes[layer]))
        biases['l'+str(layer+1)] = np.squeeze(np.random.rand(layer_nodes[layer + 1]))
        
    return threshold, weights, biases

def f(z):
    return 1 / (1 + np.exp(-1 * z))

def f_dash(z):
    return f(z) * (1 - f(z))

def ForwardPropogation(weights, biases, x):
    # Forward Propogation
    activations = {}
    activations['l0'] = x
    sigmas = {}
    
    layers = list(weights.keys())
    last_weight_layer = layers[len(weights)-1]
    
    for idx, layer in enumerate(weights):
        sigmas['l'+str(idx + 1)] = np.array([])
        activations['l'+str(idx + 1)] = np.array([])
        
        if layer != last_weight_layer:
            for i in range(len(weights[layer])):            
                sigma = 0
                for j in range(len(weights[layer][i])):
                    sigma += (weights[layer][i][j] * activations[layer][j]) + biases[layer][i]                
                sigmas['l'+str(idx + 1)] = np.append(sigmas['l'+str(idx + 1)], sigma)
                activations['l'+str(idx + 1)] = np.append(activations['l'+str(idx + 1)], f(sigma))
        else:
            sigma = 0
            for i in range(len(weights[layer])):
                sigma += (weights[layer][i] * activations[layer][i]) + biases[layer]
            sigmas['l'+str(idx + 1)] = np.append(sigmas['l'+str(idx + 1)], sigma)
            activations['l'+str(idx + 1)] = np.append(activations['l'+str(idx + 1)], f(sigma))
        
    return activations, sigmas

def BackPropagation(activations, sigmas, weights, y, layers, output_layer):    
    delta_z = {}
    delta_b = {}
    delta_w = {}
    # Calculating delta weights and delta bais for output layer
    
    delta_z[output_layer] = activations[output_layer] - y
    last_weight_layer = layers[layers.index(output_layer) - 1]
    delta_b[last_weight_layer] = delta_z[output_layer]    
    delta_w[last_weight_layer] = np.array([])
    for activation in activations[last_weight_layer]:
        delta_w[last_weight_layer] = np.append(delta_w[last_weight_layer], delta_z[output_layer] * activation) 
    
    # Calculating delta weights and delta bais for hidden layers   
    
    for layer_idx, layer in reversed(list(enumerate(layers))):        
        if layer != 'l0':
            print(f"{layer_idx}, {layer}")

            current_layer = layers[layer_idx - 1]
            print(current_layer)
            if current_layer != 'l0':
                current_nodes = sigmas[current_layer]
                delta_z[current_layer] = np.array([])               

                for i in range(len(current_nodes)):
                    _sum = 0
                    try:
                        for j in range(len(weights[current_layer][i])):
                            _sum += delta_z[layer][j] * weights[current_layer][i][j]
                        delta_sigma = f_dash(current_nodes[i]) * _sum                
                        delta_z[current_layer] = np.append(delta_z[current_layer], delta_sigma)
                    except:                    
                        for j in range(len(weights[current_layer])):
                            _sum += delta_z[layer] * weights[current_layer][j]
                        delta_sigma = f_dash(current_nodes[i]) * _sum                
                        delta_z[current_layer] = np.append(delta_z[current_layer], delta_sigma)
        
        ####NEEEDDDD TOOOO WOOOORRRKKK HEEERERERERE
    for layer_idx, layer in reversed(list(enumerate(layers))):        
        if layer != output_layer and layer != last_weight_layer:            
            delta_w[layer] = np.array([])
            delta_layer = layers[layer_idx + 1]            
            for activation in activations[layer]:
                node_delta_weights = np.array([])
                for delta in delta_z[delta_layer]:
                    node_delta_weights = np.append(node_delta_weights, delta * activation)
                delta_w[layer] = np.append(delta_w[last_weight_layer], node_delta_weights) 
    
            delta_b[layer] = delta_z[delta_layer]
        
    return delta_w, delta_b

def LogisticRegressorNN(X, y, alpha, max_iters):
    (nsamples, nattributes) = np.shape(X)
    threshold, weights, biases, =  Initialise(nattributes=nattributes, n_layers=3)
    J_prev = 0
    max_iters = 1
    # Iterating until convergence
    for x in range(max_iters):
        # Selecting a random sample from dataset
        idx = np.random.choice(nsamples, 1, replace=False)
        random_X = X[idx[0]]
        random_y = y[idx[0]]
        activations, sigmas = ForwardPropogation(weights, biases, random_X)
        layers = list(activations.keys())
        output_layer = layers[len(activations)-1]
        y_hat = activations[output_layer]
        J_curr = -1 * ((random_y * np.log(y_hat)) + ((1 - random_y) * np.log(1 - y_hat)))
        delta_weights, delta_biases = BackPropagation(activations, sigmas, weights, random_y, layers, output_layer)
        print(delta_weights)
        print(weights)
        # Gradient Descent
        if np.absolute(J_curr - J_prev) < threshold:
            break
        else:
            J_prev = J_curr
        
        for layer in layers:
            if layer != output_layer:
#             print(layer)
#             print(delta_weights[layer])
#             print("eek")
#             print(weights[layer])
                weights[layer] -= alpha * delta_weights[layer]
                biases[layer] -= alpha * delta_biases[layer]
        

LogisticRegressorNN(X_train, y_train, 0.1, 1000)

3, l3
l2
2, l2
l1
1, l1
l0
{'l2': array([0.78757147, 0.91287139]), 'l1': array([0.78757147, 0.91287139, 0.26257361, 0.0910724 ]), 'l0': array([7.87571469e-01, 9.12871393e-01, 3.39888465e-04, 1.87986586e-03])}
{'l0': array([[0.81158624, 0.04213984, 0.4090011 , 0.7304773 ],
       [0.02843178, 0.9239733 , 0.6706423 , 0.04895458]]), 'l1': array([[0.71626912, 0.62057536],
       [0.3574766 , 0.95100196]]), 'l2': array([0.89952034, 0.91887324])}


ValueError: operands could not be broadcast together with shapes (2,2) (4,) (2,2) 