## Deep Learning Neural Network

In [1]:
#Run the following to check which is not working in case importing tensorflow and keras failed

# import tensorflow as tf
# print(f"TensorFlow version: {tf.__version__}")
# print(f"Keras version: {tf.keras.__version__}")

# # Test the specific import that was failing
# from tensorflow.keras.datasets import mnist
# print("Import successful!")

TensorFlow version: 2.19.0
Keras version: 3.10.0
Import successful!


In [1]:
#import packages

import time
import random
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import mnist
from keras.utils import to_categorical

print("import successful")

import successful


## Dataset + Default settings

In [3]:
import pandas as pd

In [135]:
df = pd.read_csv("NN_wide_data.csv")

In [137]:
df.head(5)

Unnamed: 0.1,Unnamed: 0,state,total_diff,gtrend_diff,pop_diff,pov_diff,income_diff,unemployed_diff,urban_pop_percent,rural_pop_percent,label
0,1,Alabama,-1707,-26.865672,286087,-3.2,7970,-4.7,59.0,41.0,negative
1,2,Alaska,-125,-44.776119,-1116,0.4,20350,-2.5,66.0,34.0,negative
2,3,Arizona,15285,-26.865672,838337,-6.0,18230,-3.9,89.8,10.2,positive
3,4,Arkansas,1012,-23.880597,109004,-4.0,12420,-3.6,56.2,43.8,positive
4,5,California,14558,-18.292683,944925,-4.8,16630,-3.9,95.0,5.0,positive


In [139]:
df.shape

(51, 11)

In [141]:
#Remove 'label' column and 'state' column as current pipeline is not yet compitible with non-numeric valeus

df.drop("label", axis = 1, inplace=True)
df.drop('state', axis = 1, inplace=True)

In [143]:
df.shape

(51, 9)

In [145]:
#Train-test split

from sklearn.model_selection import train_test_split

X = df.drop('total_diff', axis=1)
y = df['total_diff']

X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size = 0.2,
    random_state = 42
)

In [31]:
#Check class distribution between train and test sets

print("Training set class distribution:")
print(y_train.value_counts(normalize=True))
print("\nTest set class distribution:")
print(y_test.value_counts(normalize=True))

Training set class distribution:
label
positive    0.8
negative    0.2
Name: proportion, dtype: float64

Test set class distribution:
label
positive    0.818182
negative    0.181818
Name: proportion, dtype: float64


### Transfer train and test sets into np array so that it has the format of (features, values)

In [147]:
X_train_array = X_train.values.T
y_train_array = y_train.values.T
X_test_array = X_test.values.T
y_test_array = y_test.values.T

y_train_array = y_train_array.reshape(1, X_train_array.shape[1])
y_test_array = y_test_array.reshape(1, X_test_array.shape[1])

In [149]:
print(X_train_array.shape)
print(y_train_array.shape)
print(X_test_array.shape)
print(y_test_array.shape)

(8, 40)
(1, 40)
(8, 11)
(1, 11)


### Normalize the features in train and test sets

In [181]:
X_train_norm = (X_train_array - np.mean(X_train_array, axis=1, keepdims=True)) / (np.std(X_train_array, axis=1, keepdims=True) + 1e-8)
X_test_norm = (X_test_array - np.mean(X_test_array, axis=1, keepdims=True)) / (np.std(X_test_array, axis=1, keepdims=True) + 1e-8)

### Define activation functions

`sigmoid_stable` and `softmax_stable` are defined and applied to replace original `sigmoid` and `softmax` function as they could cause overflow issues due to data characteristics

In [199]:
#Activation Functions

def sigmoid(Z):
    A = 1/(1+np.exp(-Z))
    return A

def softmax(z):
    expZ = np.exp(z)
    return expZ/(np.sum(expZ, 0))

def relu(Z):
    A = np.maximum(0,Z)
    return A

def tanh(x):
    return np.tanh(x)

def derivative_relu(Z):
    return np.array(Z > 0, dtype = 'float')

def derivative_tanh(x):
    return (1 - np.power(x, 2))

def sigmoid_stable(Z):
    """Numerically stable sigmoid that never outputs exactly 0 or 1"""
    Z = np.clip(Z, -500, 500)  # Prevent overflow
    sigmoid_output = 1 / (1 + np.exp(-Z))
    
    # Ensure output is never exactly 0 or 1
    sigmoid_output = np.clip(sigmoid_output, 1e-15, 1 - 1e-15)
    return sigmoid_output

def softmax_stable(Z):
    """Numerically stable softmax activation"""
    Z_shifted = Z - np.max(Z, axis=0, keepdims=True)  # Subtract max for stability
    exp_Z = np.exp(Z_shifted)
    return exp_Z / np.sum(exp_Z, axis=0, keepdims=True)

In [127]:
#Initialize Parameters

def initialize_parameters(layer_dims):
    
    parameters = {}
    L = len(layer_dims)            

    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) / np.sqrt(layer_dims[l-1]) #*0.01
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))
        
    return parameters

In [183]:
#Check for initialization

layer_dims = [X_train_norm.shape[0], 100, 200, y_train_array.shape[0]]
params = initialize_parameters(layer_dims)

for l in range(1, len(layer_dims)):
    print("Shape of W" + str(l) + ":", params['W' + str(l)].shape)
    print("Shape of B" + str(l) + ":", params['b' + str(l)].shape, "\n")

Shape of W1: (100, 8)
Shape of B1: (100, 1) 

Shape of W2: (200, 100)
Shape of B2: (200, 1) 

Shape of W3: (1, 200)
Shape of B3: (1, 1) 



## Forward Propagation

In [193]:
def forward_propagation(X, parameters, activation):
   
    forward_cache = {}
    L = len(parameters) // 2                  
    
    forward_cache['A0'] = X

    for l in range(1, L):
        forward_cache['Z' + str(l)] = parameters['W' + str(l)].dot(forward_cache['A' + str(l-1)]) + parameters['b' + str(l)]
        
        if activation == 'tanh':
            forward_cache['A' + str(l)] = tanh(forward_cache['Z' + str(l)])
        else:
            forward_cache['A' + str(l)] = relu(forward_cache['Z' + str(l)])
            

    forward_cache['Z' + str(L)] = parameters['W' + str(L)].dot(forward_cache['A' + str(L-1)]) + parameters['b' + str(L)]
    
    if forward_cache['Z' + str(L)].shape[0] == 1:
        forward_cache['A' + str(L)] = sigmoid_stable(forward_cache['Z' + str(L)])
    else :
        forward_cache['A' + str(L)] = softmax_stable(forward_cache['Z' + str(L)])
    
    return forward_cache['A' + str(L)], forward_cache

In [195]:
#Check for the forward propagation

aL, forw_cache = forward_propagation(X_train_norm, params, 'relu')

for l in range(len(params)//2 + 1):
    print("Shape of A" + str(l) + " :", forw_cache['A' + str(l)].shape)

Shape of A0 : (8, 40)
Shape of A1 : (100, 40)
Shape of A2 : (200, 40)
Shape of A3 : (1, 40)


## Cost Function

In [157]:
def compute_cost(AL, Y):
    m = Y.shape[1]
    
    if Y.shape[0] == 1:
        cost = (1./m) * (-np.dot(Y,np.log(AL).T) - np.dot(1-Y, np.log(1-AL).T))
    else:
        cost = -(1./m) * np.sum(Y * np.log(AL))
        
    cost = np.squeeze(cost)      # To make sure your cost's shape is what we expect (e.g. this turns [[17]] into 17).
    
    return cost

## Backward Propagation

In [159]:
def backward_propagation(AL, Y, parameters, forward_cache, activation):
    
    grads = {}
    L = len(parameters)//2
    m = AL.shape[1]
    
    grads["dZ" + str(L)] = AL - Y
    grads["dW" + str(L)] = 1./m * np.dot(grads["dZ" + str(L)],forward_cache['A' + str(L-1)].T)
    grads["db" + str(L)] = 1./m * np.sum(grads["dZ" + str(L)], axis = 1, keepdims = True)
    
    for l in reversed(range(1, L)):
        if activation == 'tanh':
            grads["dZ" + str(l)] = np.dot(parameters['W' + str(l+1)].T,grads["dZ" + str(l+1)])*derivative_tanh(forward_cache['A' + str(l)])
        else:
            grads["dZ" + str(l)] = np.dot(parameters['W' + str(l+1)].T,grads["dZ" + str(l+1)])*derivative_relu(forward_cache['A' + str(l)])
            
        grads["dW" + str(l)] = 1./m * np.dot(grads["dZ" + str(l)],forward_cache['A' + str(l-1)].T)
        grads["db" + str(l)] = 1./m * np.sum(grads["dZ" + str(l)], axis = 1, keepdims = True)

    return grads

In [163]:
#Check for backward propagation

grads = backward_propagation(forw_cache["A" + str(3)], y_train_array, params, forw_cache, 'relu')

for l in reversed(range(1, len(grads)//3 + 1)):
    print("Shape of dZ" + str(l) + " :", grads['dZ' + str(l)].shape)
    print("Shape of dW" + str(l) + " :", grads['dW' + str(l)].shape)
    print("Shape of dB" + str(l) + " :", grads['db' + str(l)].shape, "\n")

Shape of dZ3 : (1, 40)
Shape of dW3 : (1, 200)
Shape of dB3 : (1, 1) 

Shape of dZ2 : (200, 40)
Shape of dW2 : (200, 100)
Shape of dB2 : (200, 1) 

Shape of dZ1 : (100, 40)
Shape of dW1 : (100, 8)
Shape of dB1 : (100, 1) 



In [165]:
#Update Parameters

def update_parameters(parameters, grads, learning_rate):

    L = len(parameters) // 2 
    
    for l in range(L):
        parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate * grads["dW" + str(l+1)]
        parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate * grads["db" + str(l+1)]
        
    return parameters

## Predictions

In [167]:
def predict(X, y, parameters, activation):

    m = X.shape[1]
    y_pred, caches = forward_propagation(X, parameters, activation)
    
    if y.shape[0] == 1:
        y_pred = np.array(y_pred > 0.5, dtype = 'float')
    else:
        y = np.argmax(y, 0)
        y_pred = np.argmax(y_pred, 0)
    
    return np.round(np.sum((y_pred == y)/m), 2)

## Implement Network

Initialize parameters once, and then run the following in loop:
1. forward_prop(x, parameters)2. cost_function(aL, y)
3. 
backward_prop(x, y, parameters, forward_cach)
4. 
parameters = update_parameters(parameters, gradients, learning_rate)

In [187]:
def model(X, Y, layers_dims, learning_rate = 0.03, activation = 'relu', num_iterations = 3000):#lr was 0.009

    np.random.seed(1)
    costs = []              
    
    parameters = initialize_parameters(layers_dims)

    for i in range(0, num_iterations):

        AL, forward_cache = forward_propagation(X, parameters, activation)

        cost = compute_cost(AL, Y)

        grads = backward_propagation(AL, Y, parameters, forward_cache, activation)

        parameters = update_parameters(parameters, grads, learning_rate)
        
        if i % (num_iterations/10) == 0:
            print("\niter:{} \t cost: {} \t train_acc:{} \t test_acc:{}".format(i, np.round(cost, 2), predict(X_train_norm, y_train_array, parameters, activation), predict(X_test_norm, y_test_array, parameters, activation)))
        
        if i % 10 == 0:
            print("==", end = '')

       
    return parameters

In [201]:
layers_dims = [X_train_array.shape[0], 20, 7, 5, y_train_array.shape[0]] #  4-layer model
lr = 0.0075
iters = 2500

parameters = model(X_train_array, y_train_array, layers_dims, learning_rate = lr, activation = 'relu', num_iterations = iters)


iter:0 	 cost: 181148.97 	 train_acc:0.0 	 test_acc:0.0
iter:250 	 cost: nan 	 train_acc:0.0 	 test_acc:0.0
iter:500 	 cost: nan 	 train_acc:0.0 	 test_acc:0.0
iter:750 	 cost: nan 	 train_acc:0.0 	 test_acc:0.0
iter:1000 	 cost: nan 	 train_acc:0.0 	 test_acc:0.0
iter:1250 	 cost: nan 	 train_acc:0.0 	 test_acc:0.0
iter:1500 	 cost: nan 	 train_acc:0.0 	 test_acc:0.0
iter:1750 	 cost: nan 	 train_acc:0.0 	 test_acc:0.0
iter:2000 	 cost: nan 	 train_acc:0.0 	 test_acc:0.0
iter:2250 	 cost: nan 	 train_acc:0.0 	 test_acc:0.0