# CS376 Programming Assignment 1

In [1]:
import numpy as np

def create_data(values):
    x = np.random.uniform(0, 1, (values, 2));
    y = np.prod(x, axis=1)
    return x, y

x, y = create_data(8000)

#### Activation Functions

In [2]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_deriv(x):
    return sigmoid(x) * (1 - sigmoid(x))

def relu(x):
    return np.maximum(0, x)

def relu_deriv(x):
    return np.where(x > 0, 1, 0)

def tanh(x):
    return np.tanh(x)

def tanh_deriv(x):
    return 1 - np.tanh(x) ** 2

#### Creating Neural Network

In [3]:
def initialize_weights(hidden_size):
    W1 = np.random.uniform(-1, 1, (hidden_size, 2))
    W2 = np.random.uniform(-1, 1, (hidden_size, hidden_size))
    W3 = np.random.uniform(-1, 1, (1, hidden_size))
    
    return W1, W2, W3

def neural_network(x, y, hidden_size, learning_rate, activation, activation_deriv, max_iterations=100):
    N = x.shape[0]
    # Initialize weights
    W1, W2, W3 = initialize_weights(hidden_size)
    
    # Initialize bias
    b1 = np.full((1, hidden_size), 0.0)
    b2 = np.full((1, hidden_size), 0.0)
    b3 = np.full((1, 1), 0.0)
    
    for iteration in range(max_iterations):
        total_error = 0
        
        # Shuffle data
        indices = np.random.randint(0, N, N)
        x_shuffled = x[indices]
        y_shuffled = y[indices]
        
        for i in range(N):
            x_i = x_shuffled[i].reshape(1, -1)  # Input (1 x 2)
            y_i = y_shuffled[i]                 # Target output (1 x 1)

            # Feed forward
            o1 = x_i                # Input layer (1 x 2)
            z2 = o1 @ W1.T + b1     # Hidden layer 1 input (2 x hidden_size)
            o2 = activation(z2)     # Hidden layer 2 output (hidden_size x hidden_size)
            z3 = o2 @ W2.T + b2     # Hidden layer 2 input (hidden_size x hidden_size)
            o3 = activation(z3)     # Hidden layer 2 output (1 x 1)
            z4 = o3 @ W3.T + b3     # Output layer input (1 x 1)
            o4 = z4                 # Output layer

            # Backpropagation
            error = o4 - y_i  # Output error (1 x 1)
            total_error += error ** 2

            d4 = error
            d3 = np.dot(d4, W3) * activation_deriv(z3)
            d2 = np.dot(d3, W2) * activation_deriv(z2)

            gradW3 = d4.T
            gradW2 = d3.T @ o2
            gradW1 = d2.T @ o1
            
            # Update weights
            W1 -= learning_rate * gradW1
            W2 -= learning_rate * gradW2
            W3 -= learning_rate * gradW3
            
            # Update bias
            b1 -= learning_rate * d2
            b2 -= learning_rate * d3
            b3 -= learning_rate * d4

        # Calculate RMSE
        rmse = np.sqrt(total_error / N)

        if (iteration + 1) % 10 == 0:
            print(f"Iteration {iteration+1}/{max_iterations}")
            print(f"RMSE: {rmse}")

    return W1, W2, W3, b1, b2, b3

#### Test function

In [4]:
def test(x, activation, W1, W2, W3, b1, b2, b3):
    o1 = x                  # Input layer (1 x 2)
    z2 = o1 @ W1.T + b1     # Hidden layer 1 input (2 x hidden_size)
    o2 = activation(z2)     # Hidden layer 2 output (hidden_size x hidden_size)
    z3 = o2 @ W2.T + b2     # Hidden layer 2 input (hidden_size x hidden_size)
    o3 = activation(z3)     # Hidden layer 2 output (1 x 1)
    z4 = o3 @ W3.T + b3     # Output layer input (1 x 1)
    
    return z4

#### Train Network with ReLU

In [5]:
W1, W2, W3, b1, b2, b3 = neural_network(x, y, 16, 0.1, relu, relu_deriv, 100)

Iteration 10/100
RMSE: [[0.00982797]]
Iteration 20/100
RMSE: [[0.00752639]]
Iteration 30/100
RMSE: [[0.00708409]]
Iteration 40/100
RMSE: [[0.00663317]]
Iteration 50/100
RMSE: [[0.00640811]]
Iteration 60/100
RMSE: [[0.00615314]]
Iteration 70/100
RMSE: [[0.00598634]]
Iteration 80/100
RMSE: [[0.00571492]]
Iteration 90/100
RMSE: [[0.00538226]]
Iteration 100/100
RMSE: [[0.00527329]]


#### Testing Results of ReLU

In [6]:
x_test, y_test = create_data(2000)

error = 0
for i in range(2000):
    print(test(x_test[i], relu, W1, W2, W3, b1, b2, b3), y_test[i])
    error += (test(x_test[i], relu, W1, W2, W3, b1, b2, b3) - y_test[i]) ** 2
    
rmse = np.sqrt(error / 2000)
print(f"RMSE: {rmse}")

[[0.01624591]] 0.012863971017317764
[[0.03911201]] 0.03230676909507954
[[0.00434566]] 0.007781175350371361
[[0.07710371]] 0.08266662951529169
[[0.20077331]] 0.20212876854099604
[[0.11901117]] 0.11813226629474417
[[0.14107987]] 0.13434399520279533
[[0.77652996]] 0.7729641337138069
[[0.24803456]] 0.2413020104806415
[[0.10709643]] 0.10020338280094775
[[0.25157551]] 0.24829438343426927
[[0.56947824]] 0.5699396570100657
[[0.09392982]] 0.08681939755805601
[[0.14862334]] 0.15054477839559857
[[0.03322876]] 0.028992861914366508
[[0.07527949]] 0.07437835297894212
[[0.37136502]] 0.3665902675949858
[[0.53129102]] 0.532471294106207
[[0.15074171]] 0.15897650231530608
[[0.02134818]] 0.01447017047303086
[[0.58768419]] 0.5908304653390718
[[0.2391398]] 0.24572208362710468
[[0.18567165]] 0.18571879707672564
[[0.79740552]] 0.7937166397807784
[[0.06190238]] 0.06641512589238073
[[0.05084638]] 0.052290210079499015
[[0.02073738]] 0.01704052340176588
[[0.58839114]] 0.5901770706892506
[[0.46013933]] 0.458878721

#### Training Network with Sigmoid

In [7]:
W1, W2, W3, b1, b2, b3 = neural_network(x, y, 16, 0.1, sigmoid, sigmoid_deriv, 100)

Iteration 10/100
RMSE: [[0.02195322]]
Iteration 20/100
RMSE: [[0.00838936]]
Iteration 30/100
RMSE: [[0.00797927]]
Iteration 40/100
RMSE: [[0.00743451]]
Iteration 50/100
RMSE: [[0.00704222]]
Iteration 60/100
RMSE: [[0.00683813]]
Iteration 70/100
RMSE: [[0.00642823]]
Iteration 80/100
RMSE: [[0.00625002]]
Iteration 90/100
RMSE: [[0.00609367]]
Iteration 100/100
RMSE: [[0.00591191]]


#### Testing Results of Sigmoid

In [8]:
x_test, y_test = create_data(2000)

error = 0
for i in range(2000):
    print(test(x_test[i], sigmoid, W1, W2, W3, b1, b2, b3), y_test[i])
    error += (test(x_test[i], sigmoid, W1, W2, W3, b1, b2, b3) - y_test[i]) ** 2
    
rmse = np.sqrt(error / 2000)
print(f"RMSE: {rmse}")

[[0.05257415]] 0.050464199637899665
[[0.60589781]] 0.5941437894636857
[[0.82982158]] 0.8320955317522772
[[0.22157325]] 0.22000111247178275
[[0.36203926]] 0.35722929271332204
[[0.01068182]] 0.010573008589304435
[[0.07324237]] 0.06916519842720978
[[0.01532836]] 0.010686254955960385
[[0.07080975]] 0.06750127516994428
[[0.26980002]] 0.2745632556027132
[[0.21746314]] 0.21596970942681995
[[0.00820502]] 0.0014629007815724381
[[0.33205514]] 0.33050925801305475
[[0.0957171]] 0.09120043426467114
[[0.11635145]] 0.11797617380404474
[[0.16773822]] 0.16550866398981887
[[0.00697233]] 0.006499297991148181
[[0.26425456]] 0.26433180613632756
[[0.75348274]] 0.7530282473266648
[[0.61639631]] 0.6047684441794369
[[0.06153927]] 0.0579841104185361
[[0.30422364]] 0.30518220828404263
[[0.08340219]] 0.07914135478510044
[[0.60407135]] 0.5977818237488515
[[0.23149476]] 0.23636758838915548
[[0.27149056]] 0.2710752596453371
[[0.19974464]] 0.20241618184730606
[[0.59926438]] 0.5880468550245475
[[0.45798678]] 0.4495364

#### Training Network with tanh

In [9]:
W1, W2, W3, b1, b2, b3 = neural_network(x, y, 16, 0.1, tanh, tanh_deriv, 100)

Iteration 10/100
RMSE: [[0.00695136]]
Iteration 20/100
RMSE: [[0.004669]]
Iteration 30/100
RMSE: [[0.00346342]]
Iteration 40/100
RMSE: [[0.00285857]]
Iteration 50/100
RMSE: [[0.002272]]
Iteration 60/100
RMSE: [[0.00205981]]
Iteration 70/100
RMSE: [[0.00182049]]
Iteration 80/100
RMSE: [[0.0016296]]
Iteration 90/100
RMSE: [[0.00149983]]
Iteration 100/100
RMSE: [[0.00139135]]


#### Testing Results of tanh

In [10]:
x_test, y_test = create_data(2000)

error = 0
for i in range(2000):
    print(test(x_test[i], tanh, W1, W2, W3, b1, b2, b3), y_test[i])
    error += (test(x_test[i], tanh, W1, W2, W3, b1, b2, b3) - y_test[i]) ** 2
    
rmse = np.sqrt(error / 2000)
print(f"RMSE: {rmse}")

[[0.04560232]] 0.044976331628676175
[[0.21232518]] 0.21283996815752107
[[0.90305139]] 0.9041322881929914
[[0.40125128]] 0.4000907619677956
[[0.58337571]] 0.5818393105424295
[[0.43893514]] 0.43758864045240836
[[0.03115782]] 0.03117149078430989
[[0.85346589]] 0.8528761438464678
[[0.04510441]] 0.04306065314364889
[[0.28532678]] 0.2867137949020989
[[0.02298259]] 0.024096387408177274
[[0.01923894]] 0.01929625429700575
[[0.11970632]] 0.12005607212149626
[[0.34238647]] 0.342885977191929
[[0.03010484]] 0.02963121536690645
[[0.00717906]] 0.006918807799096498
[[0.18237734]] 0.18178731537681744
[[0.1593841]] 0.15984269040647384
[[0.03892881]] 0.03856186313318794
[[0.07249243]] 0.07221320404002197
[[0.21142621]] 0.21074829907936646
[[0.39397425]] 0.393278493403622
[[0.12061053]] 0.12019613396768233
[[0.174182]] 0.17371583042012423
[[0.02171726]] 0.022532701041959032
[[0.10128205]] 0.1022561478767649
[[0.62854143]] 0.626138280940703
[[0.4595794]] 0.45983619343033
[[0.64676741]] 0.6444126568873935
[