# **Implementation of Neural Networks from scratch**

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Read the housing table from a CSV file given by a relative path, then
# preview the first rows as the cell's displayed output.
filename = "housing.csv"
df = pd.read_csv(filename)
df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,-122.23,37.88,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,-122.22,37.86,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,-122.25,37.85,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,-122.25,37.85,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY


In [None]:
# Remove rows that contain missing values. The result is reassigned rather
# than mutated with inplace=True, which pandas discourages; the name `df`
# is kept so the cells that follow continue to work unchanged.
df = df.dropna()


In [None]:
# Target: median house value divided by 100000, as a NumPy array.
y = (df["median_house_value"] / 100000).to_numpy()

# Features: every column except the target and `ocean_proximity`.
X = df.drop(columns=["median_house_value", "ocean_proximity"]).to_numpy()

# Hold out 20% of the rows for testing; the split is fixed by random_state.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Fit the scaler on the training split only, then apply the same
# transformation to the test split.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)


In [None]:
class FCLayer:
    """Fully connected layer computing ``input @ weights + bias``.

    Weights are drawn from a standard normal distribution scaled by
    ``sqrt(2 / input_size)`` (He initialisation); the bias starts at zero
    with shape ``(1, output_size)``.
    """

    def __init__(self, input_size, output_size):
        """Create the layer parameters.

        Args:
            input_size: number of input features.
            output_size: number of output units.
        """
        self.input_size = input_size
        self.output_size = output_size
        self.weights = np.random.randn(input_size, output_size) * np.sqrt(2 / input_size)
        self.bias = np.zeros((1, output_size))

    def forward(self, input):
        """Cache ``input`` for the backward pass and return the affine output."""
        self.input = input
        return np.dot(input, self.weights) + self.bias

    def backward(self, output_error, learning_rate):
        """Apply one gradient-descent step and return the error for the previous layer.

        Args:
            output_error: gradient of the loss with respect to this layer's
                output, one row per sample.
            learning_rate: step size applied to both weights and bias.

        Returns:
            Gradient of the loss with respect to this layer's input.
        """
        # Promote to 2-D so single samples and batches share one code path.
        output_error = np.atleast_2d(output_error)
        layer_input = np.atleast_2d(self.input)

        # Computed with the pre-update weights, before they are modified below.
        input_error = np.dot(output_error, self.weights.T)
        weights_error = np.dot(layer_input.T, output_error)
        # The bias is shared by every sample, so its gradient is the sum over
        # the batch axis. Subtracting the raw (batch, out) error in place from
        # the (1, out) bias fails to broadcast for batch sizes above one.
        bias_error = np.sum(output_error, axis=0, keepdims=True)

        self.weights -= learning_rate * weights_error
        self.bias -= learning_rate * bias_error
        return input_error

In [None]:
class ActivationLayer:
    """Layer that applies a caller-supplied function and its derivative.

    The layer holds no trainable parameters of its own.
    """

    def __init__(self, activation, activation_prime):
        """Store the activation callable and the callable for its derivative."""
        self.activation, self.activation_prime = activation, activation_prime

    def forward(self, input):
        """Cache ``input`` and return ``activation(input)``."""
        self.input = input
        activated = self.activation(self.input)
        return activated

    def backward(self, output_error, learning_rate):
        """Scale ``output_error`` by the derivative at the cached input.

        ``learning_rate`` is accepted for interface parity with the other
        layers and is not used here.
        """
        local_slope = self.activation_prime(self.input)
        return output_error * local_slope

In [None]:
class FlattenLayer:
    """Reshape an input into a single row and restore the shape on the way back."""

    def __init__(self, input_shape):
        """Remember the shape that ``backward`` should restore."""
        self.input_shape = input_shape

    def forward(self, input):
        """Return ``input`` reshaped to ``(1, -1)``, i.e. one row."""
        row_vector_shape = (1, -1)
        return np.reshape(input, row_vector_shape)

    def backward(self, output_error, learning_rate):
        """Reshape ``output_error`` back to ``input_shape``.

        ``learning_rate`` is unused: the layer has nothing to update.
        """
        restored = np.reshape(output_error, self.input_shape)
        return restored

In [None]:
def relu(x):
    """Rectified linear unit: element-wise maximum of ``x`` and 0."""
    rectified = np.maximum(x, 0)
    return rectified

def relu_prime(x):
    """Derivative of ReLU as integers: 1 where ``x >= 0`` (including 0), else 0."""
    non_negative = np.array(x >= 0)
    return non_negative.astype('int')

In [None]:
def sse(y_true, y_pred):
    """Half the sum of squared differences between ``y_true`` and ``y_pred``."""
    residual = y_true - y_pred
    squared = np.square(residual)
    return 0.5 * np.sum(squared)

def sse_prime(y_true, y_pred):
    """Gradient of ``sse`` with respect to ``y_pred``: ``y_pred - y_true``."""
    gradient = y_pred - y_true
    return gradient

In [None]:
# Seed NumPy's global RNG before the layers draw their initial weights, so a
# fresh kernel run reproduces the same initialisation and training curve.
SEED = 1
np.random.seed(SEED)

# Flatten -> Dense(100) -> ReLU -> Dense(50) -> ReLU -> Dense(1).
network = [
    FlattenLayer(input_shape=x_train.shape[1:]),
    FCLayer(x_train.shape[1], 100),
    ActivationLayer(relu, relu_prime),
    FCLayer(100, 50),
    ActivationLayer(relu, relu_prime),
    FCLayer(50, 1),
]

epochs = 100
learning_rate = 0.01

# Per-sample gradient descent: every training example triggers one forward
# pass, one backward pass, and one parameter update.
for epoch in range(epochs):
    error = 0
    for x, y_true in zip(x_train, y_train):
        # Forward pass through every layer in order.
        output = x
        for layer in network:
            output = layer.forward(output)

        # Accumulate the loss for reporting only.
        error += sse(y_true, output)

        # Backward pass: propagate the loss gradient from the last layer to
        # the first; each layer updates its own parameters along the way.
        output_error = sse_prime(y_true, output)
        for layer in reversed(network):
            output_error = layer.backward(output_error, learning_rate)

    # Report the mean per-sample loss for this epoch.
    error /= len(x_train)
    print('%d/%d, error=%f' % (epoch + 1, epochs, error))

1/100, error=0.260746
2/100, error=0.194591
3/100, error=0.179753
4/100, error=0.171894
5/100, error=0.166003
6/100, error=0.161926
7/100, error=0.158452
8/100, error=0.155284
9/100, error=0.152458
10/100, error=0.149865
11/100, error=0.147926
12/100, error=0.146249
13/100, error=0.144886
14/100, error=0.142850
15/100, error=0.141900
16/100, error=0.140579
17/100, error=0.139545
18/100, error=0.138748
19/100, error=0.137754
20/100, error=0.137396
21/100, error=0.136591
22/100, error=0.135892
23/100, error=0.135478
24/100, error=0.134367
25/100, error=0.134270
26/100, error=0.133248
27/100, error=0.132822
28/100, error=0.132578
29/100, error=0.131678
30/100, error=0.131181
31/100, error=0.130598
32/100, error=0.130549
33/100, error=0.129878
34/100, error=0.129286
35/100, error=0.129043
36/100, error=0.128481
37/100, error=0.128137
38/100, error=0.127688
39/100, error=0.127332
40/100, error=0.126879
41/100, error=0.126097
42/100, error=0.126025
43/100, error=0.125815
44/100, error=0.1252

In [None]:
from sklearn.metrics import r2_score
def predict(network, input):
    """Feed ``input`` through each layer's ``forward`` in order and return the result.

    Args:
        network: iterable of layer objects exposing ``forward``.
        input: value handed to the first layer.

    Returns:
        The output of the last layer, or ``input`` itself if ``network`` is empty.
    """
    activation = input
    for stage in network:
        activation = stage.forward(activation)
    return activation

# Run each held-out sample through the network once and collect the
# outputs as a flat 1-D array.
predictions = np.array([predict(network, x) for x in x_test]).reshape(-1)

# Mean per-sample loss, computed from the predictions already in hand rather
# than pushing every test sample through the network a second time.
error = sum(sse(y, y_hat) for y, y_hat in zip(y_test, predictions)) / len(x_test)
r2 = r2_score(y_test, predictions)
print('sse: %.4f' % error)
print('r2: %.4f' % r2)

sse: 0.1512
r2: 0.7726


# Advantages and disadvantages of ReLU

Advantages:

1. Easy to implement and computationally cheaper than sigmoid or tanh

2. Mitigates the vanishing-gradient problem, because the gradient is 1 for positive inputs

Disadvantages:

1. Sensitive to weight initialization

2. Dying ReLU Problem
