In [None]:
# Mount Google Drive (Colab only) so that the data and weight files referenced
# under /content/drive in the cells below can be read and written.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Seed NumPy's global RNG so the random layer initialisation below is repeatable.
np.random.seed(0)

# Training table; the first CSV column is used as the row index.
train_csv_path = "/content/drive/MyDrive/Colab Notebooks/ML Bootcamp data/linear_train.csv"
df_train = pd.read_csv(train_csv_path, index_col=0)

def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

    The naive formula computes exp(-z), which overflows for large negative z.
    Here only exp(-|z|) is evaluated (always in (0, 1]) and the algebraically
    equivalent form exp(z) / (1 + exp(z)) is used for negative inputs.

    Parameters
    ----------
    z : scalar or array-like of numbers

    Returns
    -------
    Scalar for scalar input, otherwise an ndarray with the shape of ``z``.
    """
    z = np.asarray(z)
    # exp of a non-positive number can underflow to 0 but never overflow.
    decay = np.exp(-np.abs(z))
    out = np.where(z >= 0, 1/(1+decay), decay/(1+decay))
    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays as they are.
    return out[()]

def sigmoid_derivative(x):
    """Derivative of the logistic function at ``x``: s * (1 - s) with s = sigmoid(x)."""
    s = sigmoid(x)
    return s * (1 - s)

def R2(out,y):
    """Coefficient of determination of predictions ``out`` against targets ``y``.

    Returns 1 - SS_res / SS_tot, where SS_res is the sum of squared residuals
    and SS_tot is the sum of squared deviations of ``y`` from its mean.
    """
    residual_ss = np.sum((out - y) ** 2)
    total_ss = np.sum((y - np.mean(y)) ** 2)
    return 1 - residual_ss / total_ss


class Layer():
    """Fully connected layer holding its parameters, pre-activations and delta.

    Attributes
    ----------
    weights : ndarray, shape (n_inputs, n_neurons)
    bias    : ndarray, shape (1, n_neurons)
    outputs : pre-activation values written by ``forward``
    error   : delta term written by ``backward`` or ``set_error``
    """

    def __init__(self,n_inputs,n_neurons):
        # Both parameters come from the global NumPy RNG (standard normal);
        # weights are drawn before the bias.
        self.weights = np.random.standard_normal(size=(n_inputs, n_neurons))
        self.bias = np.random.standard_normal(size=(1, n_neurons))

    def forward(self,inputs):
        """Store the affine transform ``inputs @ weights + bias`` in ``self.outputs``."""
        weighted_sum = inputs @ self.weights
        self.outputs = weighted_sum + self.bias

    def backward(self,error,weights):
        """Compute this layer's delta from the next layer's delta and weights.

        ``error`` is projected back through the transpose of ``weights`` and
        scaled element-wise by the sigmoid derivative of this layer's stored
        pre-activations; the result is stored in ``self.error``.
        """
        propagated = error @ np.transpose(weights)
        local_slope = sigmoid_derivative(self.outputs)
        self.error = propagated * local_slope

    def set_error(self,error):
        """Store ``error`` directly as this layer's delta."""
        self.error = error

    def fill(self,locw,locb):
        """Replace weights and bias with arrays loaded via ``np.load`` from ``locw`` / ``locb``."""
        loaded_weights = np.load(locw)
        self.weights = loaded_weights
        loaded_bias = np.load(locb)
        self.bias = loaded_bias
 
def forward_propagate(x,layers,act):
    """Run a forward pass through ``layers`` and record every layer's activation.

    Every layer except the last is followed by a sigmoid; the last layer's
    raw (linear) output is used as the network output.

    Parameters
    ----------
    x      : input batch fed to the first layer
    layers : list of layer objects exposing ``forward(inputs)`` and ``outputs``
    act    : list with at least ``len(layers)`` slots; slot ``i`` is overwritten
             in place with the activation of layer ``i``

    Returns
    -------
    (layers, act) : the same two objects that were passed in.

    Notes
    -----
    The running activation is carried explicitly, so the first layer always
    receives ``x`` -- including when the network has a single layer, where
    the output layer would otherwise be fed whatever was left in ``act[-1]``.
    """
    last = len(layers) - 1
    current = x
    for i, layer in enumerate(layers):
        layer.forward(current)
        # Hidden layers are squashed; the output layer stays linear.
        current = layer.outputs if i == last else sigmoid(layer.outputs)
        act[i] = current

    return layers,act

def backward_propagate(x,y,learning_rate,layers,act):
    """One gradient step on every layer from a completed forward pass.

    Parameters
    ----------
    x             : input batch that produced ``act``
    y             : targets, same shape as ``act[-1]``
    learning_rate : step size; each update is additionally divided by the
                    batch size ``act[-1].shape[0]``
    layers        : list of layer objects (``weights``, ``bias``, ``error``,
                    ``set_error`` and ``backward``); parameters are modified in place
    act           : per-layer activations from the forward pass

    Returns
    -------
    layers : the same list that was passed in.

    Notes
    -----
    The pass runs in two phases. All deltas are computed first, from the
    output layer backwards, using the weights that produced ``act``. Only
    then are weights and biases updated. Updating a layer before the delta
    of the layer in front of it has been computed would push that delta
    through already-modified weights.
    """
    size = act[-1].shape[0]
    last = len(layers) - 1

    # Phase 1: deltas. The output delta is (y - prediction), so the updates
    # below are added to the parameters.
    layers[last].set_error(-(act[-1]-y))
    for i in range(last - 1, -1, -1):
        layers[i].backward(layers[i+1].error,layers[i+1].weights)

    # Phase 2: parameter updates; layer 0 takes the raw input, every other
    # layer takes the activation of the layer before it.
    scale = learning_rate/size
    for i in range(last, -1, -1):
        layer_input = act[i-1] if i > 0 else x
        Jw = np.array(layer_input).T @ layers[i].error
        Jb = np.sum(layers[i].error,axis = 0)
        layers[i].weights += scale*Jw
        layers[i].bias += scale*Jb

    return layers


In [None]:
# Rows [0, N_TRAIN) are used for fitting, the remainder as a hold-out set.
N_TRAIN = 45000

# The last column is the target, everything before it is a feature.
train_data = df_train.iloc[:N_TRAIN,:-1].values
test_data = df_train.iloc[N_TRAIN:,:-1].values
label_train = df_train.iloc[:N_TRAIN,-1].to_numpy()
label_test = df_train.iloc[N_TRAIN:,-1].to_numpy()

# Normalisation statistics come from the training split only and are reused
# for the hold-out split.
# NOTE(review): mean/std are taken over the whole feature matrix (no axis),
# i.e. one scalar pair shared by all features. Switching to per-feature
# scaling (axis=0) would have to be mirrored wherever new data is normalised.
x_mean, x_std = np.mean(train_data), np.std(train_data)
y_mean, y_std = np.mean(label_train), np.std(label_train)

x = (train_data - x_mean)/x_std
x_test = (test_data - x_mean)/x_std

# Targets are kept as column vectors, one row per sample.
y = ((label_train - y_mean)/y_std).reshape(-1,1)
y_test = ((label_test - y_mean)/y_std).reshape(-1,1)

n = 2 # number of hidden layers
m = 20 # number of neurons in hidden layers

# Layer widths: input width taken from the data, n hidden layers of m neurons,
# and a single output neuron.
layer_sizes = [x.shape[1]] + [m]*n + [1]

# Layers are created first to last, so the RNG is consumed in the same order
# as building them one by one.
layers = [Layer(n_in,n_out) for n_in,n_out in zip(layer_sizes[:-1],layer_sizes[1:])]

# Initial forward pass over the training inputs; fills one activation slot per layer.
act = [None]*len(layers)
layers,act = forward_propagate(x,layers,act)


In [None]:
learning_rate=0.01
n_iterations = 10000  # passes over the training set
n_batches = 10        # mini-batches per pass
report_every = 100    # print metrics every this many mini-batch updates
start = 0
end = x.shape[0]
# At least 1 so that range() below is valid even for very small datasets.
step = max(1, x.shape[0]//n_batches)
itr = 0  # number of mini-batch updates performed so far

# Separate activation buffer for evaluation so the hold-out pass does not
# overwrite the training activations held in `act`.
act_test = [None]*len(layers)

for i in range(n_iterations):
    for k in range(start,end,step):
        # Plain slices keep y as a column vector and also work when the last
        # batch is shorter than `step`.
        x_batch = x[k:k+step,:]
        y_batch = y[k:k+step]

        layers,act = forward_propagate(x_batch,layers,act)
        layers = backward_propagate(x_batch,y_batch,learning_rate,layers,act)
        itr+=1

        if (itr % report_every == 0):
            # The post-update loss on the current batch is only needed when
            # reporting, so the extra forward pass is done only here.
            layers,act = forward_propagate(x_batch,layers,act)
            loss = np.mean((act[-1] - y_batch)**2)

            print("After {} iterations : ".format(itr))
            print("loss = ",loss)
            layers_test ,act_test = forward_propagate(x_test,layers,act_test)
            score = R2(act_test[-1],y_test)
            print(score*100)

After 100 iterations : 
loss =  0.15907640509653018
84.3728868491374
After 200 iterations : 
loss =  0.15907668354342047
84.37282942061788
After 300 iterations : 
loss =  0.15907682522049135
84.37281508920294
After 400 iterations : 
loss =  0.15907688672062056
84.37282082077867
After 500 iterations : 
loss =  0.15907690218362747
84.37283588694173
After 600 iterations : 
loss =  0.159076891861459
84.37285516805582
After 700 iterations : 
loss =  0.1590768676701995
84.37287616751337
After 800 iterations : 
loss =  0.1590768365484404
84.37289765490533


KeyboardInterrupt: ignored

In [None]:

# Persist every layer's parameters as nnlw<k>.npy / nnlb<k>.npy (k is 1-based),
# looping over `layers` so the cell keeps working when the depth changes.
WEIGHTS_DIR = "/content/drive/MyDrive/Colab Notebooks/ML Bootcamp data"
for idx, layer in enumerate(layers, start=1):
    np.save("{}/nnlw{}.npy".format(WEIGHTS_DIR, idx),layer.weights)
    np.save("{}/nnlb{}.npy".format(WEIGHTS_DIR, idx),layer.bias)




In [None]:
df_test = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/ML Bootcamp data/linear_test_data.csv")
# First column holds the row ids, the remaining columns are the features.
X_test = df_test.iloc[:,1:].values
ids = df_test.iloc[:,0].values

# Apply the same normalisation that was used for the training inputs.
Test_data = (X_test - np.mean(train_data))/np.std(train_data)
# A fresh activation list keeps this pass from overwriting the training `act`.
layers_test,act_test = forward_propagate(Test_data,layers,[None]*len(layers))
Y_pred = layers_test[-1].outputs
# Undo the target standardisation to get predictions in the original units.
Y_res = Y_pred * np.std(label_train,axis = 0) + np.mean(label_train,axis = 0)

# Y_res is a column vector; flatten it so each Prediction cell is a plain
# number instead of a one-element array, and so both columns keep numeric dtypes.
res = pd.DataFrame({"Ids": ids, "Prediction": Y_res.ravel()})
print(res)
res.to_csv("/content/drive/MyDrive/Colab Notebooks/ML Bootcamp data/nnl_result.csv")

         Ids             Prediction
0      96396    [7.348457897488052]
1      80166    [164.4475109445632]
2      20516    [57.87554743731631]
3      88881  [-101.93493893013674]
4      69115   [24.362384538538038]
...      ...                    ...
19995  59011   [145.11060184763016]
19996  66308    [41.18371640706795]
19997  99637     [93.8499510226587]
19998  84616   [325.20994774008125]
19999  66210  [-169.40233584893198]

[20000 rows x 2 columns]
