In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
test = pd.read_csv("C:/Users/adith/Desktop/Datasets/Polynomial_test.csv")
train = pd.read_csv("C:/Users/adith/Desktop/Datasets/Polynomial_train.csv")
x_train_scaled = (train - train.mean()) / (train.std()) 
x_test_scaled = (test - test.mean()) / (test.std())
x_train_scaled.head()

Unnamed: 0.1,Unnamed: 0,X,Y,Z,label
0,-1.731999,0.105753,-1.697386,-1.010452,0.244413
1,-1.73193,-0.242734,-0.643264,-1.515023,0.102123
2,-1.73186,-0.714943,-0.310736,-0.321029,-0.268685
3,-1.731791,-1.519383,-0.325833,0.670617,-0.009539
4,-1.731722,0.924624,0.068216,1.156332,-0.295934


In [3]:
x_train_scaled = x_train_scaled.drop(["Unnamed: 0"], axis = 1)
x_test_scaled = x_test_scaled.drop(["Unnamed: 0"], axis = 1)
x_train_scaled.head()

Unnamed: 0,X,Y,Z,label
0,0.105753,-1.697386,-1.010452,0.244413
1,-0.242734,-0.643264,-1.515023,0.102123
2,-0.714943,-0.310736,-0.321029,-0.268685
3,-1.519383,-0.325833,0.670617,-0.009539
4,0.924624,0.068216,1.156332,-0.295934


In [5]:
y = x_train_scaled["label"]
y_tests = x_test_scaled["label"]
print(y.shape)

(50000,)


In [6]:
X_train = x_train_scaled.values
X_train = X_train.T
print(X_train.shape)

Y_train = np.array([y])
Y_train = Y_train.reshape(1,50000)
print(Y_train.shape)
X_test = x_test_scaled.values
X_test = X_test.T
Y_test = np.array([y_tests])
Y_test = Y_test.reshape(1,10000)
print(Y_test.shape)

(4, 50000)
(1, 50000)
(1, 10000)


In [7]:
def tanh(x):
    return np.tanh(x)

def relu(x):
    return np.maximum(x, 0)

def softmax(x):
    expX = np.exp(x)
    return expX/np.sum(expX, axis = 0)

In [8]:
def derivative_tanh(x):
    return (1 - np.power(np.tanh(x), 2))

def derivative_relu(x):
    return np.array(x > 0, dtype = np.float32)

In [9]:
def initialize_parameters(n_x, n_h, n_y):
    w1 = np.random.randn(n_h, n_x)
    b1 = np.zeros((n_h, 1))
    
    w2 = np.random.randn(n_y, n_h)
    b2 = np.zeros((n_y, 1))
    
    parameters = {
        "w1" : w1,
        "b1" : b1,
        "w2" : w2,
        "b2" : b2
    }
    
    return parameters

In [10]:
def forward_propagation(x, parameters):
    
    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']
    
    z1 = np.dot(w1, x) + b1
    a1 = z1
    
    z2 = np.dot(w2, a1) + b2
    a2 = z2
    
    forward_cache = {
        "z1" : z1,
        "a1" : a1,
        "z2" : z2,
        "a2" : a2
    }
    
    return forward_cache

In [11]:
def cost_function(a2, y):
    m = y.shape[1]
    
    cost = (1/2*m)*np.sum(np.square(y-a2))
    
    #cost = -(1/m)*np.sum(np.sum(y*np.log(a2, 0), 1))
    
    return cost

In [12]:
def backward_prop(x, y, parameters, forward_cache):
    
    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']
    
    a1 = forward_cache['a1']
    a2 = forward_cache['a2']
    
    m = x.shape[1]
    
    dz2 = (a2 - y)
    dw2 = (1/m)*np.dot(dz2, a1.T)
    db2 = (1/m)*np.sum(dz2, axis = 1, keepdims = True)
    
    dz1 = (1/m)*np.dot(w2.T, dz2)*derivative_relu(a1)
    dw1 = (1/m)*np.dot(dz1, x.T)
    db1 = (1/m)*np.sum(dz1, axis = 1, keepdims = True)
    
    gradients = {
        "dw1" : dw1,
        "db1" : db1,
        "dw2" : dw2,
        "db2" : db2
    }
    
    return gradients

In [13]:
def update_parameters(parameters, gradients, learning_rate):
    
    w1 = parameters['w1']
    b1 = parameters['b1']
    w2 = parameters['w2']
    b2 = parameters['b2']
    
    dw1 = gradients['dw1']
    db1 = gradients['db1']
    dw2 = gradients['dw2']
    db2 = gradients['db2']
    
    w1 = w1 - learning_rate*dw1
    b1 = b1 - learning_rate*db1
    w2 = w2 - learning_rate*dw2
    b2 = b2 - learning_rate*db2
    
    parameters = {
        "w1" : w1,
        "b1" : b1,
        "w2" : w2,
        "b2" : b2
    }
    
    return parameters


In [14]:
def model(x, y, n_h, learning_rate, iterations):
    
    n_x = x.shape[0]
    n_y = y.shape[0]
    
    cost_list = []
    
    parameters = initialize_parameters(n_x, n_h, n_y)
    
    for i in range(iterations):
        
        forward_cache = forward_propagation(x, parameters)
        
        cost = cost_function(forward_cache['a2'], y)
        
        gradients = backward_prop(x, y, parameters, forward_cache)
        
        parameters = update_parameters(parameters, gradients, learning_rate)
        
        cost_list.append(cost)
        
        if(i%(iterations/10) == 0):
            print("Cost after", i, "iterations is :", cost)
        
    return parameters, cost_list

In [15]:
iterations = 1000
n_h = 1
learning_rate = 0.03
Parameters, Cost_list = model(X_train, Y_train, n_h = n_h, learning_rate = learning_rate, iterations = iterations)

Cost after 0 iterations is : 2563544064.100525
Cost after 100 iterations is : 1171338871.4405346
Cost after 200 iterations is : 1171335394.8289611
Cost after 300 iterations is : 1171332986.8519666
Cost after 400 iterations is : 1171330578.8043098
Cost after 500 iterations is : 1171328170.6851702
Cost after 600 iterations is : 1171325762.4945464
Cost after 700 iterations is : 1171323354.232436
Cost after 800 iterations is : 1171320945.8988376
Cost after 900 iterations is : 1171318537.493749


In [16]:
def predict(X_test, parameters):
    forward_cache = forward_propagation(X_test, parameters)
    return forward_cache["a2"]

In [17]:
y_pred = predict(X_test, Parameters)

In [18]:
def accuracy(Y_test, y_pred):
    return np.sqrt((np.square(Y_test - y_pred)).mean())
print("Rmse error on test dataset = ", accuracy(Y_test, y_pred))

Rmse error on test dataset =  0.9726776770084595
