In [43]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math

In [44]:
# Read the space-separated training table and keep it as a plain NumPy array.
# NOTE(review): read_csv takes the first line as column names by default; if
# 'func/train100.txt' has no header row, that first sample is silently
# dropped (pass header=None in that case) -- TODO confirm against the file.
data = pd.read_csv('func/train100.txt', sep=' ')
txt = data.to_numpy()

In [45]:
# Columns 0-1 are the two network inputs; column 2 is the target value.
X_train, Y_train = txt[:, :2], txt[:, 2]

In [46]:
def relu(X):
    """Rectified linear unit: element-wise maximum of X and 0."""
    rectified = np.maximum(X, 0)
    return rectified

In [47]:
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [48]:
# L2 regularization strength; read as a global by compute_cost and
# backward_propagation.
lam=1

In [49]:
def compute_cost(A3, Y, parameters, lambd=None):
    """Mean squared error of the network output plus an L2 weight penalty.

    Parameters
    ----------
    A3 : array whose last axis indexes the examples (network output).
    Y : targets, broadcastable against A3.
    parameters : dict providing the weight matrices "W1", "W2" and "W3".
    lambd : optional regularization strength. When omitted, the
        notebook-level constant ``lam`` is used.

    Returns
    -------
    sum((Y - A3)**2) / m  +  lambd * (|W1|^2 + |W2|^2 + |W3|^2) / (2 * m),
    where m is the number of examples taken from A3 (previously a
    hard-coded 99, which was only correct for one particular data set).
    """
    if lambd is None:
        lambd = lam  # fall back to the global defined in the notebook

    # Number of examples comes from the data instead of a magic constant.
    m = np.asarray(A3).shape[-1]

    squared_weights = sum(
        np.sum(np.square(parameters[name])) for name in ("W1", "W2", "W3")
    )
    data_term = np.sum((Y - A3) ** 2) / m
    penalty_term = lambd * squared_weights / (2 * m)
    return data_term + penalty_term

In [50]:
def backward_propagation(parameters, cache, X, Y, lambd=None):
    """Gradients of the L2-regularized squared-error loss for the 3-layer net.

    Parameters
    ----------
    parameters : unused; the weights are taken from ``cache``. Kept so that
        existing callers do not have to change.
    cache : the 14-tuple built by forward_propagation
        (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3).
    X : inputs with one example per column; ``X.shape[1]`` is used as the
        number of examples m (previously a hard-coded 99).
    Y : targets, broadcastable against A3.
    lambd : optional regularization strength. When omitted, the
        notebook-level constant ``lam`` is used.

    Returns
    -------
    dict with keys "dW1", "db1", "dW2", "db2", "dW3", "db3".
    """
    if lambd is None:
        lambd = lam  # fall back to the global defined in the notebook

    m = X.shape[1]

    (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3) = cache

    # Linear output layer: A3 - Y is the gradient of a squared-error data
    # term scaled by 1/(2m) once the 1/m factor below is applied.
    dZ3 = A3 - Y
    dW3 = (1 / m) * np.dot(dZ3, A2.T) + ((lambd * W3) / m)
    db3 = (1 / m) * np.sum(dZ3, axis=1, keepdims=True)

    # Hidden layers: 1 - A**2 is the derivative of tanh expressed through
    # its own output.
    dZ2 = np.multiply(np.dot(W3.T, dZ3), 1 - np.power(A2, 2))
    dW2 = (1 / m) * np.dot(dZ2, A1.T) + ((lambd * W2) / m)
    db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)

    dZ1 = np.multiply(np.dot(W2.T, dZ2), 1 - np.power(A1, 2))
    dW1 = (1 / m) * np.dot(dZ1, X.T) + ((lambd * W1) / m)
    db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)

    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2,
             "dW3": dW3,
             "db3": db3
            }

    return grads

In [51]:

def update_parameters(parameters, grads, learning_rate = 0.005):
    """Apply one plain gradient-descent step.

    For every entry P in ("W1", "b1", "W2", "b2", "W3", "b3") the result
    holds ``parameters[P] - learning_rate * grads["d" + P]``. A new dict is
    returned; the input dict is left untouched.
    """
    stepped = {}
    for name in ("W1", "b1", "W2", "b2", "W3", "b3"):
        stepped[name] = parameters[name] - learning_rate * grads["d" + name]
    return stepped

In [52]:
def update_parameters_adam(parameters, v_corrected, s_corrected,E = 1e-8, learning_rate = 0.01):
    """Apply one Adam-style step from bias-corrected moment estimates.

    Parameters
    ----------
    parameters : dict with keys "W1", "b1", "W2", "b2", "W3", "b3".
    v_corrected : first-moment estimates, keyed "vW1", "vb1", ..., "vb3".
    s_corrected : second-moment estimates, stored under the same
        "v"-prefixed keys as v_corrected.
    E : small constant added to the second moment before the square root.
        Note this places it inside the root (sqrt(s + E)), which differs
        from the sqrt(s) + eps form of the original Adam formulation.
    learning_rate : step size.

    Returns
    -------
    A new dict with each entry replaced by
    ``P - learning_rate * v / sqrt(s + E)``.
    """
    refreshed = {}
    for name in ("W1", "b1", "W2", "b2", "W3", "b3"):
        moment_key = "v" + name
        direction = v_corrected[moment_key] / np.sqrt(s_corrected[moment_key] + E)
        refreshed[name] = parameters[name] - learning_rate * direction
    return refreshed

In [53]:
def initparameters():
    """Create random initial weights and zero biases for a 2-30-10-1 network.

    Each weight matrix is drawn from a standard normal via the global NumPy
    RNG and scaled by sqrt(2 / fan_in). The draws happen in the order
    W1, W2, W3. Biases are column vectors of zeros.
    """
    def scaled_normal(n_out, n_in):
        # Standard-normal draw scaled by sqrt(2 / number of inputs).
        return np.random.randn(n_out, n_in) * np.sqrt(2 / n_in)

    first_weights = scaled_normal(30, 2)
    second_weights = scaled_normal(10, 30)
    output_weights = scaled_normal(1, 10)

    return {"W1": first_weights,
            "b1": np.zeros(shape=(30, 1)),
            "W2": second_weights,
            "b2": np.zeros(shape=(10, 1)),
            "W3": output_weights,
            "b3": np.zeros(shape=(1, 1))}

In [54]:
def nn_model(X, Y, parameters, learning_rate, num_iterations = 30000, print_cost=False,optmethod = 2,beta = 0.9, gamma = 0.99, cost_threshold = 0.3):
    """Train the three-layer network with full-batch updates.

    Parameters
    ----------
    X, Y : training inputs and targets, handed unchanged to
        forward_propagation, compute_cost and backward_propagation.
    parameters : dict with keys "W1", "b1", "W2", "b2", "W3", "b3" holding
        the starting values.
    learning_rate : step size passed to the selected update rule.
    num_iterations : maximum number of iterations.
    print_cost : when true, print the cost every 1000 iterations. (This flag
        used to be ignored and the cost was always printed.)
    optmethod : 0 selects plain gradient descent (update_parameters),
        1 selects momentum (update_parameters_momentum), and any other
        value selects the Adam-style update with bias-corrected first and
        second moments (update_parameters_adam).
    beta : decay rate of the first-moment (velocity) average.
    gamma : decay rate of the second-moment (squared-gradient) average.
    cost_threshold : training stops as soon as the cost computed at the
        start of an iteration is below this value (default 0.3, the
        previously hard-coded constant).

    Returns
    -------
    (parameters, costs): the final parameter dict and the list of costs
    sampled every 100 iterations.
    """
    names = ("W1", "b1", "W2", "b2", "W3", "b3")

    costs = []
    # Running moment estimates. Both dicts use "v"-prefixed keys because
    # update_parameters_adam looks them up under those names.
    velocity = {"v" + name: np.zeros_like(parameters[name]) for name in names}
    rms_prop = {"v" + name: np.zeros_like(parameters[name]) for name in names}

    t = 0  # number of Adam steps taken so far, used for bias correction

    for i in range(num_iterations):
        A3, cache = forward_propagation(X, parameters)

        # The cost is evaluated before this iteration's update is applied.
        cost = compute_cost(A3, Y, parameters)
        if i % 100 == 0:
            costs.append(cost)

        grads = backward_propagation(parameters, cache, X, Y)

        if optmethod == 0:
            parameters = update_parameters(parameters, grads, learning_rate)
        else:
            # Exponential moving average of the gradients.
            for name in names:
                velocity["v" + name] = (beta * velocity["v" + name]
                                        + (1 - beta) * grads["d" + name])

            if optmethod == 1:
                # NOTE(review): update_parameters_momentum is not defined in
                # the visible part of this notebook -- confirm it exists
                # before relying on optmethod=1.
                parameters = update_parameters_momentum(parameters, velocity, learning_rate)
            else:
                # Exponential moving average of the squared gradients.
                for name in names:
                    rms_prop["v" + name] = (gamma * rms_prop["v" + name]
                                            + (1 - gamma) * np.power(grads["d" + name], 2))

                t = t + 1

                # Bias correction compensates for the zero initialisation of
                # both moving averages.
                v_scale = 1 - np.power(beta, t)
                s_scale = 1 - np.power(gamma, t)
                v_correct = {key: value / v_scale for key, value in velocity.items()}
                s_correct = {key: value / s_scale for key, value in rms_prop.items()}

                parameters = update_parameters_adam(parameters, v_correct, s_correct,
                                                    E = 1e-8, learning_rate = learning_rate)

        if print_cost and i % 1000 == 0:
            print("cost ",cost)
        if cost < cost_threshold:
            break

    return parameters,costs

In [55]:
def forward_propagation(X, parameters):
    """Run the network forward: tanh -> tanh -> linear output.

    Parameters
    ----------
    X : inputs; multiplied on the left by W1, so its first axis must match
        the number of columns of W1.
    parameters : dict with keys "W1", "b1", "W2", "b2", "W3", "b3".

    Returns
    -------
    (A3, cache) where A3 is the linear output of the last layer and cache is
    the tuple (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3).
    """
    W1, b1 = parameters['W1'], parameters['b1']
    W2, b2 = parameters['W2'], parameters['b2']
    W3, b3 = parameters['W3'], parameters['b3']

    # First hidden layer.
    Z1 = W1.dot(X) + b1
    A1 = np.tanh(Z1)
    # NOTE(review): D1 and D2 are uniform random arrays shaped like the
    # activations; they are stored in the cache but never applied to A1/A2
    # here -- looks like an unused dropout mask, confirm whether it is needed.
    D1 = np.random.rand(A1.shape[0], A1.shape[1])

    # Second hidden layer.
    Z2 = W2.dot(A1) + b2
    A2 = np.tanh(Z2)
    D2 = np.random.rand(A2.shape[0], A2.shape[1])

    # Output layer has no activation.
    Z3 = W3.dot(A2) + b3
    A3 = Z3

    return A3, (Z1, D1, A1, W1, b1, Z2, D2, A2, W2, b2, Z3, A3, W3, b3)


In [56]:
# Draw the random starting weights and zero biases for the 2-30-10-1 network.
params = initparameters()

In [42]:
# Train from the initial parameters with optmethod = 2, the Adam-style branch
# of nn_model. X_train is transposed so that each column is one example.
lr = 0.005
parameters,costs = nn_model(X_train.T, Y_train.T, params, lr,  num_iterations =30000, print_cost=True,optmethod = 2)

cost  1021.1785432032209
cost  38.00450644504353
cost  4.589565869908675
cost  4.1684663085462415
cost  4.025425449496095
cost  3.976961111483827
cost  3.8542734332018966
cost  3.816781067772704
cost  3.779785392976162
cost  3.7897710388412147
cost  3.7484694166138053
cost  3.7184235202580873
cost  3.727109179501615
cost  3.6921011266026857
cost  3.6873075454011106
cost  3.685093406354002
cost  3.6662217751410417
cost  3.6662250940281016
cost  3.658321663599761
cost  3.658399736977666
cost  3.674711348419859
cost  3.6523803230897753
cost  3.6403431199896055
cost  3.627849878949233
cost  3.633504674848118
cost  3.6171270728433624
cost  3.6161339265920027
cost  3.608837826561677
cost  3.6064364922787533
cost  3.6074817681514304


In [None]:
# Plot the training cost that nn_model recorded once every 100 iterations.
plt.plot(costs)
plt.ylabel('cost')
plt.xlabel('iterations (per hundreds)')

plt.show()

In [17]:
def predict(parameters,X,Y):
    """Run the network on X, print the mean squared error against Y, and
    return the network outputs.

    X is expected with one example per column; ``X.shape[1]`` is used as the
    number of examples when averaging the squared error.
    """
    n_examples = X.shape[1]
    outputs, _ = forward_propagation(X, parameters)

    mean_squared_error = np.sum((Y - outputs) ** 2) / n_examples
    print(mean_squared_error)
    return outputs

In [18]:
# Read the space-separated validation table as a NumPy array.
# NOTE(review): read_csv takes the first line as column names by default; if
# 'func/val.txt' has no header row, its first sample is dropped -- TODO confirm.
data = pd.read_csv('func/val.txt', sep=' ')
text = data.to_numpy()

# Columns 0-1 are the inputs; column 2 is the target value.
X_val, Y_val = text[:, :2], text[:, 2]

In [19]:
# Evaluate on the validation set: predict prints the mean squared error and
# returns the network outputs.
pred=predict(parameters,X_val.T,Y_val.T)

105.32763083936258
