In [1]:
import torch
import pandas as pd

In [2]:
def import_data(file_name):
    data = pd.read_csv(file_name)
    print(data.shape)
    tensor = torch.tensor(data.values)
    tensor = tensor.float()
    return tensor
def normalize(X):
    return X/100

In [3]:
def custom_rand(a, b):
    X = torch.rand(a, b)
    return (2*X-1)
def init_params(layers_dims):
    params = dict()
    for i in range(1, len(layers_dims)):
        print(i)
        params['W' + str(i)] = custom_rand(layers_dims[i-1], layers_dims[i])
        params['b' + str(i)] = custom_rand(1, layers_dims[i])
    return params

In [4]:
def sigmoid(X):
    Z = 1/(1 + torch.exp(-X))
    return Z

In [5]:
def grads_on(params):
    for key in params:
        params[key].requires_grad = True
    return params
def update_params(params, step_size):
    for key in params:
        with torch.no_grad():
            if(params[key].requires_grad == False):
                print("ERROOOOOOOOOORRR REQUIRES GRAD SET TO 0")
            params[key] -= step_size * params[key].grad
            params[key].grad.zero_()

    return params

In [6]:
def iteration(X, Y, params, L, step_size, print_cost):
    params = grads_on(params)
    A = X
    m = X.shape[0]
    for l in range(1, L):
        Z = A@params['W' + str(l)] + params['b' + str(l)]
        A = torch.tanh(Z)
    Z = A@params['W' + str(L)] + params['b' + str(L)]
    A = sigmoid(Z)
    A = torch.clamp(A, min=1e-8)
    A = torch.clamp(A, max=1 - 1e-8)
    loss_table = -(Y*torch.log(A) + (1-Y)*torch.log(1-A))
    cost = loss_table.sum()/m
    if print_cost:
        print("THE CURRENT COST IS ", cost.item())
   # with torch.autograd.detect_anomaly():
    cost.backward()
    return update_params(params, step_size)

In [7]:
def predict(X, Y, params, L, str_type):
    A = X
    m = X.shape[0]
    for l in range(1, L):
        Z = A@params['W' + str(l)] + params['b' + str(l)]
        A = torch.tanh(Z)
    Z = A@params['W' + str(L)] + params['b' + str(L)]
    A = sigmoid(Z)
    #print("AAAAAAAAAAAA ", A)
    
    loss_table = -(Y*torch.log(A) + (1-Y)*torch.log(1-A))
    cost = loss_table.sum()/m
    Y_hat = torch.ceil(A/0.5) - 1
    incorrect = torch.abs(Y_hat - Y)
    gg = m - incorrect.sum()
    #print("THE TESTING COST IS ", cost.item())
    #print("The Model made ", gg.item(), " good predictions")
    print("(", 100*gg.item()/m, "%", str_type,"accuracy)")

In [8]:
layers_dims = [780, 60, 50, 1]
params = init_params(layers_dims)
#print(params)

1
2
3


In [9]:
train_X = import_data("train_X.csv")
train_Y = import_data("train_Y.csv")
test_X = import_data("test_X.csv")
test_Y = import_data("test_Y.csv")

(2700, 780)
(2700, 1)
(2000, 780)
(2000, 1)


In [10]:
params = iteration(train_X, train_Y, params, len(layers_dims)-1, 0.000001, 1)

THE CURRENT COST IS  1.7054636478424072


In [11]:
for i in range(20*1000 + 1):
    if (i%2500 == 0):
        print("EPOCH ", i)
    params = iteration(train_X, train_Y, params, len(layers_dims)-1, 0.01, (i%2500 == 0))
    if (i%2500 == 0):
        predict(train_X, train_Y, params, len(layers_dims)-1, "training")
        predict(test_X, test_Y, params, len(layers_dims)-1, "testing")

EPOCH  0
THE CURRENT COST IS  1.7054632902145386
( 50.77777777777778 % training accuracy)
( 53.1 % testing accuracy)
EPOCH  2500
THE CURRENT COST IS  0.5592285394668579
( 71.25925925925925 % training accuracy)
( 60.35 % testing accuracy)
EPOCH  5000
THE CURRENT COST IS  0.4537578523159027
( 79.48148148148148 % training accuracy)
( 62.4 % testing accuracy)
EPOCH  7500
THE CURRENT COST IS  0.3605955243110657
( 85.77777777777777 % training accuracy)
( 63.8 % testing accuracy)
EPOCH  10000
THE CURRENT COST IS  0.27388960123062134
( 91.03703703703704 % training accuracy)
( 64.45 % testing accuracy)
EPOCH  12500
THE CURRENT COST IS  0.19980952143669128
( 94.62962962962963 % training accuracy)
( 65.05 % testing accuracy)
EPOCH  15000
THE CURRENT COST IS  0.14253021776676178
( 96.85185185185185 % training accuracy)
( 65.6 % testing accuracy)
EPOCH  17500
THE CURRENT COST IS  0.1024007648229599
( 98.5925925925926 % training accuracy)
( 65.9 % testing accuracy)
EPOCH  20000
THE CURRENT COST IS  