In [61]:
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output
from sklearn import preprocessing

In [63]:
# Load the comma-separated training file into a NumPy array.
data = np.loadtxt("0000000000002419_training_ccpp_x_y_train.csv",delimiter=",")

In [41]:
# Sanity-check the dimensions of the loaded array (rows, columns).
data.shape

(7176, 5)

In [42]:
# Columns 0-3 are used as the features, column 4 as the target.
X, Y = data[:, :4], data[:, 4]

In [43]:
# Fit the scaler on the training features only; the same fitted scaler is
# reused further down to transform the test features.
scaler = preprocessing.StandardScaler()
scaler.fit(X)

In [44]:
# Apply the fitted scaling to the training features.
X_scaled = scaler.transform(X)

In [45]:
# Confirm the features and the targets have the same number of rows.
X_scaled.shape,Y.shape

((7176, 4), (7176,))

In [46]:
# Add a trailing column of ones so the last coefficient acts as the intercept.
X_ = np.hstack([X_scaled, np.ones((X_scaled.shape[0], 1))])

In [47]:
# Expect one more column than X_scaled (the appended column of ones).
X_.shape

(7176, 5)

In [48]:
def cost(X_train,m,Y_train):
    """Return the mean squared error of the linear model ``X_train @ m``.

    Parameters
    ----------
    X_train : 2-D array, one row per sample. Include a column of ones if the
        model should have an intercept.
    m : 1-D array of coefficients, one per column of ``X_train``.
    Y_train : array of targets, one per row of ``X_train``. A column vector
        is flattened first so it cannot broadcast against the predictions.

    Returns
    -------
    The mean of the squared residuals, or ``0`` when ``X_train`` has no rows.
    """
    N = X_train.shape[0]
    if N == 0:
        # Nothing to average; 0 is what an empty row-by-row sum yields.
        return 0
    # One matrix-vector product replaces the per-row Python loop.
    residuals = np.ravel(Y_train) - X_train @ m
    return np.mean(residuals ** 2)

In [49]:
def step_gradient(X_train,Y_train,m,lr):
    """Take one gradient-descent step on the mean squared error.

    Parameters
    ----------
    X_train : 2-D array, one row per sample.
    Y_train : array of targets, one per row of ``X_train``. A column vector
        is flattened first so it cannot broadcast against the predictions.
    m : 1-D array of current coefficients, one per column of ``X_train``.
    lr : learning rate that scales the gradient.

    Returns
    -------
    A new coefficient array ``m - lr * gradient``; ``m`` itself is not modified.
    """
    N = X_train.shape[0]
    if N == 0:
        # No samples means a zero gradient, so the coefficients are unchanged.
        return m - (lr * np.zeros(X_train.shape[1]))

    residuals = np.ravel(Y_train) - X_train @ m
    # d/dm of mean((y - Xm)^2) is (-2/N) * X^T (y - Xm).
    slope_m = (-2 / N) * (X_train.T @ residuals)

    return m - (lr * slope_m)

In [50]:
def _run_epochs(X_train,Y_train,m,lr,epochs,unit,verbose,cost_array):
    """Run ``epochs + 1`` gradient steps from ``m`` and return the new coefficients.

    When ``verbose`` is true, the cost is printed and appended to
    ``cost_array`` on every iteration whose index is a multiple of ``unit``.
    """
    if unit == 0:
        # A zero interval would make ``i % unit`` raise ZeroDivisionError;
        # report every epoch instead.
        unit = 1
    for i in range(epochs+1):
        m = step_gradient(X_train,Y_train,m,lr)
        # The cost is only needed for reporting, so skip the extra pass over
        # the data on epochs that are not reported.
        if verbose and i%unit==0:
            cost_ = cost(X_train,m,Y_train)
            print("epoch:",i,"cost:",cost_)
            cost_array.append(cost_)
    return m


def fit(X_train,Y_train,lr=0.001,epochs=100,verbose=False,interactive=True):
    """Fit linear-regression coefficients by batch gradient descent.

    Parameters
    ----------
    X_train : 2-D array, one row per sample.
    Y_train : array of targets, one per row of ``X_train``.
    lr : learning rate for the initial run.
    epochs : the initial run performs ``epochs + 1`` gradient steps
        (iteration indices ``0 .. epochs``).
    verbose : when true, print the cost roughly 100 times over the run
        (every ``epochs // 100`` iterations, or every iteration when
        ``epochs < 100``) and record those values in the returned history.
    interactive : when true (the default), prompt on stdin after each run
        and keep training while the answer is ``"y"``. Pass ``False`` for
        unattended execution.

    Returns
    -------
    (m, cost_array) : the fitted coefficients and the list of reported
    costs. ``cost_array`` is empty unless ``verbose`` is true.
    """
    m = np.zeros(X_train.shape[1])
    cost_array = []
    m = _run_epochs(X_train,Y_train,m,lr,epochs,epochs//100,verbose,cost_array)

    if not interactive:
        return m,cost_array

    # Let the user keep training if the cost has not levelled off yet.
    cont = input("Do you want to continue?:")

    while cont=="y":
        clear_output(wait=True)
        epochs = int(input("Please enter the number of epochs to continue for:"))
        unit = int(input("Please enter the unit point for epochs:"))
        lr_factor = float(input("Please enter the decay factor for the learning rate:"))
        # The factor multiplies the current rate, so it compounds across rounds.
        lr*=lr_factor
        m = _run_epochs(X_train,Y_train,m,lr,epochs,unit,verbose,cost_array)
        cont = input("Do you want to continue?:")

    return m,cost_array
    

In [51]:
def predict(X_test,m):
    """Return the linear-model prediction for every row of ``X_test``.

    Parameters
    ----------
    X_test : 2-D array, one row per sample, with the same column layout
        that was used to fit ``m``.
    m : 1-D sequence of coefficients, one per column of ``X_test``.

    Returns
    -------
    1-D array with one prediction (the row's dot product with ``m``) per row.
    """
    # A single matrix-vector product replaces the per-row loop and list append.
    return np.asarray(X_test @ np.asarray(m))
    

In [52]:
def score(Y_true,Y_pred):
    """Return the coefficient of determination (R^2) of the predictions.

    Computed as ``1 - SS_res / SS_tot``, where ``SS_res`` is the sum of
    squared differences between ``Y_true`` and ``Y_pred`` and ``SS_tot`` is
    the sum of squared deviations of ``Y_true`` from its own mean.
    """
    residuals = Y_true - Y_pred
    deviations = Y_true - Y_true.mean()
    ss_res = (residuals * residuals).sum()
    ss_tot = (deviations * deviations).sum()
    return 1 - ss_res / ss_tot

In [53]:
# Train with progress reporting enabled.
# NOTE: fit() prompts on stdin once the initial epochs finish, so this cell
# blocks until the "Do you want to continue?:" prompt is answered.
m, cost_array = fit(X_,Y,epochs=1000,lr=0.1,verbose=True)
print(m)

epoch: 0 cost: 132273.17028197853
epoch: 10 cost: 1552.762193756394
epoch: 20 cost: 43.193704743608556
epoch: 30 cost: 24.176010622787736
epoch: 40 cost: 22.937255128393236
epoch: 50 cost: 22.252171183169498
epoch: 60 cost: 21.800137519716298
epoch: 70 cost: 21.50059001236028
epoch: 80 cost: 21.302047696213297
epoch: 90 cost: 21.170449172278783
epoch: 100 cost: 21.08322226502254
epoch: 110 cost: 21.025405983415762
epoch: 120 cost: 20.98708383277924
epoch: 130 cost: 20.961682904083958
epoch: 140 cost: 20.944846500881333
epoch: 150 cost: 20.93368689029104
epoch: 160 cost: 20.926290007059148
epoch: 170 cost: 20.921387158424483
epoch: 180 cost: 20.91813742147707
epoch: 190 cost: 20.915983410408472
epoch: 200 cost: 20.914555675253386
epoch: 210 cost: 20.9136093348644
epoch: 220 cost: 20.91298207557985
epoch: 230 cost: 20.912566311638262
epoch: 240 cost: 20.9122907323695
epoch: 250 cost: 20.912108071186392
epoch: 260 cost: 20.911986998553033
epoch: 270 cost: 20.91190674842922
epoch: 280 cost

In [54]:
# Predict on the training design matrix (the same rows used for fitting).
y_pred = predict(X_,m)
y_pred.shape

(7176,)

In [55]:
# R^2 on the training data; this is not a held-out estimate.
score(Y,y_pred)

0.9287632000440599

In [56]:
# Load the comma-separated test inputs.
X_test = np.loadtxt("0000000000002419_test_ccpp_x_test.csv",delimiter=",")


In [57]:
# Reuse the scaler that was fitted on the training features; it is not refit
# on the test data.
X_test_scaled = scaler.transform(X_test)


In [58]:
# Add the same trailing column of ones that the training matrix received.
X_test_scaled_ = np.hstack([X_test_scaled, np.ones((X_test_scaled.shape[0], 1))])


In [59]:
# Predict targets for the test rows with the fitted coefficients.
y_pred_test = predict(X_test_scaled_,m)


In [60]:
# Write the test predictions to a text file in the working directory.
np.savetxt('pred_feature_scaling.csv',y_pred_test)
