In [1]:
import numpy as np
import pandas as pd

In [2]:
def generic_cost(X,Y,m,c):
    """Return the mean squared error of the linear model ``(m * X).sum(axis=1) + c``.

    Parameters
    ----------
    X : array-like, shape (M, N)
        Feature matrix, one sample per row.
    Y : array-like, shape (M,)
        Target value for each row of ``X`` (flattened to 1-D before use).
    m : array-like of shape (N,) or scalar
        Coefficient(s), multiplied element-wise with every row of ``X``.
    c : float
        Intercept.

    Returns
    -------
    float
        Mean of the squared residuals ``Y - (m * X).sum(axis=1) - c``.
        ``0`` is returned when ``X`` has no rows.
    """
    X = np.asarray(X)
    M = len(X)
    if M == 0:
        # No samples: nothing to average (and avoids dividing by zero).
        return 0
    # Flatten so a column-vector Y cannot silently broadcast to an (M, M) matrix.
    Y = np.asarray(Y).reshape(-1)
    # One vectorised pass instead of a Python-level loop over the rows.
    residuals = Y - (m * X).sum(axis=1) - c
    return (residuals ** 2).sum() / M

In [3]:
def score(Y_true,Y_pred):
    """Return the coefficient of determination (R^2) of the predictions.

    Parameters
    ----------
    Y_true : array-like
        Observed target values.
    Y_pred : array-like
        Predicted target values, same shape as ``Y_true``.

    Returns
    -------
    float
        ``1 - u/v`` where ``u`` is the residual sum of squares and ``v`` is the
        total sum of squares around the mean of ``Y_true``.
        When ``Y_true`` is constant (``v == 0``) the ratio is undefined; in that
        case ``1.0`` is returned for perfect predictions and ``0.0`` otherwise.
        ``nan`` is returned for empty input.
    """
    # Accept lists/tuples as well as arrays.
    Y_true = np.asarray(Y_true, dtype=float)
    Y_pred = np.asarray(Y_pred, dtype=float)
    if Y_true.size == 0:
        return float("nan")
    u = ((Y_true - Y_pred)**2).sum()
    v = ((Y_true - Y_true.mean())**2).sum()
    if v == 0:
        # Constant target: avoid 0/0 or x/0 (nan / -inf plus a RuntimeWarning).
        return 1.0 if u == 0 else 0.0
    return 1 - u/v

In [4]:
def generic_step_gradient_descent(X,Y,learning_rate,m,c):
    """Perform one batch gradient-descent update for the mean-squared-error cost.

    Parameters
    ----------
    X : array-like, shape (M, N)
        Feature matrix, one sample per row.
    Y : array-like, shape (M,)
        Target value for each row of ``X`` (flattened to 1-D before use).
    learning_rate : float
        Step size applied to both gradients.
    m : array-like of shape (N,) or scalar
        Current coefficient(s).
    c : float
        Current intercept.

    Returns
    -------
    (new_m, new_c)
        ``m`` and ``c`` after moving against the gradient of
        ``mean((Y - (m * X).sum(axis=1) - c) ** 2)``. With zero samples the
        gradients are zero, so the parameters are returned unchanged in value.
    """
    X = np.asarray(X)
    N = X.shape[1]
    M = len(X)
    if M == 0:
        # No samples contribute, so both gradients stay at zero.
        m_slope, c_slope = np.zeros(N), 0
    else:
        # Flatten so a column-vector Y cannot silently broadcast to (M, M).
        Y = np.asarray(Y).reshape(-1)
        # The residual is computed once per sample rather than once per
        # (sample, feature) pair as a nested Python loop would do.
        residuals = Y - (m * X).sum(axis=1) - c
        # d(cost)/d(m_j) = (-2/M) * sum_i residual_i * X[i, j]
        m_slope = (-2 / M) * (X.T @ residuals)
        # d(cost)/d(c)   = (-2/M) * sum_i residual_i
        c_slope = (-2 / M) * residuals.sum()
    new_m = m - learning_rate*m_slope
    new_c = c - learning_rate*c_slope
    return new_m,new_c

In [23]:
# Generic Gradient Descent
def generic_gradient_descent(X,Y,learning_rate,num_iterations):
    """Fit coefficients and intercept by repeated gradient-descent steps.

    Starts from all-zero coefficients (one per column of ``X``) and a zero
    intercept, applies ``generic_step_gradient_descent`` ``num_iterations``
    times, and prints the cost after every step except the first.

    Returns the final ``(coefficients, intercept)`` pair.
    """
    n_features = X.shape[1]
    weights = np.zeros(n_features)
    intercept = 0
    for step in range(num_iterations):
        weights, intercept = generic_step_gradient_descent(
            X, Y, learning_rate, weights, intercept
        )
        if step == 0:
            # The cost of the very first step is not reported.
            continue
        print(step,"Cost: ", generic_cost(X, Y, weights, intercept))
    return weights, intercept

In [6]:
# Append every pairwise (degree-2) product of the input columns as extra feature columns
def add_feature(dataset):
    """Return ``dataset`` extended with the product of every unordered column pair.

    For ``n`` input columns the products are appended in the order
    (0,0), (0,1), ..., (0,n-1), (1,1), (1,2), ..., (n-1,n-1), i.e. each pair
    ``a <= b`` exactly once, squares included. The result is a 2-D numpy array
    with the original columns first.
    """
    base = pd.DataFrame(dataset)
    expanded = base.copy()
    n_cols = dataset.shape[1]
    # Each unordered pair once, in first-seen order.
    pairs = [(a, b) for a in range(n_cols) for b in range(a, n_cols)]
    for offset, (a, b) in enumerate(pairs):
        # New columns are labelled n_cols, n_cols + 1, ... after the originals.
        expanded[n_cols + offset] = base[a] * base[b]
    return expanded.values

In [55]:
def run_generic(train_path="0000000000002419_training_ccpp_x_y_train.csv",
                test_path="0000000000002419_test_ccpp_x_test.csv",
                output_path="Y_pred1.csv",
                learning_rate=0.2,
                num_iterations=100):
    """Train the gradient-descent linear model and write test-set predictions.

    Parameters
    ----------
    train_path : str
        Comma-separated training file; the last column is the target, all
        preceding columns are features.
    test_path : str
        Comma-separated test file containing only the feature columns.
    output_path : str
        File the test predictions are written to, one value per line.
    learning_rate : float
        Step size passed to ``generic_gradient_descent``.
    num_iterations : int
        Number of gradient-descent steps.

    Side effects
    ------------
    Prints the training feature shape, the per-iteration cost (from
    ``generic_gradient_descent``) and the training R^2 score, and writes
    ``output_path``.
    """
    # Imported locally, as in the original cell, so this cell stays self-contained.
    from sklearn.preprocessing import StandardScaler

    # getting the training data
    dataset = np.genfromtxt(train_path,delimiter = ",")
    X_train = np.array(dataset[:,:-1])
    Y_train = np.array(dataset[:,-1])
    print("Shape",X_train.shape)

    # applying feature scaling (statistics are learned from the training set only)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)

    # getting the test data
    X_test = np.genfromtxt(test_path,delimiter = ",")
    # The model is fitted on standardised features, so the test features must
    # go through the same fitted scaler; predicting on raw values would put
    # them on a different scale than the learned coefficients expect.
    X_test = scaler.transform(X_test)
    # To experiment with degree-2 features, apply add_feature to both X_train
    # and X_test before the scaling step above.

    # calling the gradient descent function with the chosen parameters
    m, c = generic_gradient_descent(X_train,Y_train,learning_rate,num_iterations)

    Y_train_pred = (m * X_train).sum(axis=1) + c
    Y_test_pred = (m * X_test).sum(axis=1) + c
    print("Score is: ",score(Y_train,Y_train_pred))
    np.savetxt(output_path,Y_test_pred,delimiter = ",",fmt='%f')

In [56]:
# Train the model, print the per-iteration cost and training score, and write Y_pred1.csv.
run_generic()

Shape (7176, 4)
1 Cost:  26799.242503792175
2 Cost:  9667.643213351957
3 Cost:  3499.3247676562505
4 Cost:  1278.0283196819855
5 Cost:  477.8160269712305
6 Cost:  189.29840013368795
7 Cost:  85.06306229647086
8 Cost:  47.221331673237046
9 Cost:  33.32033677022838
10 Cost:  28.068569379733468
11 Cost:  25.955379768322796
12 Cost:  24.992956681736715
13 Cost:  24.462804989575602
14 Cost:  24.104087477563205
15 Cost:  23.821187041728113
16 Cost:  23.578279429707152
17 Cost:  23.361285613198984
18 Cost:  23.164115096572573
19 Cost:  22.983675064232603
20 Cost:  22.818048282043936
21 Cost:  22.66581987471472
22 Cost:  22.525822801627825
23 Cost:  22.397037310565377
24 Cost:  22.278547925441632
25 Cost:  22.169522539596684
26 Cost:  22.069200418735647
27 Cost:  21.976884085133445
28 Cost:  21.891933095748453
29 Cost:  21.813758883785773
30 Cost:  21.74182028824115
31 Cost:  21.675619583764707
32 Cost:  21.61469890594002
33 Cost:  21.558637006529448
34 Cost:  21.50704629373865
35 Cost:  21.45