### Loading Libraries

In [1]:
import numpy as np


### Loading Data

In [40]:
# Training CSV: the last column is later split off as the target, the rest are features.
train_data = np.genfromtxt(
    fname="0000000000002419_training_ccpp_x_y_train.csv",
    delimiter=",",
)
# Test CSV: passed unchanged to the scaler further down, so presumably feature columns only.
test_data = np.genfromtxt(
    fname="0000000000002419_test_ccpp_x_test.csv",
    delimiter=",",
)

### Implementing Gradient Descent

In [32]:
def cost(train_data, m):
    """Return the mean squared error of the linear model ``m`` on ``train_data``.

    Parameters
    ----------
    train_data : array-like, 2-D
        One sample per row. The last column is the target; every preceding
        column is a feature.
    m : array-like, 1-D
        Model coefficients: one per feature column, followed by the
        intercept as the final entry.

    Returns
    -------
    float
        Mean of the squared residuals ``(y - prediction) ** 2``, or ``0``
        when ``train_data`` has no rows.
    """
    train_data = np.asarray(train_data, dtype=float)
    M = len(train_data)
    if M == 0:
        # No samples: report zero cost rather than averaging over nothing.
        return 0.0

    # A trailing column of ones lets the intercept be stored as the last entry of ``m``.
    X = np.hstack((train_data[:, :-1], np.ones((M, 1))))
    Y = train_data[:, -1]

    # Vectorised over all rows instead of a Python-level loop per sample.
    residuals = Y - X @ np.asarray(m, dtype=float)
    return np.mean(residuals ** 2)

In [33]:
def step_gradient(train_data, lr, m):
    """Apply one batch gradient-descent update to ``m`` for the MSE cost.

    Parameters
    ----------
    train_data : array-like, 2-D
        One sample per row. The last column is the target; every preceding
        column is a feature.
    lr : float
        Learning rate that scales the gradient step.
    m : array-like, 1-D
        Current coefficients: one per feature column, followed by the
        intercept as the final entry.

    Returns
    -------
    numpy.ndarray
        A new coefficient vector ``m - lr * gradient``; the input ``m`` is
        not modified.
    """
    train_data = np.asarray(train_data, dtype=float)
    m = np.asarray(m, dtype=float)
    M = len(train_data)
    if M == 0:
        # With no samples the gradient is zero, so the coefficients stay as they are.
        return m.copy()

    # A trailing column of ones lets the intercept be updated together with the weights.
    X = np.hstack((train_data[:, :-1], np.ones((M, 1))))
    Y = train_data[:, -1]

    error = Y - X @ m
    # Gradient of mean((Y - X m)^2) with respect to m is (-2 / M) * X^T (Y - X m).
    m_slope = (-2 / M) * (X.T @ error)

    return m - lr * m_slope

In [34]:
def gd(train_data, lr, iterations, log_every=1):
    """Fit linear-model coefficients by repeated ``step_gradient`` updates.

    Parameters
    ----------
    train_data : numpy.ndarray, 2-D
        One sample per row; the last column is the target.
    lr : float
        Learning rate forwarded to ``step_gradient``.
    iterations : int
        Number of update steps to perform.
    log_every : int, optional
        Print the cost on every ``log_every``-th iteration. The default of
        ``1`` prints on each iteration; ``0`` or ``None`` disables printing
        and skips the extra cost evaluation.

    Returns
    -------
    numpy.ndarray
        Coefficient vector of length ``n_features + 1``; the last entry is
        the intercept.
    """
    num_features = train_data.shape[1] - 1
    # Start from all-zero coefficients; the extra slot is the intercept.
    m = np.zeros(num_features + 1)
    for i in range(iterations):
        m = step_gradient(train_data, lr, m)
        # Evaluating the cost is a full pass over the data, so only do it when logging.
        if log_every and i % log_every == 0:
            print(i, " Cost: ", cost(train_data, m))
    return m


In [35]:
def run(train_data, lr, iterations):
    """Train with ``gd``, print the resulting coefficients, and return them.

    All three arguments are forwarded to ``gd`` unchanged.
    """
    fitted = gd(train_data, lr, iterations)
    print("Final m :", fitted)
    return fitted

### Scaling the Data

In [42]:
from sklearn.preprocessing import StandardScaler

# The last column is the target; it is kept on its original scale.
Y_train = train_data[:,-1]

# Fit the scaler on the training features only, then apply that same
# transform to both the training and the test features.
scaler = StandardScaler().fit(train_data[:,:-1])
X_train_scaled = scaler.transform(train_data[:,:-1])
X_test_scaled = scaler.transform(test_data)


In [51]:
# Re-attach the unscaled target as the last column, which is where the
# training functions read it from.
m = run(
    train_data=np.column_stack((X_train_scaled, Y_train)),
    lr=0.001,
    iterations=10000,
)

0  Cost:  205973.71059564373
1  Cost:  205149.3893107314
2  Cost:  204328.37559535293
3  Cost:  203510.65609934967
4  Cost:  202696.2175272072
5  Cost:  201885.04663782925
6  Cost:  201077.13024430544
7  Cost:  200272.45521368374
8  Cost:  199471.00846673799
9  Cost:  198672.77697774622
10  Cost:  197877.74777426294
11  Cost:  197085.90793689337
12  Cost:  196297.24459907258
13  Cost:  195511.74494683606
14  Cost:  194729.3962186071
15  Cost:  193950.18570496797
16  Cost:  193174.10074844598
17  Cost:  192401.1287432872
18  Cost:  191631.2571352505
19  Cost:  190864.4734213767
20  Cost:  190100.76514978375
21  Cost:  189340.11991944624
22  Cost:  188582.5253799825
23  Cost:  187827.9692314425
24  Cost:  187076.43922409526
25  Cost:  186327.92315821565
26  Cost:  185582.40888387905
27  Cost:  184839.88430074707
28  Cost:  184100.3373578633
29  Cost:  183363.7560534439
30  Cost:  182630.12843466786
31  Cost:  181899.4425974834
32  Cost:  181171.68668638408
33  Cost:  180446.84889422666
3

In [52]:
def predict(m, test_data):
    """Return the linear-model prediction for each row of ``test_data``.

    Parameters
    ----------
    m : sequence
        Coefficients; every entry but the last is a feature weight and the
        last entry is the intercept.
    test_data : sequence of rows
        Feature rows, each with as many entries as there are weights.

    Returns
    -------
    list
        One value ``dot(row, weights) + intercept`` per input row, in order.
    """
    weights, intercept = m[:-1], m[-1]
    return [
        np.dot(test_data[row_idx], weights) + intercept
        for row_idx in range(len(test_data))
    ]


In [53]:
# Predict on the scaled test features using the trained coefficients.
y_pred = predict(m=m, test_data=X_test_scaled)

In [54]:
# Write the predictions to disk as plain text, one value per line.
np.savetxt(fname="submission.csv", X=y_pred)