### Loading libraries

In [2]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn import model_selection

### Loading Data

In [3]:
# Locations of the comma-separated training and test matrices.
TRAIN_CSV = "0000000000002417_training_boston_x_y_train.csv"
TEST_CSV = "0000000000002417_test_boston_x_test.csv"

# Later cells use the last column of train_data as the target and treat
# test_data as features only.
train_data, test_data = (
    np.genfromtxt(csv_path, delimiter=",") for csv_path in (TRAIN_CSV, TEST_CSV)
)

In [3]:
# Disabled snippet kept as a bare string literal: nothing inside it is executed,
# so X and Y are NOT defined by this cell; the cell only echoes the string's repr.
'''
X = train_data[:,0:13]
Y = train_data[:,13]

'''

'\nX = train_data[:,0:13]\nY = train_data[:,13]\n\n'

In [4]:
def cost(points, m):
    """Return the mean squared error of the linear model ``m`` on ``points``.

    Parameters
    ----------
    points : array-like, shape (M, n_features + 1)
        One sample per row; the last column is the target and every other
        column is a feature.
    m : array-like, shape (n_features + 1,)
        Feature coefficients followed by the intercept as the final entry.

    Returns
    -------
    float
        ``(1 / M) * sum((y_i - x_i . m) ** 2)`` over all rows, or ``0`` when
        ``points`` has no rows.
    """
    points = np.asarray(points)
    M = len(points)
    if M == 0:
        # Nothing to average over; an empty sum of squared errors is 0.
        return 0

    # A trailing column of ones lets the last entry of ``m`` act as the intercept.
    X = np.hstack((points[:, :-1], np.ones((M, 1))))
    residuals = points[:, -1] - X @ m

    # One dot product replaces the per-row Python loop.
    return np.dot(residuals, residuals) / M

In [5]:
def step_gradient(points, learning_rate, m):
    """Perform one gradient-descent update of ``m`` for the mean-squared-error cost.

    Parameters
    ----------
    points : array-like, shape (M, n_features + 1)
        One sample per row; the last column is the target and every other
        column is a feature.
    learning_rate : float
        Step size applied to the gradient.
    m : array-like, shape (n_features + 1,)
        Current feature coefficients followed by the intercept.

    Returns
    -------
    numpy.ndarray
        ``m - learning_rate * gradient``, where
        ``gradient = (-2 / M) * sum((y_i - x_i . m) * x_i)``.
        With zero rows the gradient is zero, so the result equals ``m``.
    """
    points = np.asarray(points)
    M = len(points)

    if M == 0:
        # No samples contribute, so the gradient is all zeros (and -2/M is
        # never evaluated).
        gradient = np.zeros(points.shape[1])
    else:
        # A trailing column of ones lets the last entry of ``m`` act as the intercept.
        X = np.hstack((points[:, :-1], np.ones((M, 1))))
        residuals = points[:, -1] - X @ m
        # X.T @ residuals sums residual_i * x_i over all rows in one step.
        gradient = (-2 / M) * (X.T @ residuals)

    return m - learning_rate * gradient

In [6]:
def gd(points, learning_rate, num_iterations, log_every=1):
    """Run batch gradient descent starting from an all-zero weight vector.

    Parameters
    ----------
    points : numpy.ndarray, shape (M, n_features + 1)
        Training matrix, passed unchanged to ``step_gradient`` and ``cost``.
    learning_rate : float
        Step size forwarded to ``step_gradient``.
    num_iterations : int
        Number of update steps to perform.
    log_every : int, optional
        Print the cost on every ``log_every``-th iteration (iteration 0
        included). The default of 1 logs every iteration; pass ``0`` or
        ``None`` to skip both the printing and the cost evaluation.

    Returns
    -------
    numpy.ndarray
        The weight vector after ``num_iterations`` updates; its length equals
        the number of columns of ``points``.
    """
    # One weight per column of ``points``: n_features coefficients + 1 intercept.
    m = np.zeros(points.shape[1])

    for i in range(num_iterations):
        m = step_gradient(points, learning_rate, m)
        # The cost is only needed for the log line, so skip computing it
        # on iterations that are not reported.
        if log_every and i % log_every == 0:
            print(i, " Cost: ", cost(points, m))

    return m

In [7]:
def run(train_data, lr, iterations):
    """Fit weights with ``gd``, print them, and return them.

    ``train_data`` is handed to ``gd`` as its training matrix, ``lr`` as the
    learning rate and ``iterations`` as the number of update steps.
    """
    fitted_weights = gd(
        points=train_data,
        learning_rate=lr,
        num_iterations=iterations,
    )
    print("Final m :", fitted_weights)
    return fitted_weights

In [8]:
# Fit on the raw (unscaled) training matrix; gd prints the cost on each of the
# 10000 iterations, which is why this cell's output is so long.
m = run(train_data, lr=0.001, iterations=10000)

0  Cost:  596.2960783537657
1  Cost:  593.4930246625086
2  Cost:  590.7127313243425
3  Cost:  587.9548300926728
4  Cost:  585.218960999313
5  Cost:  582.5047721533879
6  Cost:  579.8119195451965
7  Cost:  577.1400668549089
8  Cost:  574.4888852659761
9  Cost:  571.8580532831343
10  Cost:  569.2472565548985
11  Cost:  566.6561877004244
12  Cost:  564.0845461406376
13  Cost:  561.5320379335218
14  Cost:  558.9983756134643
15  Cost:  556.4832780345572
16  Cost:  553.9864702177566
17  Cost:  551.5076832018107
18  Cost:  549.0466538978485
19  Cost:  546.6031249475609
20  Cost:  544.1768445848618
21  Cost:  541.7675665009613
22  Cost:  539.3750497127576
23  Cost:  536.9990584344666
24  Cost:  534.6393619524166
25  Cost:  532.2957345029155
26  Cost:  529.9679551531284
27  Cost:  527.6558076848847
28  Cost:  525.3590804813448
29  Cost:  523.077566416448
30  Cost:  520.8110627470901
31  Cost:  518.5593710079447
32  Cost:  516.3222969088707
33  Cost:  514.0996502348472
34  Cost:  511.89124474836

In [9]:
def predict(m, testing_data):
    """Return the linear-model prediction for every row of ``testing_data``.

    Parameters
    ----------
    m : array-like, shape (n_features + 1,)
        Feature coefficients followed by the intercept as the final entry.
    testing_data : array-like, shape (n_samples, n_features)
        One sample per row, features only (no target column).

    Returns
    -------
    list
        One prediction per row, ``row . m[:-1] + m[-1]``; an empty list when
        ``testing_data`` has no rows.
    """
    if len(testing_data) == 0:
        return []

    coefficients, intercept = m[:-1], m[-1]
    # One matrix-vector product replaces the per-row loop; list() keeps the
    # return type a plain Python list.
    return list(np.asarray(testing_data) @ coefficients + intercept)
    

In [10]:
# Predictions of the model trained on unscaled features, one per row of test_data.
y_pred = predict(m, test_data)


In [11]:
# Prints the entire prediction list (every test row), not a preview.
print(y_pred)

[12.309559913272826, 28.87591593643925, 22.401559724944995, 24.341613188976446, 20.863815375518175, 2.7802785006235133, 30.044893506879855, 24.737225140709352, 18.7429083524916, 23.520263241990214, 24.156483159798437, 17.80251499246154, 17.875790314155065, 21.790762818739918, 42.4502697781885, 24.002022556285077, 24.46855234589135, 27.69694123916807, 20.366278416861885, 31.33170078200815, 24.05819340204875, 24.926291709182028, 33.91182448629645, 36.28916085025651, 32.15421677534958, 16.518611327710676, 23.4468519561765, 33.14713232722301, 24.940061586368415, 33.67012130757299, 16.993795653508414, 26.18555770910646, 23.344242890035268, 25.455794593064834, 14.994772593318421, 29.612581996505877, 26.22674243505181, 20.59775537970458, 24.268738933588345, 9.46339569855446, 8.19285769986971, 28.901775370524867, 29.650621508611327, 19.889389507355673, 20.322599297286445, 3.036280980538205, 39.5102515686837, 25.735653009934687, 30.082882019877353, 16.794153050969637, 17.82184242831783, 40.6116

#### Using Feature Scaling

In [12]:
from sklearn.preprocessing import StandardScaler

# Split the training matrix: the last column is the target, the rest are features.
X_train, y_train = train_data[:, :-1], train_data[:, -1]
X_test = test_data

# Fit the scaler on the training features only, then apply that same fitted
# transform to both the training and the test features.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [17]:
# Re-attach the target as the last column, which is the layout run() passes on to gd().
train_scaled = np.column_stack((X_train_scaled, y_train))
m_scaled = run(train_scaled, lr=0.01, iterations=25000)

0  Cost:  570.4056234550059
1  Cost:  544.1557876195511
2  Cost:  519.9746884568692
3  Cost:  497.54827186134764
4  Cost:  476.62764245535465
5  Cost:  457.0144864103584
6  Cost:  438.5498026649458
7  Cost:  421.1051897031657
8  Cost:  404.57610644915957
9  Cost:  388.8766582035201
10  Cost:  373.93556077819653
11  Cost:  359.69301494211953
12  Cost:  346.09828426862333
13  Cost:  333.10781657123675
14  Cost:  320.68378548746756
15  Cost:  308.79295686224026
16  Cost:  297.405806279028
17  Cost:  286.49583084371835
18  Cost:  276.0390112683976
19  Cost:  266.0133902981697
20  Cost:  256.3987412445874
21  Cost:  247.17630635235344
22  Cost:  238.32858933178227
23  Cost:  229.83918994707403
24  Cost:  221.69267129841487
25  Cost:  213.8744525586082
26  Cost:  206.37072156469915
27  Cost:  199.16836293181393
28  Cost:  192.25489833512924
29  Cost:  185.61843636206189
30  Cost:  179.2476299211098
31  Cost:  173.13163964534084
32  Cost:  167.2601020776073
33  Cost:  161.62310169440175
34  C

In [18]:
# The weights were fit on standardized features, so predict on the test
# features transformed by the same scaler (X_test_scaled), not on raw test_data.
y_pred_scaled = predict(m_scaled, X_test_scaled)
# Prints the entire prediction list (every test row), not a preview.
print(y_pred_scaled)

[12.433283444095125, 29.03367317754836, 22.371644545783816, 24.47778655461119, 20.601669770369043, 2.7253317533216865, 30.400004093743846, 24.861200419847595, 18.65724977262626, 23.53985837500605, 24.11396871797964, 17.711438537641147, 17.440002981224886, 21.653562777856997, 42.31137693633721, 23.849744927021522, 24.475732324572228, 27.53872655349101, 20.236066941080814, 31.15155453428537, 23.78237904360383, 25.009794429953722, 33.957685411123734, 36.435156493157166, 32.04098328760962, 16.713226323062244, 23.471766110727934, 32.93828008548769, 25.1807008024051, 33.71008685247517, 16.88580202422874, 26.027608371424023, 23.270400248017836, 25.477589677652887, 15.009466308773224, 29.585749399727256, 26.248212452503147, 20.37245654108056, 24.436814977529078, 9.44706893802525, 8.380966538049648, 29.013923447982922, 29.590854031443616, 19.757569699871713, 20.37196790490275, 3.144426250491822, 39.524201179944356, 25.717412222093397, 30.377296275609606, 16.794532639658136, 17.890886524079807, 

In [19]:
# Write the scaled-model predictions to submission.csv using np.savetxt's
# default formatting (no header; fmt and delimiter left at their defaults).
np.savetxt("submission.csv", y_pred_scaled)