In [6]:
import numpy as np
# Read the training CSV, then separate it column-wise: every column except the
# last becomes the feature matrix, and the last column becomes the target.
train_data = np.loadtxt(fname='training_boston_x_y_train.csv', delimiter=',')
x_train, y_train = train_data[:, :-1], train_data[:, -1]
print(x_train.shape)

(379, 13)


In [7]:
# Read the test-set CSV. ndmin=2 keeps the result two-dimensional even when
# the file contains a single row; without it np.loadtxt would hand back a flat
# 1-D vector in that case, which the 2-D scaling step later in the notebook
# cannot consume.
test_data = np.loadtxt('test_boston_x_test.csv', delimiter=',', ndmin=2)


In [8]:
from sklearn.preprocessing import StandardScaler
# Learn the scaling statistics from the training features only, then apply the
# same fitted scaler to both the training and the test features.
scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(test_data)

In [9]:
def initialize_parameters(n):
    """Return the starting parameters: a zero weight vector and a zero bias.

    Parameters
    ----------
    n : int
        Number of weights to create (passed straight to ``np.zeros``).

    Returns
    -------
    tuple
        ``(W, b)`` where ``W`` is ``np.zeros(n)`` and ``b`` is the integer 0.
    """
    return np.zeros(n), 0

In [10]:
def compute_cost(X,Y,W,b):
    """Return the half mean-squared-error of the linear model ``X·W + b``.

    The cost is ``sum((X·W + b - Y)**2) / (2 * m)`` where ``m`` is the number
    of rows in ``X``.

    Parameters
    ----------
    X : array whose first axis indexes samples
    Y : targets, subtracted element-wise from the predictions
    W : weights, combined with ``X`` via ``np.dot``
    b : bias added to every prediction

    Returns
    -------
    The scalar cost.
    """
    n_samples = X.shape[0]
    residuals = np.dot(X, W) + b - Y
    squared_error_total = np.sum(residuals ** 2)
    return squared_error_total / (2 * n_samples)

In [11]:
def gradient_descent(X,Y,W,b, learning_rate, num_iterations, log_every=100):
    """Fit linear-regression parameters with batch gradient descent.

    Every iteration forms the residual ``X·W + b - Y``, steps ``W`` and ``b``
    against the gradient of the half mean-squared-error cost, and appends the
    cost of the *updated* parameters (as returned by ``compute_cost``) to the
    history.

    Parameters
    ----------
    X : array whose first axis indexes the ``m`` samples
    Y : targets, subtracted element-wise from the predictions
    W : initial weights; never modified — the function works on a float copy
    b : initial bias
    learning_rate : step size applied to both gradients
    num_iterations : number of update steps to run
    log_every : int, optional
        Print the cost on iterations that are a multiple of this value
        (default 100, matching the historical behaviour). Pass 0 or ``None``
        to silence progress output.

    Returns
    -------
    tuple
        ``(W, b, cost_history)`` — the final weights (a new array), the final
        bias, and a list with one cost entry per iteration.

    Raises
    ------
    ValueError
        If ``X`` has no rows, since the gradients divide by the sample count.
    """
    m = X.shape[0]
    if m == 0:
        raise ValueError("X must contain at least one sample")

    # Work on a private float copy: the in-place update below would otherwise
    # overwrite the caller's array, and would fail outright on integer weights.
    W = np.array(W, dtype=float)

    cost_history = []
    for i in range(num_iterations):
        # The residual is shared by both gradient terms, so compute it once.
        residual = np.dot(X, W) + b - Y
        dW = (1/m) * np.dot(X.T, residual)
        db = (1/m) * np.sum(residual)
        W -= learning_rate * dW
        b -= learning_rate * db
        cost = compute_cost(X,Y,W,b)
        cost_history.append(cost)
        if log_every and i % log_every == 0:
            print(f'Iteration{i}: cost{cost}')
    return W, b, cost_history

In [15]:
# Hyperparameters for the training run.
learning_rate = 0.001
num_iterations = 10000

# Start from all-zero parameters, one weight per scaled feature column.
n_features = x_train_scaled.shape[1]
W,b = initialize_parameters(n_features)

# Run gradient descent; W and b are rebound to the fitted values.
W,b, cost_history = gradient_descent(
    x_train_scaled,
    y_train,
    W,
    b,
    learning_rate=learning_rate,
    num_iterations=num_iterations,
)

Iteration0: cost298.8272044797119
Iteration100: cost238.58337488295624
Iteration200: cost194.96043348189167
Iteration300: cost160.99141591398828
Iteration400: cost133.73886123407397
Iteration500: cost111.62186496132942
Iteration600: cost93.59324486684422
Iteration700: cost78.87103528561157
Iteration800: cost66.83896816497757
Iteration900: cost57.00079570234554
Iteration1000: cost48.95360300203269
Iteration1100: cost42.36920505569396
Iteration1200: cost36.97993229692506
Iteration1300: cost32.567334889263925
Iteration1400: cost28.953066894375187
Iteration1500: cost25.99148552959303
Iteration1600: cost23.563628327192525
Iteration1700: cost21.57230634572658
Iteration1800: cost19.938104296300608
Iteration1900: cost18.596118695031155
Iteration2000: cost17.49329705405777
Iteration2100: cost16.586266787015134
Iteration2200: cost15.839563273046833
Iteration2300: cost15.224183372572815
Iteration2400: cost14.716404374731608
Iteration2500: cost14.296819482716057
Iteration2600: cost13.9495499931558

In [17]:
def predict(X,W,b):
    """Evaluate the linear model: ``np.dot(X, W)`` shifted by the bias ``b``."""
    weighted_sum = np.dot(X, W)
    return weighted_sum + b
# Apply the fitted weights and bias to the scaled test features and show the
# resulting predictions.
y_test_pred = predict(X=x_test_scaled, W=W, b=b)
print(y_test_pred)

[11.84741569 28.8364699  22.42046101 24.27565042 21.06130978  2.83767701
 29.44193752 24.46101044 18.83346116 23.42766519 24.37073049 17.94818484
 18.58201824 21.90179083 42.71020596 24.15664504 24.49265304 27.68518662
 20.48496557 31.40899834 24.1892983  24.68041759 33.74836192 35.91496983
 32.25422079 16.04607202 23.06435667 33.0267862  24.57091943 33.45567013
 17.20746404 26.27619673 23.52117617 25.44275056 15.03305466 29.37952211
 26.12751504 20.68744528 24.12295219  9.54954663  7.62820775 28.67774131
 29.5705285  20.13741905 20.35371504  2.76469103 39.58526517 25.7785529
 29.94197404 16.98911614 17.7242405  39.95776618 17.75984582 21.16997442
 15.89694594 21.30510981 18.49620522 23.12057358 13.91750524 17.26890906
 15.22483275 29.15017358 25.23297982 25.62458584 16.96876692 16.94513962
 34.57876888 17.0870299  26.60829349 22.41767873 29.43597316 26.84810933
 18.30343939  5.45271852 36.10354251 25.15126184 29.45823868 26.83168257
 16.124214   32.72675539 19.49309163 22.96711408 23.

In [19]:
# Write the test-set predictions to disk, one value per line with five decimals.
np.savetxt(fname='predictionstest.csv', X=y_test_pred, fmt='%.5f')