In [3]:
import numpy as np
import pandas as pd

In [4]:
data = np.loadtxt("training_boston_x_y_train.csv", delimiter = ",")
data.shape

(379, 14)

In [5]:
X = data[:, 0:13]
Y = data[:, 13]

In [6]:
X

array([[-0.40784991, -0.48772236, -1.2660231 , ..., -0.30309415,
         0.41057102, -1.09799011],
       [-0.40737368, -0.48772236,  0.24705682, ...,  0.1130321 ,
         0.29116915, -0.52047412],
       [ 0.1251786 , -0.48772236,  1.01599907, ...,  0.80657583,
        -3.79579542,  0.89107588],
       ...,
       [-0.40831101, -0.48772236,  0.24705682, ...,  0.1130321 ,
         0.33206621, -0.33404299],
       [-0.41061997, -0.48772236, -1.15221381, ..., -0.71922039,
         0.203235  , -0.74475218],
       [ 0.34290895, -0.48772236,  1.01599907, ...,  0.80657583,
         0.38787479, -1.35871335]])

In [7]:
X.shape, Y.shape

((379, 13), (379,))

In [8]:
def step_gradient(X, Y, learning_rate, m, c):
    m_slope = np.zeros(len(X[0]))
    c_slope = 0 

    N = len(X)
    for i in range(N):
        x = X[i]
        y = Y[i]

        pred = np.dot(m,x) + c
        error = y - pred
        m_slope += (-2/N) * error * x 
        c_slope += (-2/N) * error
    new_m = m - learning_rate* m_slope
    new_c = c - learning_rate* c_slope

    return new_m, new_c

In [9]:
def gradient_descent(X, Y, learning_rate, num_iterations):
    m = np.zeros(X.shape[1])
    c = 0
    for i in range(0, num_iterations, 100):
        m,c = step_gradient(X, Y, learning_rate, m, c)
        print(i, "Cost: ", cost(X, Y, m,c))
    
    return m,c

In [10]:
def cost(X, Y, m, c):
    total_cost = 0
    N = len(X)
    for i in range(N):
        x = X[i]
        y = Y[i]
        pred = np.dot(m, x) + c
        total_cost += (1/N) * ((y - pred)**2)

    return total_cost

In [12]:
def score(Y_true,Y_pred):
    u = ((Y_true-Y_pred)**2).sum()
    v = ((Y_true-(Y_true.mean()))**2).sum()
    return 1-(u/v)

In [37]:
def run():
    data = np.loadtxt("training_boston_x_y_train.csv", delimiter = ",")
    X = data[:, 0:13]
    Y = data[:, 13]
    learning_rate = 0.038
    num_iterations = 6000
    m, c = gradient_descent(X, Y, learning_rate, num_iterations)
    print("Trained parameters:", m, c)

    # Load the test data
    test_data = np.loadtxt("test_boston_x_test.csv", delimiter=",")

    # Make predictions on the test set
    predictions = np.dot(test_data, m) + c
    print("Predictions", predictions)
    np.savetxt('pred.csv',predictions)

    return predictions

In [38]:
predictions = run()
np.savetxt('pred.csv',predictions)

0 Cost:  499.92686341192336
100 Cost:  425.0366543501378
200 Cost:  364.1105076547577
300 Cost:  313.15938531979504
400 Cost:  270.1353688737774
500 Cost:  233.6771225571531
600 Cost:  202.73767837856934
700 Cost:  176.46137025699085
800 Cost:  154.13321022241212
900 Cost:  135.15104596447483
1000 Cost:  119.00625731464507
1100 Cost:  105.26867124542181
1200 Cost:  93.5741941885747
1300 Cost:  83.61449422432054
1400 Cost:  75.1283388548329
1500 Cost:  67.89430436552819
1600 Cost:  61.724632004286704
1700 Cost:  56.46004645745984
1800 Cost:  51.96538311241021
1900 Cost:  48.12589575252846
2000 Cost:  44.84413712981978
2100 Cost:  42.03732217906613
2200 Cost:  39.63509810097469
2300 Cost:  37.57765763881751
2400 Cost:  35.814142001496926
2500 Cost:  34.301288373378725
2600 Cost:  33.00228406909727
2700 Cost:  31.885795365297916
2800 Cost:  30.925144058437148
2900 Cost:  30.09760901443678
3000 Cost:  29.3838335222713
3100 Cost:  28.76732224799429
3200 Cost:  28.234014098872972
3300 Cost: 

In [14]:
test_data = np.loadtxt("test_boston_x_test.csv", delimiter=",")
test_data.shape

(127, 13)