In [3]:
import numpy as np
import pandas as pd

In [4]:
data = np.loadtxt("training_boston_x_y_train.csv", delimiter = ",")
data.shape

(379, 14)

In [5]:
X = data[:, 0:13]
Y = data[:, 13]

In [6]:
X

array([[-0.40784991, -0.48772236, -1.2660231 , ..., -0.30309415,
         0.41057102, -1.09799011],
       [-0.40737368, -0.48772236,  0.24705682, ...,  0.1130321 ,
         0.29116915, -0.52047412],
       [ 0.1251786 , -0.48772236,  1.01599907, ...,  0.80657583,
        -3.79579542,  0.89107588],
       ...,
       [-0.40831101, -0.48772236,  0.24705682, ...,  0.1130321 ,
         0.33206621, -0.33404299],
       [-0.41061997, -0.48772236, -1.15221381, ..., -0.71922039,
         0.203235  , -0.74475218],
       [ 0.34290895, -0.48772236,  1.01599907, ...,  0.80657583,
         0.38787479, -1.35871335]])

In [7]:
X.shape, Y.shape

((379, 13), (379,))

In [8]:
def step_gradient(X, Y, learning_rate, m, c):
    m_slope = np.zeros(len(X[0]))
    c_slope = 0 

    N = len(X)
    for i in range(N):
        x = X[i]
        y = Y[i]

        pred = np.dot(m,x) + c
        error = y - pred
        m_slope += (-2/N) * error * x 
        c_slope += (-2/N) * error
    new_m = m - learning_rate* m_slope
    new_c = c - learning_rate* c_slope

    return new_m, new_c

In [9]:
def gradient_descent(X, Y, learning_rate, num_iterations):
    m = np.zeros(X.shape[1])
    c = 0
    for i in range(0, num_iterations, 100):
        m,c = step_gradient(X, Y, learning_rate, m, c)
        print(i, "Cost: ", cost(X, Y, m,c))
    
    return m,c

In [10]:
def cost(X, Y, m, c):
    total_cost = 0
    N = len(X)
    for i in range(N):
        x = X[i]
        y = Y[i]
        pred = np.dot(m, x) + c
        total_cost += (1/N) * ((y - pred)**2)

    return total_cost

In [12]:
def score(Y_true,Y_pred):
    u = ((Y_true-Y_pred)**2).sum()
    v = ((Y_true-(Y_true.mean()))**2).sum()
    return 1-(u/v)

In [43]:
def run():
    data = np.loadtxt("training_boston_x_y_train.csv", delimiter = ",")
    X = data[:, 0:13]
    Y = data[:, 13]
    learning_rate = 0.036
    num_iterations = 8000
    m, c = gradient_descent(X, Y, learning_rate, num_iterations)
    print("Trained parameters:", m, c)

    # Load the test data
    test_data = np.loadtxt("test_boston_x_test.csv", delimiter=",")

    # Make predictions on the test set
    predictions = np.dot(test_data, m) + c
    print("Predictions", predictions)
    np.savetxt('pred.csv',predictions)

    return predictions

In [44]:
predictions = run()
np.savetxt('pred.csv',predictions)

0 Cost:  504.7277675889557
100 Cost:  432.53975299841863
200 Cost:  373.4007126780558
300 Cost:  323.5970428823503
400 Cost:  281.2124829830536
500 Cost:  244.99529418061854
600 Cost:  213.99521984082162
700 Cost:  187.437897940836
800 Cost:  164.67396609042822
900 Cost:  145.1528684802675
1000 Cost:  128.4058201548712
1100 Cost:  114.03293611838076
1200 Cost:  101.69279064084276
1300 Cost:  91.09369911997132
1400 Cost:  81.98635612519783
1500 Cost:  74.15759089750374
1600 Cost:  67.42506030714725
1700 Cost:  61.63273382824594
1800 Cost:  56.64704963394397
1900 Cost:  52.353640195056556
2000 Cost:  48.654541583427736
2100 Cost:  45.465813887404174
2200 Cost:  42.71551124881581
2300 Cost:  40.3419493919768
2400 Cost:  38.29222642069681
2500 Cost:  36.52095934222402
2600 Cost:  34.989204431024085
2700 Cost:  33.66353433219164
2800 Cost:  32.51524885980137
2900 Cost:  31.519699883596903
3000 Cost:  30.655713613918063
3100 Cost:  29.905096070301152
3200 Cost:  29.252209621666854
3300 Cost:

In [14]:
test_data = np.loadtxt("test_boston_x_test.csv", delimiter=",")
test_data.shape

(127, 13)