# In [14]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

# Read the training table. The target is stored in the ' Y' column (the
# header name carries a leading space).
data = pd.read_csv("./assets/training_boston_x_y_train.csv", delimiter=",")

# NOTE(review): the scaler is fitted on the whole frame (target column
# included), but nothing in the visible code calls transform() with it --
# confirm whether the CSVs are already standardised or a transform is missing.
stand_sc = preprocessing.StandardScaler().fit(data)

# Pull the target out of the frame so that only feature columns remain.
Y = data.pop(' Y')

# Constant column of ones: its weight plays the role of the intercept.
data['lc'] = 1

# Dense matrix with one row per training sample.
points = np.array(data)

# Single batch gradient-descent update with an L2 penalty term
def step_gradient(points, learning_rate, m, lambda_reg, targets=None):
    """Apply one gradient-descent update to the weight vector ``m``.

    The data term is the gradient of the mean squared error taken over every
    row of ``points``; ``lambda_reg * m`` is added to it as the penalty term.

    Args:
        points: 2-D array-like with one sample per row. The number of columns
            must equal ``len(m)``.
        learning_rate: Step size applied to the combined gradient.
        m: Current weights. Updated in place and also returned.
        lambda_reg: Strength of the L2 penalty term.
        targets: Target value for each row of ``points``. When omitted, the
            module-level ``Y`` is used.

    Returns:
        ``m`` itself, now holding the updated weights.
    """
    if targets is None:
        targets = Y  # fall back to the module-level training targets

    features = np.asarray(points, dtype=float)
    weights = np.asarray(m, dtype=float)
    n_samples = len(features)

    if n_samples:
        # Only the first n_samples targets are paired with the rows.
        expected = np.asarray(targets, dtype=float)[:n_samples]
        residuals = expected - features.dot(weights)
        mse_grad = (-2.0 / n_samples) * features.T.dot(residuals)
    else:
        # No rows: the data term contributes nothing, only the penalty acts.
        mse_grad = np.zeros_like(weights)

    # NOTE(review): cost() penalises lambda_reg * sum(m**2), whose derivative
    # is 2 * lambda_reg * m; the factor 2 is absent here. Left as is so the
    # trained weights do not change -- confirm which scaling is intended.
    m[:] = weights - learning_rate * (mse_grad + lambda_reg * weights)
    return m

# Batch gradient-descent driver
def gd(points, learning_rate, num_iterations, lambda_reg):
    """Run ``num_iterations`` gradient-descent updates starting from zeros.

    Args:
        points: 2-D array-like with one sample per row; one weight is
            created for each column.
        learning_rate: Step size forwarded to ``step_gradient``.
        num_iterations: Number of updates to perform.
        lambda_reg: L2 penalty strength forwarded to ``step_gradient``.

    Returns:
        The weight vector after the final update (all zeros when
        ``num_iterations`` is 0).

    Raises:
        ValueError: If ``points`` is not two-dimensional.
    """
    samples = np.asarray(points)
    if samples.ndim != 2:
        raise ValueError(
            "points must be a 2-D array of shape (samples, features)"
        )

    # One weight per column instead of a fixed count.
    m = np.zeros(samples.shape[1])
    for _ in range(num_iterations):
        m = step_gradient(points, learning_rate, m, lambda_reg)
    return m

# Mean squared error plus an L2 penalty on the weights
def cost(points, m, lambda_reg, targets=None):
    """Return the regularised cost of the weights ``m`` on ``points``.

    The value is the mean of the squared residuals over all rows plus
    ``lambda_reg * sum(m ** 2)``.

    Args:
        points: 2-D array-like with one sample per row. The number of columns
            must equal ``len(m)``.
        m: Weight vector.
        lambda_reg: Strength of the L2 penalty.
        targets: Target value for each row of ``points``. When omitted, the
            module-level ``Y`` is used.

    Returns:
        The scalar cost. With no rows, only the penalty term is returned.
    """
    if targets is None:
        targets = Y  # fall back to the module-level training targets

    features = np.asarray(points, dtype=float)
    weights = np.asarray(m, dtype=float)
    n_samples = len(features)
    penalty = lambda_reg * np.sum(weights ** 2)

    if not n_samples:
        # Avoid averaging over zero rows; the data term is simply absent.
        return penalty

    # Only the first n_samples targets are paired with the rows.
    expected = np.asarray(targets, dtype=float)[:n_samples]
    residuals = expected - features.dot(weights)
    return np.mean(residuals ** 2) + penalty

# Train on the module-level ``points`` with the given hyperparameters
def run(learning_rate=0.2, num_iterations=100, lambda_reg=0.1):
    """Fit the weights by gradient descent on the module-level ``points``.

    Calling ``run()`` with no arguments uses the script's fixed settings;
    the keyword arguments allow other values to be tried without editing
    the function.

    Args:
        learning_rate: Step size passed to ``gd``.
        num_iterations: Number of updates passed to ``gd``.
        lambda_reg: Regularization parameter passed to ``gd``.

    Returns:
        The weight vector returned by ``gd``.
    """
    return gd(points, learning_rate, num_iterations, lambda_reg)

m = run()

# Load the test features and append a constant column of ones, matching the
# constant column that was added to the training frame.
data_test1 = pd.read_csv('./assets/test_boston_x_test.csv', delimiter=',')
data_test1['C'] = 1

# Dense matrix with one row per test sample.
Testing1 = np.array(data_test1)

# Each prediction is the dot product of a test row with the weights, so all
# rows are predicted in one matrix-vector product. The number of columns in
# Testing1 must equal len(m); a mismatch raises instead of being sliced away.
y_Test_predict = Testing1.dot(m)

# Round to 5 decimals and write one prediction per line.
y_Test_predict = np.around(y_Test_predict, decimals=5)
np.savetxt("boston_predictions.csv", y_Test_predict, delimiter=",")