# Boston Housing Linear Regression

In [None]:
from sys import path
import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
path.append("..")

## Select the Input File

In [None]:
# Relative path to the semicolon-delimited Boston housing CSV that is loaded
# into the DataFrame further down.
inputFile = "../data/Boston_Housing_Data.csv"

## Read in data and create a DataFrame 

In [None]:
# Load the semicolon-separated CSV into a DataFrame.
df = pd.read_csv(inputFile, delimiter=";")

# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
df.info()

## Feature selection

In [None]:
# Target: the MEDV column, kept as a single-column DataFrame (independent copy).
df_labels = df[["MEDV"]].copy()

# Features: every remaining column except the target itself and CAT.
# NOTE(review): CAT is presumably derived from the target -- confirm against
# the dataset description.
df_features = df.drop(columns=["MEDV", "CAT"])

display(df_features)
display(df_labels)

## Training and test data split

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    df_features,
    df_labels,
    test_size=0.3,
    random_state=1234,
)

# Show the four resulting frames in the same order as they were returned
# apart from the swap of X_test / y_train (features first, then labels).
for split in (X_train, X_test, y_train, y_test):
    display(split)

## Build and train the model

In [None]:
from random import randint

# --- Estimators with scikit-learn's default hyperparameters -----------------
# TODO Try the different regularizers
lr = LinearRegression()
rr = Ridge()
lasso = Lasso()
en = ElasticNet()

# --- Estimators with randomly drawn hyperparameters -------------------------
# TODO Try different settings
# TODO Try different feature settings
# alpha is an integer in [1, 10]; l1_ratio is one of 0.0, 0.1, ..., 1.0.
# Both values change on every run because the generator is not seeded.
alpha = randint(1, 10)
l1_ratio = randint(0, 10) / 10.0

rr_random = Ridge(alpha=alpha)
lasso_random = Lasso(alpha=alpha)
en_random = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)

# --- Fit every estimator on the training split ------------------------------
# fit() returns the estimator itself, so each *_model name refers to the same
# object as the estimator it was fitted from.
lr_model = lr.fit(X_train, y_train)

rr_model = rr.fit(X_train, y_train)
rr_model_random = rr_random.fit(X_train, y_train)

lasso_model = lasso.fit(X_train, y_train)
lasso_model_random = lasso_random.fit(X_train, y_train)

en_model = en.fit(X_train, y_train)
en_model_random = en_random.fit(X_train, y_train)

## Test the Model

In [None]:
# Predict the held-out test features with every fitted model.

# Plain linear regression.
predictions_lr = lr_model.predict(X_test)

# Ridge: default and random alpha.
predictions_rr = rr_model.predict(X_test)
predictions_rr_random = rr_model_random.predict(X_test)

# Lasso: default and random alpha.
predictions_lasso = lasso_model.predict(X_test)
predictions_lasso_random = lasso_model_random.predict(X_test)

# ElasticNet: default and random alpha / l1_ratio.
predictions_en = en_model.predict(X_test)
predictions_en_random = en_model_random.predict(X_test)

In [None]:
# mean_squared_error() returns the *mean squared error*. The values below are
# named and printed as "root mean square error", so take the square root to
# actually report the RMSE (same unit as the MEDV target).
rmse_lr = mean_squared_error(y_test, predictions_lr) ** 0.5
rmse_rr = mean_squared_error(y_test, predictions_rr) ** 0.5
rmse_lasso = mean_squared_error(y_test, predictions_lasso) ** 0.5
rmse_en = mean_squared_error(y_test, predictions_en) ** 0.5

rmse_rr_random = mean_squared_error(y_test, predictions_rr_random) ** 0.5
rmse_lasso_random = mean_squared_error(y_test, predictions_lasso_random) ** 0.5
rmse_en_random = mean_squared_error(y_test, predictions_en_random) ** 0.5


# Models with default hyperparameters.
print("root mean square error LR = " , rmse_lr)
print("root mean square error RR = " , rmse_rr)
print("root mean square error Lasso = " , rmse_lasso)
print("root mean square error EN = " , rmse_en)

# Models with the randomly drawn hyperparameters. The same alpha is also
# passed to the random ElasticNet, in addition to its l1_ratio.
print("--- With random hyperparameters ---")
print("alpha for RR and Lasso: ", alpha)
print("l1_ratio for EN: ", l1_ratio)
print("root mean square error RR random = " , rmse_rr_random)
print("root mean square error Lasso random = " , rmse_lasso_random)
print("root mean square error EN random = " , rmse_en_random)

# Check what is the lowest RMSE
min_rmse = min(rmse_lr, rmse_rr, rmse_lasso, rmse_en, rmse_rr_random, rmse_lasso_random, rmse_en_random)
print(f"The lowest RMSE is: {min_rmse}")