#### Model Selection 

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as pp
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import PolynomialFeatures, StandardScaler, MinMaxScaler
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.pipeline import make_pipeline
from sklearn.metrics import r2_score

In [None]:
# Read the modelling dataset from the CSV file and preview its first two rows.
data = pd.read_csv('model_selection_data.csv')
data.head(n=2)

In [None]:
# The Boston Housing Dataset 
# The Boston Housing Dataset is derived from information collected by the U.S. Census Service concerning housing in the area of Boston, MA.
# The following describes the dataset columns: 
# CRIM - per capita crime rate by town 
# ZN - proportion of residential land zoned for lots over 25,000 sq.ft. 
# INDUS - proportion of non-retail business acres per town. 
# CHAS - Charles River dummy variable (1 if tract bounds river; 0 otherwise) 
# NOX - nitric oxides concentration (parts per 10 million) 
# RM - average number of rooms per dwelling 
# AGE - proportion of owner-occupied units built prior to 1940
# DIS - weighted distances to five Boston employment centres 
# RAD - index of accessibility to radial highways 
# TAX - full-value property-tax rate per $10,000 
# PTRATIO - pupil-teacher ratio by town 
# B - 1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town 
# LSTAT - % lower status of the population 
# Target - Median value of owner-occupied homes in $1000's

In [None]:
### Checking missing values
# A notebook cell only renders its final expression, so the per-column null
# counts are printed explicitly; otherwise they would be computed and discarded.
print(data.isnull().sum())
# Last expression of the cell: rendered as the cell output (the column index).
data.columns

In [None]:
# Hold out 20% of the rows as a test split (fixed seed for repeatability).
# Every column except 'Target' is a feature; 'Target' is the response.
X_train, X_test, Y_train, Y_test = train_test_split(
    data.drop(columns=['Target']),
    data['Target'],
    test_size=0.2,
    random_state=190,
)

### Base Model

In [None]:
# Baseline: standardise the features, expand them to degree-2 polynomial
# terms, then fit an unregularised linear regression on the training split.
linear_reg_pipeline = make_pipeline(
    StandardScaler(),
    PolynomialFeatures(degree=2),
    LinearRegression(),
)
linear_reg_pipeline_train = linear_reg_pipeline.fit(X_train, Y_train)

In [None]:
# Predictions of the fitted baseline pipeline on the train and test splits.
y_pred_lr_train, y_pred_lr_test = (
    linear_reg_pipeline_train.predict(X_train),
    linear_reg_pipeline_train.predict(X_test),
)


In [None]:
# R2 calculations: score the baseline on both splits, then report each score
# as a percentage together with the train-minus-test gap.
r2_linear_reg_train = r2_score(Y_train, y_pred_lr_train)
r2_linear_reg_test = r2_score(Y_test, y_pred_lr_test)

print('Train R-squared value:')
print(round(r2_linear_reg_train * 100, 2))
print('Test R-squared value:')
print(round(r2_linear_reg_test * 100, 2))
print(f'The difference in train and test r2_score: {(r2_linear_reg_train-r2_linear_reg_test)*100:.2f}%')

The baseline model above is heavily overfitted: its train R² is far higher than its test R².

### Model 2 - Ridge with StandardScaler

In [None]:
# Ridge regression (alpha=250) on standardised, degree-2 polynomial features.
alpha = 250
ridge_reg_pipeline = make_pipeline(
    StandardScaler(),
    PolynomialFeatures(degree=2),
    Ridge(alpha=alpha),
)
ridge_reg_pipeline_train = ridge_reg_pipeline.fit(X_train, Y_train)

In [None]:
# Predictions of the fitted ridge pipeline on the train and test splits.
# The spelling `y_pred_riidge_test` is kept as-is: the scoring cell reads it.
y_pred_ridge_train, y_pred_riidge_test = (
    ridge_reg_pipeline_train.predict(X_train),
    ridge_reg_pipeline_train.predict(X_test),
)


In [None]:
# R2 calculations: score the ridge model on both splits, then report each
# score as a percentage together with the train-minus-test gap for this alpha.
r2_ridge_train = r2_score(Y_train, y_pred_ridge_train)
r2_ridge_test = r2_score(Y_test, y_pred_riidge_test)

print('Train R-squared value:')
print(round(r2_ridge_train * 100, 2))
print('Test R-squared value:')
print(round(r2_ridge_test * 100, 2))
print(f'At alpha:{alpha}, The difference in train and test r2_score: {(r2_ridge_train-r2_ridge_test)*100:.2f}%')

### Model 3 - Ridge with MinMaxScaler

In [None]:
# Ridge regression (alpha=0.35) on min-max scaled, degree-2 polynomial features.
# Rebinds the ridge pipeline names used by the previous model.
alpha = 0.35
ridge_reg_pipeline = make_pipeline(
    MinMaxScaler(),
    PolynomialFeatures(degree=2),
    Ridge(alpha=alpha),
)
ridge_reg_pipeline_train = ridge_reg_pipeline.fit(X_train, Y_train)

In [None]:
# Predictions of the refitted ridge pipeline on the train and test splits.
# The spelling `y_pred_riidge_test` is kept as-is: the scoring cell reads it.
y_pred_ridge_train, y_pred_riidge_test = (
    ridge_reg_pipeline_train.predict(X_train),
    ridge_reg_pipeline_train.predict(X_test),
)


In [None]:
# R2 calculations: score the ridge model on both splits, then report each
# score as a percentage together with the train-minus-test gap for this alpha.
r2_ridge_train = r2_score(Y_train, y_pred_ridge_train)
r2_ridge_test = r2_score(Y_test, y_pred_riidge_test)

print('Train R-squared value:')
print(round(r2_ridge_train * 100, 2))
print('Test R-squared value:')
print(round(r2_ridge_test * 100, 2))
print(f'At alpha:{alpha}, The difference in train and test r2_score: {(r2_ridge_train-r2_ridge_test)*100:.2f}%')

### Model 4 - Lasso

In [None]:
# Lasso regression (alpha=0.015) on standardised, degree-2 polynomial features.
alpha = 0.015
lasso_reg_pipeline = make_pipeline(
    StandardScaler(),
    PolynomialFeatures(degree=2),
    Lasso(alpha=alpha),
)
lasso_reg_pipeline_train = lasso_reg_pipeline.fit(X_train, Y_train)

In [None]:
# Predictions of the fitted lasso pipeline on the train and test splits.
y_pred_lasso_train, y_pred_lasso_test = (
    lasso_reg_pipeline_train.predict(X_train),
    lasso_reg_pipeline_train.predict(X_test),
)


In [None]:
# R2 calculations: score the lasso model on both splits, then report each
# score as a percentage together with the train-minus-test gap for this alpha.
r2_lasso_train = r2_score(Y_train, y_pred_lasso_train)
r2_lasso_test = r2_score(Y_test, y_pred_lasso_test)

print('Train R-squared value:')
print(round(r2_lasso_train * 100, 2))
print('Test R-squared value:')
print(round(r2_lasso_test * 100, 2))
print(f'At alpha:{alpha}, The difference in train and test r2_score: {(r2_lasso_train-r2_lasso_test)*100:.2f}%')

### Model 5 - Lasso with Minmax scaler

In [None]:
# Lasso regression (alpha=0.025) on min-max scaled, degree-2 polynomial features.
# Rebinds the lasso pipeline names used by the previous model.
alpha = 0.025
lasso_reg_pipeline = make_pipeline(
    MinMaxScaler(),
    PolynomialFeatures(degree=2),
    Lasso(alpha=alpha),
)
lasso_reg_pipeline_train = lasso_reg_pipeline.fit(X_train, Y_train)

In [None]:
# Predictions of the refitted lasso pipeline on the train and test splits.
y_pred_lasso_train, y_pred_lasso_test = (
    lasso_reg_pipeline_train.predict(X_train),
    lasso_reg_pipeline_train.predict(X_test),
)


In [None]:
# R2 calculations: score the lasso model on both splits, then report each
# score as a percentage together with the train-minus-test gap for this alpha.
r2_lasso_train = r2_score(Y_train, y_pred_lasso_train)
r2_lasso_test = r2_score(Y_test, y_pred_lasso_test)

print('Train R-squared value:')
print(round(r2_lasso_train * 100, 2))
print('Test R-squared value:')
print(round(r2_lasso_test * 100, 2))
print(f'At alpha:{alpha}, The difference in train and test r2_score: {(r2_lasso_train-r2_lasso_test)*100:.2f}%')

### Model Selection

- Linear Regression (Baseline model) : Train(94.08%) Test(80.83%) - 11.6% - Rejected
- Ridge Regression  (StandardScaler,alpha=250): Train(84.55) Test(80.97) - 3.58% - 
- Ridge Regression  (MinMaxScaler,alpha=0.35): Train(89.24) Test(85.90) - 3.34% - Rejected - lower performance than the Lasso (StandardScaler) model below
- Lasso  (StandardScaler,alpha=0.015): Train(92.12) Test(90.73) - 1.40% - Selected - High performing and less overfitted 
- Lasso  (MinMaxScaler,alpha=0.025): Train(82.45) Test(84.16) - -1.74% - Rejected as the train r2 < test r2

