#### Imports

In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#### Data

In [15]:
# Load the modelling table from the working directory.
# NOTE(review): judging by the column preview below, the categorical
# features already appear to be one-hot encoded in this file - confirm.
DATA_FILE = "AMES_Final_DF.csv"
df = pd.read_csv(DATA_FILE)

In [16]:
# Preview the first five rows as a sanity check that the file loaded as expected.
df.head()

Unnamed: 0,Lot Frontage,Lot Area,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Mas Vnr Area,BsmtFin SF 1,BsmtFin SF 2,Bsmt Unf SF,...,Sale Type_ConLw,Sale Type_New,Sale Type_Oth,Sale Type_VWD,Sale Type_WD,Sale Condition_AdjLand,Sale Condition_Alloca,Sale Condition_Family,Sale Condition_Normal,Sale Condition_Partial
0,141.0,31770,6,5,1960,1960,112.0,639.0,0.0,441.0,...,0,0,0,0,1,0,0,0,1,0
1,80.0,11622,5,6,1961,1961,0.0,468.0,144.0,270.0,...,0,0,0,0,1,0,0,0,1,0
2,81.0,14267,6,6,1958,1958,108.0,923.0,0.0,406.0,...,0,0,0,0,1,0,0,0,1,0
3,93.0,11160,7,5,1968,1968,0.0,1065.0,0.0,1045.0,...,0,0,0,0,1,0,0,0,1,0
4,74.0,13830,5,5,1997,1998,0.0,791.0,0.0,137.0,...,0,0,0,0,1,0,0,0,1,0


#### Creating X and Y Features

In [17]:
# Target: the sale price column.
y = df['SalePrice']
# Features: every remaining column (df itself is left untouched).
X = df.drop(columns='SalePrice')

#### Train Test Split

In [18]:
from sklearn.model_selection import train_test_split

In [19]:
# Hold out 10% of the rows as a final test set; the fixed seed makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=101,
)

#### Scaling

In [20]:
from sklearn.preprocessing import StandardScaler

In [21]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted transform to both splits so no test-set information is used.
#
# NOTE: this overwrites X_train / X_test with their scaled versions, so
# re-running this cell on its own would re-fit on already-scaled data.
# NOTE(review): the scaler is fit on all of X_train before the grid search
# below, so every CV validation fold has contributed to the scaling
# statistics; wrapping scaler + model in a Pipeline would avoid that.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

#### Elastic Net Regularization

In [22]:
from sklearn.linear_model import ElasticNet

In [23]:
# Base estimator with library defaults; `alpha` and `l1_ratio` are not set
# here because the grid search below supplies them for every candidate.
model_elastic_net = ElasticNet()

#### Grid Search

In [24]:
# Hyperparameter candidates for the grid search:
# 6 values of alpha x 7 values of l1_ratio = 42 combinations.
# NOTE(review): the best_params_ recorded further down (alpha=1,
# l1_ratio=0.99) sit on the edge of this grid, so the optimum may lie
# outside it - consider adding smaller alphas and l1_ratio=1.0.
param_grid = {
    'alpha': [1, 10, 20, 50, 90, 100],
    'l1_ratio': [0.01, 0.1, 0.2, 0.5, 0.9, 0.95, 0.99],
}

In [25]:
from sklearn.model_selection import GridSearchCV

In [34]:
# Exhaustive search over param_grid with 5-fold cross-validation.
# The scorer is *negative* MSE, so a higher (closer to zero) score is better.
grid_model = GridSearchCV(
    estimator=model_elastic_net,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    verbose=1,
)

In [35]:
# Run the cross-validated search on the scaled training split
# (42 candidates x 5 folds = 210 fits, as reported in the output below).
grid_model.fit(X_train,y_train)

Fitting 5 folds for each of 42 candidates, totalling 210 fits


GridSearchCV(cv=5, estimator=ElasticNet(),
             param_grid={'alpha': [1, 10, 20, 50, 90, 100],
                         'l1_ratio': [0.01, 0.1, 0.2, 0.5, 0.9, 0.95, 0.99]},
             scoring='neg_mean_squared_error', verbose=1)

In [36]:
# Hyperparameter combination that achieved the best mean cross-validated score.
grid_model.best_params_

{'alpha': 1, 'l1_ratio': 0.99}

In [37]:
# Per-candidate diagnostics as a table: fit/score timings, per-fold and mean
# test scores, and rank. Scores are negative MSE, so values closer to zero
# are better.
pd.DataFrame(grid_model.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_alpha,param_l1_ratio,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.131255,0.007987,0.001531,0.001193,1,0.01,"{'alpha': 1, 'l1_ratio': 0.01}",-911841700.0,-1017887000.0,-482763500.0,-535946700.0,-796272800.0,-748942400.0,208483200.0,12
1,0.126953,0.009717,0.001711,0.000751,1,0.1,"{'alpha': 1, 'l1_ratio': 0.1}",-891925800.0,-986014000.0,-470121300.0,-523202800.0,-775912800.0,-729435300.0,202075000.0,10
2,0.131257,0.007209,0.001526,0.000468,1,0.2,"{'alpha': 1, 'l1_ratio': 0.2}",-869458400.0,-949378100.0,-456328100.0,-508947400.0,-753043200.0,-707431000.0,194586800.0,9
3,0.149723,0.00389,0.001871,0.000249,1,0.5,"{'alpha': 1, 'l1_ratio': 0.5}",-799661400.0,-829868000.0,-417736900.0,-465862100.0,-683098100.0,-639245300.0,169186900.0,6
4,0.228384,0.020452,0.001371,0.000513,1,0.9,"{'alpha': 1, 'l1_ratio': 0.9}",-698296400.0,-635376300.0,-382442900.0,-411886600.0,-594575700.0,-544515600.0,125114500.0,4
5,0.330316,0.011588,0.0014,0.000487,1,0.95,"{'alpha': 1, 'l1_ratio': 0.95}",-682110700.0,-606423700.0,-380697600.0,-407007700.0,-587833400.0,-532814600.0,118069200.0,2
6,0.736837,0.09983,0.001006,1.6e-05,1,0.99,"{'alpha': 1, 'l1_ratio': 0.99}",-666961800.0,-587901800.0,-381902400.0,-409370800.0,-588045900.0,-526836500.0,111278700.0,1
7,0.037187,0.001514,0.001875,0.000448,10,0.01,"{'alpha': 10, 'l1_ratio': 0.01}",-2306002000.0,-2809526000.0,-1608172000.0,-1523324000.0,-2276677000.0,-2104740000.0,479878300.0,25
8,0.036031,0.002899,0.001474,0.000659,10,0.1,"{'alpha': 10, 'l1_ratio': 0.1}",-2196049000.0,-2680776000.0,-1513349000.0,-1440617000.0,-2158587000.0,-1997876000.0,464059700.0,23
9,0.038421,0.003571,0.001802,0.000368,10,0.2,"{'alpha': 10, 'l1_ratio': 0.2}",-2067817000.0,-2529241000.0,-1403406000.0,-1345017000.0,-2020851000.0,-1873266000.0,444915900.0,22


#### Evaluation

In [38]:
# Predict on the held-out (scaled) test split. GridSearchCV was built with
# its default refit behaviour, so predict() delegates to the best estimator
# refit on the whole training split.
y_pred = grid_model.predict(X_test)

In [39]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [40]:
# Mean absolute error on the held-out split, in the same units as SalePrice.
MAE = mean_absolute_error(y_true=y_test, y_pred=y_pred)
MAE

14346.412216097959

In [41]:
# Root mean squared error on the held-out split: the square root puts the
# MSE back into the units of SalePrice, and large misses weigh more than in MAE.
RMSE = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))
RMSE

20785.253227320554