### Linear Regression

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the dataset from a CSV path relative to the notebook's working directory.
# In the recorded run, df.info() reports 2925 rows and 274 numeric columns.
df = pd.read_csv("../DATA/AMES_Final_DF.csv")

In [3]:
# Preview the first five rows to sanity-check the load.
df.head()

Unnamed: 0,Lot Frontage,Lot Area,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Mas Vnr Area,BsmtFin SF 1,BsmtFin SF 2,Bsmt Unf SF,...,Sale Type_ConLw,Sale Type_New,Sale Type_Oth,Sale Type_VWD,Sale Type_WD,Sale Condition_AdjLand,Sale Condition_Alloca,Sale Condition_Family,Sale Condition_Normal,Sale Condition_Partial
0,141.0,31770,6,5,1960,1960,112.0,639.0,0.0,441.0,...,0,0,0,0,1,0,0,0,1,0
1,80.0,11622,5,6,1961,1961,0.0,468.0,144.0,270.0,...,0,0,0,0,1,0,0,0,1,0
2,81.0,14267,6,6,1958,1958,108.0,923.0,0.0,406.0,...,0,0,0,0,1,0,0,0,1,0
3,93.0,11160,7,5,1968,1968,0.0,1065.0,0.0,1045.0,...,0,0,0,0,1,0,0,0,1,0
4,74.0,13830,5,5,1997,1998,0.0,791.0,0.0,137.0,...,0,0,0,0,1,0,0,0,1,0


In [4]:
# Summarize the index range, column count, dtypes and memory usage of the frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2925 entries, 0 to 2924
Columns: 274 entries, Lot Frontage to Sale Condition_Partial
dtypes: float64(11), int64(263)
memory usage: 6.1 MB


In [5]:
# Target vector is the SalePrice column; the feature matrix is every other column.
y = df['SalePrice']
X = df.drop(columns='SalePrice')

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
# Hold out 10% of the rows as the test split; the fixed random_state keeps the
# partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=101,
)

In [8]:
# Feature scaling: standardize the features with StandardScaler
from sklearn.preprocessing import StandardScaler

In [9]:
# StandardScaler with its default settings (the recorded repr shows both
# with_mean=True and with_std=True).
scaler = StandardScaler()

In [10]:
# Learn the scaling statistics from the training split only, so the test split
# does not influence them.
scaler.fit(X_train)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [11]:
# Apply the scaling learned from the training split to both splits.
# NOTE(review): both names are overwritten in place, so re-running this cell
# without first re-running the train/test split would scale already-scaled data.
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [12]:
# Using ElasticNet Model
from sklearn.linear_model import ElasticNet

In [13]:
# Base estimator handed to the grid search; alpha and l1_ratio are left at their
# defaults here because the search grid supplies them.
# max_iter is set to 1,000,000 - presumably to avoid convergence warnings (TODO confirm).
Elastic_net_model = ElasticNet(max_iter = 1000000)

In [14]:
# Hyperparameter grid for GridSearchCV:
# 6 alpha values x 5 l1_ratio values = 30 candidate combinations.
params = {'alpha' : [0.1, 1, 5, 10, 100, 1000],
          'l1_ratio' : [0.1, 0.5, 0.9, 0.99, 1.0]}

In [15]:
# Using GridSearchCV for hyperparameter tuning
from sklearn.model_selection import GridSearchCV

In [16]:
# 5-fold cross-validated search over every combination in `params`, scored by
# negative mean squared error; verbose=2 prints a line for each individual fit.
grid = GridSearchCV(
    estimator=Elastic_net_model,
    param_grid=params,
    cv=5,
    scoring='neg_mean_squared_error',
    verbose=2,
)

In [17]:
# Run the search: 5 folds x 30 candidates = 150 fits (about 3.7 minutes in the recorded run).
# NOTE(review): X_train was standardized with statistics from the whole training split
# before this call, so every validation fold has already contributed to the scaling it is
# evaluated with. Consider moving the scaler into a Pipeline that is passed to the search.
grid.fit(X_train, y_train)

Fitting 5 folds for each of 30 candidates, totalling 150 fits
[CV] alpha=0.1, l1_ratio=0.1 .........................................
[CV] .......................... alpha=0.1, l1_ratio=0.1, total=   0.2s
[CV] alpha=0.1, l1_ratio=0.1 .........................................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s


[CV] .......................... alpha=0.1, l1_ratio=0.1, total=   0.2s
[CV] alpha=0.1, l1_ratio=0.1 .........................................
[CV] .......................... alpha=0.1, l1_ratio=0.1, total=   0.2s
[CV] alpha=0.1, l1_ratio=0.1 .........................................
[CV] .......................... alpha=0.1, l1_ratio=0.1, total=   0.2s
[CV] alpha=0.1, l1_ratio=0.1 .........................................
[CV] .......................... alpha=0.1, l1_ratio=0.1, total=   0.2s
[CV] alpha=0.1, l1_ratio=0.5 .........................................
[CV] .......................... alpha=0.1, l1_ratio=0.5, total=   0.2s
[CV] alpha=0.1, l1_ratio=0.5 .........................................
[CV] .......................... alpha=0.1, l1_ratio=0.5, total=   0.2s
[CV] alpha=0.1, l1_ratio=0.5 .........................................
[CV] .......................... alpha=0.1, l1_ratio=0.5, total=   0.2s
[CV] alpha=0.1, l1_ratio=0.5 .........................................
[CV] .

[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:  3.7min finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=ElasticNet(alpha=1.0, copy_X=True, fit_intercept=True,
                                  l1_ratio=0.5, max_iter=1000000,
                                  normalize=False, positive=False,
                                  precompute=False, random_state=None,
                                  selection='cyclic', tol=0.0001,
                                  warm_start=False),
             iid='warn', n_jobs=None,
             param_grid={'alpha': [0.1, 1, 5, 10, 100, 1000],
                         'l1_ratio': [0.1, 0.5, 0.9, 0.99, 1.0]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='neg_mean_squared_error', verbose=2)

In [18]:
# Display the estimator selected by the search.
grid.best_estimator_

ElasticNet(alpha=100, copy_X=True, fit_intercept=True, l1_ratio=1.0,
           max_iter=1000000, normalize=False, positive=False, precompute=False,
           random_state=None, selection='cyclic', tol=0.0001, warm_start=False)

In [19]:
# Display the winning hyperparameter combination.
grid.best_params_

{'alpha': 100, 'l1_ratio': 1.0}

In [20]:
# Predict the target for the held-out, already-scaled test split.
y_pred = grid.predict(X_test)

In [21]:
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

In [22]:
# Mean absolute error on the test split, in the same units as SalePrice.
mean_absolute_error(y_test, y_pred)

14195.354900562166

In [23]:
# Root mean squared error on the test split (square root of the MSE).
np.sqrt(mean_squared_error(y_test, y_pred))

20558.508566893153

In [24]:
# Mean absolute percentage error (MAPE) on the test split, computed with a
# vectorized NumPy reduction instead of Python's element-by-element built-in sum().
# Assumes no true value in y_test is zero (it is used as the divisor) - TODO confirm.
per_error = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
# NOTE: "accuracy" here is simply 100 - MAPE; it is not a classification accuracy.
accuracy = 100 - per_error
print('Accuracy : {} %'.format(round(accuracy, 4)))

Accuracy : 91.467 %
