In [1]:
import numpy as np
import pandas as pd
pd.set_option("display.max_columns", 300)

from sklearn.model_selection import KFold
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import ElasticNetCV

from numpy import arange
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RepeatedKFold

from sklearn.metrics import mean_squared_error, r2_score
from sklearn.feature_selection import SequentialFeatureSelector

from sklearn.model_selection import train_test_split
from sklearn import preprocessing

In [3]:
base = pd.read_csv('../Data/BaseData.csv')

In [4]:
base.shape

(2580, 299)

In [5]:
base.isnull().sum().sum()

0

## ElasticNet @ Alpha=1 and L1_ratio=0.5 ##

In [6]:
X = base.drop(['SalePrice'], axis=1).values
y = base.SalePrice.values

X_frame = base.drop(['SalePrice'], axis=1)

folds = KFold(n_splits = 5, shuffle = True, random_state = None)

for trainIndex, testIndex in folds.split(X):
    X_train, X_test = X[trainIndex], X[testIndex]
    y_train, y_test = y[trainIndex], y[testIndex]
    print(X_train.shape)
    print(y_train.shape)
    print(X_test.shape)
    print(y_test.shape)
    
    scaler = preprocessing.StandardScaler().fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    elastic = ElasticNet(alpha=1, l1_ratio=0.5)
    print(elastic)
    
    elastic.fit(X_train_scaled, y_train)
    print('R2 Train is: ' + str(elastic.score(X_train_scaled, y_train)))
    print('R2 Test is: ' + str(elastic.score(X_test_scaled, y_test)))
    
    #the below offers same result as above, good back up:
    pred = elastic.predict(X_test_scaled)
    #print(r2_score(y_test, pred))
    
    ADJ_R2 = 1 - (1-r2_score(y_test, pred)) * (len(y)-1)/(len(y)-X.shape[1]-1)
    print('ADJ_R2 is: ' + str(ADJ_R2))
    
    results = pd.DataFrame(elastic.coef_, index=X_frame.columns)
    results.columns = ['Coef']
    Null = results[(results['Coef']>-1) & (results['Coef']<1)]
    Null_list = list(Null.index.values)
    Imp = results[(results['Coef']<-1) | (results['Coef']>1)]
    Imp_list = list(Imp.index.values)
    #print(Null_list)
    print('Features Dropped: ' + str(len(Null_list)))
    #print(Imp_list)
    print('Features Kept: ' + str(len(Imp_list)))
    print()

(2064, 298)
(2064,)
(516, 298)
(516,)
ElasticNet(alpha=1)
R2 Train is: 0.9136959975946495
R2 Test is: 0.8959648683470613
ADJ_R2 is: 0.8823732553560154
Features Dropped: 3
Features Kept: 295

(2064, 298)
(2064,)
(516, 298)
(516,)
ElasticNet(alpha=1)
R2 Train is: 0.9104607830460417
R2 Test is: 0.894772934038505
ADJ_R2 is: 0.8810256014402913
Features Dropped: 2
Features Kept: 296

(2064, 298)
(2064,)
(516, 298)
(516,)
ElasticNet(alpha=1)
R2 Train is: 0.9113882334166682
R2 Test is: 0.8823368667605828
ADJ_R2 is: 0.866964830940615
Features Dropped: 6
Features Kept: 292

(2064, 298)
(2064,)
(516, 298)
(516,)
ElasticNet(alpha=1)
R2 Train is: 0.9118587112088559
R2 Test is: 0.9082008927359755
ADJ_R2 is: 0.8962078484726351
Features Dropped: 3
Features Kept: 295

(2064, 298)
(2064,)
(516, 298)
(516,)
ElasticNet(alpha=1)
R2 Train is: 0.9221404841511653
R2 Test is: 0.8558245565475543
ADJ_R2 is: 0.8369888344305754
Features Dropped: 6
Features Kept: 292



In [25]:
X = base.drop(['SalePrice'], axis=1).values
y = base.SalePrice.values

X_frame = base.drop(['SalePrice'], axis=1)

folds = KFold(n_splits = 5, shuffle = True, random_state = None)

for trainIndex, testIndex in folds.split(X):
    X_train, X_test = X[trainIndex], X[testIndex]
    y_train, y_test = y[trainIndex], y[testIndex]
    print(X_train.shape)
    print(y_train.shape)
    print(X_test.shape)
    print(y_test.shape)
    
    scaler = preprocessing.StandardScaler().fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    model = ElasticNet()
    # define model evaluation method
    cv = RepeatedKFold(n_splits=5, n_repeats=1, random_state=None)
    
    # define grid
    grid = dict()
    grid['alpha'] = arange(0, 1, 0.1) 
    grid['l1_ratio'] = arange(0, 1, 0.1)
    
    #define search
    search = GridSearchCV(model, grid, scoring='neg_mean_absolute_error', cv=cv, n_jobs=-1)
    
    # perform the search
    results = search.fit(X_train_scaled, y_train)
    
    # summarize
    print('alpha: %.3f' % results.best_params_['alpha'])
    print('l1_ratio: %.3f' % results.best_params_['l1_ratio'])
    
    elastic = ElasticNet(alpha=results.best_params_['alpha'],
                l1_ratio=results.best_params_['l1_ratio'])
    print(elastic)
    
    elastic.fit(X_train_scaled, y_train)
    print('R2 Train is: ' + str(elastic.score(X_train_scaled, y_train)))
    print('R2 Test is: ' + str(elastic.score(X_test_scaled, y_test)))
    
    #the below offers same result as above, good back up:
    pred = elastic.predict(X_test_scaled)
    #print(r2_score(y_test, pred))
    
    ADJ_R2 = 1 - (1-r2_score(y_test, pred)) * (len(y)-1)/(len(y)-X.shape[1]-1)
    print('ADJ_R2 is: ' + str(ADJ_R2))
    
    results = pd.DataFrame(elastic.coef_, index=X_frame.columns)
    results.columns = ['Coef']
    Null = results[(results['Coef']>-1) & (results['Coef']<1)]
    Null_list = list(Null.index.values)
    Imp = results[(results['Coef']<-1) | (results['Coef']>1)]
    Imp_list = list(Imp.index.values)
    #print(Null_list)
    print('Features Dropped: ' + str(len(Null_list)))
    #print(Imp_list)
    print('Features Kept: ' + str(len(Imp_list)))

    print()

(2064, 298)
(2064,)
(516, 298)
(516,)
alpha: 0.500
l1_ratio: 0.900
ElasticNet(alpha=0.5, l1_ratio=0.9)
R2 Train is: 0.9383947363388248
R2 Test is: 0.9109662107209425
ADJ_R2 is: 0.899334439916401
Features Dropped: 8
Features Kept: 290

(2064, 298)
(2064,)
(516, 298)
(516,)


  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.e

  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_c

alpha: 0.700
l1_ratio: 0.900
ElasticNet(alpha=0.7000000000000001, l1_ratio=0.9)
R2 Train is: 0.9378347524742662
R2 Test is: 0.896182961848005
ADJ_R2 is: 0.8826198415633516
Features Dropped: 2
Features Kept: 296

(2064, 298)
(2064,)
(516, 298)
(516,)


  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train,

  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_c

  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_c

  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.e

  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_c

alpha: 0.600
l1_ratio: 0.900
ElasticNet(alpha=0.6000000000000001, l1_ratio=0.9)
R2 Train is: 0.9375235992582543
R2 Test is: 0.9091745753183141
ADJ_R2 is: 0.8973087372844946
Features Dropped: 6
Features Kept: 292

(2064, 298)
(2064,)
(516, 298)
(516,)
alpha: 0.600
l1_ratio: 0.900
ElasticNet(alpha=0.6000000000000001, l1_ratio=0.9)
R2 Train is: 0.9435942606075776
R2 Test is: 0.8505471994738409
ADJ_R2 is: 0.8310220199224182
Features Dropped: 3
Features Kept: 295

(2064, 298)
(2064,)
(516, 298)
(516,)
alpha: 0.300
l1_ratio: 0.900
ElasticNet(alpha=0.30000000000000004, l1_ratio=0.9)
R2 Train is: 0.9385634123005346
R2 Test is: 0.914242202001496
ADJ_R2 is: 0.903038421289723
Features Dropped: 2
Features Kept: 296

