### Import Libraries, Read in Data and Merge into one big dataset with year indicator variable

In [1]:
import numpy as np 
import pandas as pd
from wordcloud import STOPWORDS
import string
import datetime

In [2]:
# Read in wrangled NYC airbnb data from 2016-2020

df16 = pd.read_csv("../input/math301-final-project-data/airbnb16.csv", index_col=0)
df17 = pd.read_csv("../input/math301-final-project-data/airbnb17.csv", index_col=0)
df18 = pd.read_csv("../input/math301-final-project-data/airbnb18.csv", index_col=0)
df19 = pd.read_csv("../input/math301-final-project-data/airbnb19.csv", index_col=0)
df20 = pd.read_csv("../input/math301-final-project-data/airbnb20.csv", index_col=0)

In [3]:
# Add year variable to all the data sources
for dataf,year in zip([df16,df17,df18,df19,df20], ['16','17','18','19','20']):
    dataf['year'] = '20' + year

In [4]:
# Merge data sources into one big dataset
df = pd.concat([df16,df17,df18,df19,df20])

### Encoding Features + Feature Engineering + Split into train, test data

#### Feature Creation from "name" column

In [5]:
df['name'].fillna('missing',inplace=True) # fill ONE empty value in "name" column with word "missing"

########## Basic Meta Features for Text data

# word_count
df['name_wc'] = df['name'].apply(lambda x: len(str(x).split()))

# unique_word_count
df['name_unique_wc'] = df['name'].apply(lambda x: len(set(str(x).split())))

# stop_word_count
df['name_stop_wc'] = df['name'].apply(lambda x: len([w for w in str(x).lower().split() if w in STOPWORDS]))

# mean_word_length
df['name_mean_wl'] = df['name'].apply(lambda x: np.mean([len(w) for w in str(x).split()]))

# char_count
df['name_cc'] = df['name'].apply(lambda x: len(str(x)))

# punctuation_count
df['name_pc'] = df['name'].apply(lambda x: len([c for c in str(x) if c in string.punctuation]))

# Drop "name" column
del df['name']

#### Drop socioeconomic, demographic variables

In [6]:
# Drop socioeconomic, demographic variables

for feat in df.columns.tolist():
    if 'avg' in feat:
        del df[feat]
    elif 'total' in feat:
        del df[feat]

#### Create features for last_review

In [7]:
# Create features for last_review (last review month, last review day)

df1 = df.copy()[df.last_review != 'No Review']
df2 = df.copy()[df.last_review == 'No Review']

df1['last_review_month'] = df1['last_review'].apply(lambda x: datetime.datetime.strptime(x, "%Y-%m-%d").month)
df1['last_review_day'] = df1['last_review'].apply(lambda x: datetime.datetime.strptime(x, "%Y-%m-%d").day)

df2['last_review_month'] = 0
df2['last_review_day'] = 0

df = pd.concat([df1,df2], axis=0, sort=True)

del df['last_review']

#### Encode categorical variables into numerical

In [8]:
train_idx =  df[df.year != '2020'].index
test_idx = df[df.year == '2020'].index

In [9]:
# Create Dummy variables for categorical features

df = pd.get_dummies(df, columns= ['neighbourhood_group', 'room_type','year'], drop_first=True)

In [10]:
# Label Encode Neighborhoods (because too many variables will be created if we take the dummy variable approach)

from sklearn.preprocessing import LabelEncoder

label_encoder= LabelEncoder()
nb_encoded = label_encoder.fit_transform(df[['neighbourhood']].values.ravel())

In [11]:
# Replace neighborhood column with integer label encoded data

df['neighbourhood'] = pd.Series(nb_encoded)

#### Split into train and test data

In [12]:
train = df.copy().iloc[train_idx] # 2016-2019
test = df.copy().iloc[test_idx] # 2020

In [13]:
# Divide into X and y vectors/arrays

X_train = train.drop(labels=['price'],axis=1).values
y_train = train[['price']].values

X_test = test.drop(labels=['price'],axis=1).values
y_test = test[['price']].values

In [14]:
# Save encoded data locally as csv files

df.to_csv("encoded_data.csv")
train.to_csv("train.csv")
test.to_csv("test.csv")

### Baseline Models with HyperParameter Tuning and some tinkering (e.g. standardization, Polynomial features)

In [15]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error, median_absolute_error, mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet 
from sklearn.model_selection import KFold
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

#### Ridge Regression - HP Tuning

In [16]:
# Ridge Regression - Hyperparameter Tuning with RandomizedCV

ridge = Ridge()

ridge_alpha_params = list(np.linspace(0.01, 1.5))

ridge_param_dist = dict(alpha=ridge_alpha_params)

ridge_rand = RandomizedSearchCV(ridge, ridge_param_dist, cv=10, scoring='neg_median_absolute_error', n_iter=50, random_state=42)

# fit
ridge_rand.fit(X_train, y_train)

# Scores
# ridge_rand.grid_scores_

# Examine the best model
print(-(ridge_rand.best_score_))
print(ridge_rand.best_params_)
print(ridge_rand.best_estimator_)

31.51131323108275
{'alpha': 1.4695918367346938}
Ridge(alpha=1.4695918367346938, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)


When we tried linspace(0.01, 1.5), we got about 1.469 as out best alpha parameter. Let's try RandomizedCV again in a different range closer to 1.469 (maybe 1.0 - 2.0 ?)

In [17]:
# Ridge Regression - Hyperparameter Tuning with RandomizedCV with different range

ridge = Ridge()

ridge_alpha_params = list(np.linspace(1.0, 3.0))

ridge_param_dist = dict(alpha=ridge_alpha_params)

ridge_rand = RandomizedSearchCV(ridge, ridge_param_dist, cv=10, scoring='neg_median_absolute_error', n_iter=30, random_state=50)

# fit
ridge_rand.fit(X_train, y_train)

# Examine the best model
print(-(ridge_rand.best_score_))
print(ridge_rand.best_params_)
print(ridge_rand.best_estimator_)

31.487369739797394
{'alpha': 2.7959183673469385}
Ridge(alpha=2.7959183673469385, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)


In [18]:
# Ridge Regression - Hyperparameter Tuning with RandomizedCV with different range (This time linspace(2.7, 5.0))

ridge = Ridge()

ridge_alpha_params = list(np.linspace(2.7, 5.0))

ridge_param_dist = dict(alpha=ridge_alpha_params)

ridge_rand = RandomizedSearchCV(ridge, ridge_param_dist, cv=10, scoring='neg_median_absolute_error', n_iter=30, random_state=50)

# fit
ridge_rand.fit(X_train, y_train)

# Examine the best model
print(-(ridge_rand.best_score_))
print(ridge_rand.best_params_)
print(ridge_rand.best_estimator_)

31.465771592189775
{'alpha': 4.3428571428571425}
Ridge(alpha=4.3428571428571425, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)


In [19]:
# Ridge Regression - Hyperparameter Tuning with RandomizedCV with different range (This time linspace(4, 10))

ridge = Ridge()

ridge_alpha_params = list(np.linspace(4, 10))

ridge_param_dist = dict(alpha=ridge_alpha_params)

ridge_rand = RandomizedSearchCV(ridge, ridge_param_dist, cv=10, scoring='neg_median_absolute_error', n_iter=30, random_state=50)

# fit
ridge_rand.fit(X_train, y_train)

# Examine the best model
print(-(ridge_rand.best_score_))
print(ridge_rand.best_params_)
print(ridge_rand.best_estimator_)

31.455988846702894
{'alpha': 8.285714285714285}
Ridge(alpha=8.285714285714285, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)


In [20]:
# Ridge Regression - Hyperparameter Tuning with RandomizedCV with different range (This time linspace(8, 100))

ridge = Ridge()

ridge_alpha_params = list(np.linspace(8, 100))

ridge_param_dist = dict(alpha=ridge_alpha_params)

ridge_rand = RandomizedSearchCV(ridge, ridge_param_dist, cv=10, scoring='neg_median_absolute_error', n_iter=30, random_state=50)

# fit
ridge_rand.fit(X_train, y_train)

# Examine the best model
print(-(ridge_rand.best_score_))
print(ridge_rand.best_params_)
print(ridge_rand.best_estimator_)

31.45363542922478
{'alpha': 23.02040816326531}
Ridge(alpha=23.02040816326531, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)


In [21]:
# Ridge Regression - Hyperparameter Tuning with RandomizedCV with different range (This time linspace(8, 100))

ridge = Ridge()

ridge_alpha_params = list(np.linspace(22, 24))

ridge_param_dist = dict(alpha=ridge_alpha_params)

ridge_gs = GridSearchCV(ridge, ridge_param_dist, cv=10, scoring='neg_median_absolute_error')

# fit
ridge_gs.fit(X_train, y_train)

# Examine the best model
print(-(ridge_gs.best_score_))
print(ridge_gs.best_params_)
print(ridge_gs.best_estimator_)

31.444239435543388
{'alpha': 22.0}
Ridge(alpha=22.0, copy_X=True, fit_intercept=True, max_iter=None,
      normalize=False, random_state=None, solver='auto', tol=0.001)


In [22]:
# Ridge Regression with the best parameter found from RandomizedCV + GridSearchCV

ridge = Ridge(alpha=22.0)

print("Ridge Cross Validation MSE: {}".format(round(-cross_val_score(ridge, X_train, y_train, cv=5, scoring='neg_mean_squared_error').mean(),2)))
print("Ridge Cross Validation Median Absolute Error: {}".format(round(-cross_val_score(ridge, X_train, y_train, cv=5, scoring='neg_median_absolute_error').mean(),2)))
print("Ridge Cross Validation Mean Absolute Error: {}".format(round(-cross_val_score(ridge, X_train, y_train, cv=5, scoring='neg_mean_absolute_error').mean(), 2)))
print("Ridge Cross Validation R-Squared: {}".format(round(-cross_val_score(ridge, X_train, y_train, cv=5, scoring='r2').mean(), 2)))

ridge.fit(X_train,y_train)

print("Ridge Predicton MSE: {}".format(round(mean_squared_error(y_test,ridge.predict(X_test)), 2 )))
print("Ridge Predicton Median Absolute Error: {}".format(round(median_absolute_error(y_test, ridge.predict(X_test)),2 )))
print("Ridge Predicton Mean Absolute Error: {}".format(round(mean_absolute_error(y_test, ridge.predict(X_test)), 2)))
print("Ridge Predicton R-Squared: {}".format(round(r2_score(y_test, ridge.predict(X_test)), 2 )))

Ridge Cross Validation MSE: 24112.94
Ridge Cross Validation Median Absolute Error: 31.28
Ridge Cross Validation Mean Absolute Error: 50.5
Ridge Cross Validation R-Squared: -0.15
Ridge Predicton MSE: 22965.22
Ridge Predicton Median Absolute Error: 30.65
Ridge Predicton Mean Absolute Error: 49.53
Ridge Predicton R-Squared: 0.15


Best median absolute error for Ridge Regression after HP tuning was 30.65 which is a 0.05 decrease from the baseline score without HP tuning

#### Lasso Regression with HP Tuning

In [23]:
# Lasso Regression - Hyperparameter Tuning with RandomizedCV

lasso = Lasso()

lasso_alpha_params = list(np.linspace(0.01, 0.2))

lasso_param_dist = dict(alpha=lasso_alpha_params)

lasso_rand = RandomizedSearchCV(lasso, lasso_param_dist, cv=10, scoring='neg_median_absolute_error', n_iter=30, random_state=42)

# fit
lasso_rand.fit(X_train, y_train)

# Examine the best model
print(-(lasso_rand.best_score_))
print(lasso_rand.best_params_)
print(lasso_rand.best_estimator_)

31.410606656709568
{'alpha': 0.08367346938775509}
Lasso(alpha=0.08367346938775509, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)


In [24]:
# Lasso Regression - Hyperparameter Tuning with RandomizedCV on a different range (0.07 - 1.2)

lasso = Lasso()

lasso_alpha_params = list(np.linspace(0.07, 1.2))

lasso_param_dist = dict(alpha=lasso_alpha_params)

lasso_rand = RandomizedSearchCV(lasso, lasso_param_dist, cv=5, scoring='neg_median_absolute_error', n_iter=15, random_state=42)

# fit
lasso_rand.fit(X_train, y_train)

# Examine the best model
print(-(lasso_rand.best_score_))
print(lasso_rand.best_params_)
print(lasso_rand.best_estimator_)

31.28436968364913
{'alpha': 0.25448979591836735}
Lasso(alpha=0.25448979591836735, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)


In [25]:
# Lasso Regression - Hyperparameter Tuning with RandomizedCV on a different range (0.2 - 0.5)

lasso = Lasso()

lasso_alpha_params = list(np.linspace(0.2, 0.5))

lasso_param_dist = dict(alpha=lasso_alpha_params)

lasso_rand = RandomizedSearchCV(lasso, lasso_param_dist, cv=5, scoring='neg_median_absolute_error', n_iter=15, random_state=42)

# fit
lasso_rand.fit(X_train, y_train)

# Examine the best model
print(-(lasso_rand.best_score_))
print(lasso_rand.best_params_)
print(lasso_rand.best_estimator_)

31.272616610606384
{'alpha': 0.2489795918367347}
Lasso(alpha=0.2489795918367347, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)


In [26]:
# Lasso Regression - Hyperparameter Tuning with RandomizedCV on a different range (0.1 - 0.3)

lasso = Lasso()

lasso_alpha_params = list(np.linspace(0.1, 0.3))

lasso_param_dist = dict(alpha=lasso_alpha_params)

lasso_rand = RandomizedSearchCV(lasso, lasso_param_dist, cv=5, scoring='neg_median_absolute_error', n_iter=15, random_state=42)

# fit
lasso_rand.fit(X_train, y_train)

# Examine the best model
print(-(lasso_rand.best_score_))
print(lasso_rand.best_params_)
print(lasso_rand.best_estimator_)

31.277783590178295
{'alpha': 0.2510204081632653}
Lasso(alpha=0.2510204081632653, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)


In [27]:
# Lasso Regression - Hyperparameter Tuning with GridSearchCV

lasso = Lasso()

lasso_alpha_params = list(np.linspace(0.1, 0.25))

lasso_param_dist = dict(alpha=lasso_alpha_params)

lasso_gs = GridSearchCV(lasso, lasso_param_dist, cv=5, scoring='neg_median_absolute_error')

# fit
lasso_gs.fit(X_train, y_train)

# Examine the best model
print(-(lasso_gs.best_score_))
print(lasso_gs.best_params_)
print(lasso_gs.best_estimator_)

31.24204763772941
{'alpha': 0.1}
Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)


No gains from HP tuning. The best parameter alpha = 0.1 was the value we already used for baseline, so that being said, the alpha value we initially set for the baseline model already had the optimal parameter

In [28]:
# Lasso Regression

lasso = Lasso(alpha=0.1)

print("Lasso Cross Validation MSE: {}".format(round(-cross_val_score(lasso, X_train, y_train, cv=5, scoring='neg_mean_squared_error').mean(),2)))
print("Lasso Cross Validation Median Absolute Error: {}".format(round(-cross_val_score(lasso, X_train, y_train, cv=5, scoring='neg_median_absolute_error').mean(),2)))
print("Lasso Cross Validation Mean Absolute Error: {}".format(round(-cross_val_score(lasso, X_train, y_train, cv=5, scoring='neg_mean_absolute_error').mean(), 2)))
print("Lasso Cross Validation R-Squared: {}".format(round(-cross_val_score(lasso, X_train, y_train, cv=5, scoring='r2').mean(), 2)))

lasso.fit(X_train,y_train)

print("Lasso Predicton MSE: {}".format(round(mean_squared_error(y_test,lasso.predict(X_test)), 2 )))
print("Lasso Predicton Median Absolute Error: {}".format(round(median_absolute_error(y_test, lasso.predict(X_test)),2 )))
print("Lasso Predicton Mean Absolute Error: {}".format(round(mean_absolute_error(y_test, lasso.predict(X_test)), 2)))
print("Lasso Predicton R-Squared: {}".format(round(r2_score(y_test, lasso.predict(X_test)), 2 )))

Lasso Cross Validation MSE: 24129.95
Lasso Cross Validation Median Absolute Error: 31.24
Lasso Cross Validation Mean Absolute Error: 50.48
Lasso Cross Validation R-Squared: -0.15
Lasso Predicton MSE: 22983.24
Lasso Predicton Median Absolute Error: 30.59
Lasso Predicton Mean Absolute Error: 49.49
Lasso Predicton R-Squared: 0.15


#### ElasticNet with HP Tuning

In [29]:
# ElasticNet Regression - Hyperparameter Tuning with RandomizedCV

en = ElasticNet()

en_alpha_params = list(np.linspace(0.9, 1.0))
en_l1r_params = list(np.linspace(0, 1))

en_param_dist = dict(alpha=en_alpha_params, l1_ratio = en_l1r_params)

en_rand = RandomizedSearchCV(en, en_param_dist, cv=5, scoring='neg_median_absolute_error', n_iter=10, random_state=42)

# fit
en_rand.fit(X_train, y_train)

# Examine the best model
print(-(en_rand.best_score_))
print(en_rand.best_params_)
print(en_rand.best_estimator_)

34.02828446432686
{'l1_ratio': 0.9183673469387754, 'alpha': 0.9428571428571428}
ElasticNet(alpha=0.9428571428571428, copy_X=True, fit_intercept=True,
           l1_ratio=0.9183673469387754, max_iter=1000, normalize=False,
           positive=False, precompute=False, random_state=None,
           selection='cyclic', tol=0.0001, warm_start=False)


In [30]:
# ElasticNet Regression - Hyperparameter Tuning with RandomizedCV on different ranges

en = ElasticNet()

en_alpha_params = list(np.linspace(0.94, 1.4))
en_l1r_params = list(np.linspace(0.8, 1))

en_param_dist = dict(alpha=en_alpha_params, l1_ratio = en_l1r_params)

en_rand = RandomizedSearchCV(en, en_param_dist, cv=5, scoring='neg_median_absolute_error', n_iter=15, random_state=42)

# fit
en_rand.fit(X_train, y_train)

# Examine the best model
print(-(en_rand.best_score_))
print(en_rand.best_params_)
print(en_rand.best_estimator_)

32.01679948437963
{'l1_ratio': 0.9836734693877551, 'alpha': 1.137142857142857}
ElasticNet(alpha=1.137142857142857, copy_X=True, fit_intercept=True,
           l1_ratio=0.9836734693877551, max_iter=1000, normalize=False,
           positive=False, precompute=False, random_state=None,
           selection='cyclic', tol=0.0001, warm_start=False)


In [31]:
# ElasticNet Regression - Hyperparameter Tuning with RandomizedCV on different ranges

en = ElasticNet()

en_alpha_params = list(np.linspace(1.0, 1.5))
en_l1r_params = list(np.linspace(0.98, 1.0))

en_param_dist = dict(alpha=en_alpha_params, l1_ratio = en_l1r_params)

en_rand = RandomizedSearchCV(en, en_param_dist, cv=5, scoring='neg_median_absolute_error', n_iter=15, random_state=42)

# fit
en_rand.fit(X_train, y_train)

# Examine the best model
print(-(en_rand.best_score_))
print(en_rand.best_params_)
print(en_rand.best_estimator_)

31.71654719575546
{'l1_ratio': 0.9983673469387755, 'alpha': 1.2142857142857142}
ElasticNet(alpha=1.2142857142857142, copy_X=True, fit_intercept=True,
           l1_ratio=0.9983673469387755, max_iter=1000, normalize=False,
           positive=False, precompute=False, random_state=None,
           selection='cyclic', tol=0.0001, warm_start=False)


In [32]:
# ElasticNet Regression - Hyperparameter Tuning with RandomizedCV on different ranges

en = ElasticNet()

en_alpha_params = list(np.linspace(1.2, 1.8))
en_l1r_params = list(np.linspace(0.98, 1.0))

en_param_dist = dict(alpha=en_alpha_params, l1_ratio = en_l1r_params)

en_rand = RandomizedSearchCV(en, en_param_dist, cv=5, scoring='neg_median_absolute_error', n_iter=15, random_state=42)

# fit
en_rand.fit(X_train, y_train)

# Examine the best model
print(-(en_rand.best_score_))
print(en_rand.best_params_)
print(en_rand.best_estimator_)

31.710414198174295
{'l1_ratio': 0.9983673469387755, 'alpha': 1.457142857142857}
ElasticNet(alpha=1.457142857142857, copy_X=True, fit_intercept=True,
           l1_ratio=0.9983673469387755, max_iter=1000, normalize=False,
           positive=False, precompute=False, random_state=None,
           selection='cyclic', tol=0.0001, warm_start=False)


In [33]:
# ElasticNet Regression - Hyperparameter Tuning with RandomizedCV on different ranges

en = ElasticNet()

en_alpha_params = list(np.linspace(1.4, 2.0))
en_l1r_params = list(np.linspace(0.99, 1.0))

en_param_dist = dict(alpha=en_alpha_params, l1_ratio = en_l1r_params)

en_rand = RandomizedSearchCV(en, en_param_dist, cv=5, scoring='neg_median_absolute_error', n_iter=15, random_state=42)

# fit
en_rand.fit(X_train, y_train)

# Examine the best model
print(-(en_rand.best_score_))
print(en_rand.best_params_)
print(en_rand.best_estimator_)

31.715944184357006
{'l1_ratio': 0.9977551020408163, 'alpha': 1.7918367346938775}
ElasticNet(alpha=1.7918367346938775, copy_X=True, fit_intercept=True,
           l1_ratio=0.9977551020408163, max_iter=1000, normalize=False,
           positive=False, precompute=False, random_state=None,
           selection='cyclic', tol=0.0001, warm_start=False)


In [34]:
# ElasticNet Regression - Hyperparameter Tuning with GridSearCV

en = ElasticNet()

en_alpha_params = list(np.linspace(1.4, 1.8))
en_l1r_params = list(np.linspace(0.98, 1.0))

en_param_dist = dict(alpha=en_alpha_params, l1_ratio = en_l1r_params)

en_gs = GridSearchCV(en, en_param_dist, cv=5, scoring='neg_median_absolute_error')

# fit
en_gs.fit(X_train, y_train)

# Examine the best model
print(-(en_gs.best_score_))
print(en_gs.best_params_)
print(en_gs.best_estimator_)

31.694797733020163
{'alpha': 1.6612244897959183, 'l1_ratio': 1.0}
ElasticNet(alpha=1.6612244897959183, copy_X=True, fit_intercept=True,
           l1_ratio=1.0, max_iter=1000, normalize=False, positive=False,
           precompute=False, random_state=None, selection='cyclic', tol=0.0001,
           warm_start=False)


In [35]:
# ElasticNet

en = ElasticNet(alpha=1.6612244897959183, copy_X=True, fit_intercept=True,
           l1_ratio=1.0, max_iter=1000, normalize=False, positive=False,
           precompute=False, random_state=None, selection='cyclic', tol=0.0001,
           warm_start=False)

print("ElasticNet Cross Validation MSE: {}".format(round(-cross_val_score(en, X_train, y_train, cv=5, scoring='neg_mean_squared_error').mean(),2)))
print("ElasticNet Cross Validation Median Absolute Error: {}".format(round(-cross_val_score(en, X_train, y_train, cv=5, scoring='neg_median_absolute_error').mean(),2)))
print("ElasticNet Cross Validation Mean Absolute Error: {}".format(round(-cross_val_score(en, X_train, y_train, cv=5, scoring='neg_mean_absolute_error').mean(), 2)))
print("ElasticNet Cross Validation R-Squared: {}".format(round(-cross_val_score(en, X_train, y_train, cv=5, scoring='r2').mean(), 2)))

en.fit(X_train,y_train)

print("ElasticNet Predicton MSE: {}".format(round(mean_squared_error(y_test,en.predict(X_test)), 2 )))
print("ElasticNet Predicton Median Absolute Error: {}".format(round(median_absolute_error(y_test, en.predict(X_test)),2 )))
print("ElasticNet Predicton Mean Absolute Error: {}".format(round(mean_absolute_error(y_test, en.predict(X_test)), 2)))
print("ElasticNet Predicton R-Squared: {}".format(round(r2_score(y_test, en.predict(X_test)), 2 )))

ElasticNet Cross Validation MSE: 24447.86
ElasticNet Cross Validation Median Absolute Error: 31.69
ElasticNet Cross Validation Mean Absolute Error: 51.44
ElasticNet Cross Validation R-Squared: -0.14
ElasticNet Predicton MSE: 23291.91
ElasticNet Predicton Median Absolute Error: 31.17
ElasticNet Predicton Mean Absolute Error: 50.5
ElasticNet Predicton R-Squared: 0.13


### Blending all three linear regularization methods

In [74]:
lasso_ridge_en_pred_blend = lasso.predict(X_test) * 0.8 + ridge.predict(X_test).reshape(26907,) * 0.15 + en.predict(X_test) * 0.05

In [76]:
print("Blend Predicton MSE: {}".format(round(mean_squared_error(y_test, lasso_ridge_en_pred_blend),2 )))
print("Blend Predicton Median Absolute Error: {}".format(round(median_absolute_error(y_test, lasso_ridge_en_pred_blend),2 )))
print("Blend Predicton Mean Absolute Error: {}".format(round(mean_absolute_error(y_test, lasso_ridge_en_pred_blend),2 )))
print("Blend Predicton R-Squared: {}".format(round(r2_score(y_test, lasso_ridge_en_pred_blend),2 )))

Blend Predicton MSE: 22983.23
Blend Predicton Median Absolute Error: 30.53
Blend Predicton Mean Absolute Error: 49.44
Blend Predicton R-Squared: 0.15
