# Prep Data

In [5]:
from sklearn.metrics import mean_squared_error as MSE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_california_housing
import statsmodels.api as sm
import pandas as pd

california_housing = fetch_california_housing()
df_california = pd.DataFrame(california_housing.data, columns = california_housing.feature_names)
df_california['PRICE'] = california_housing.target
df_california = sm.add_constant(df_california, prepend=False)

In [6]:
df_california.head().to_clipboard()

In [7]:
X = df_california.drop('PRICE', axis=1)
y = df_california['PRICE']

In [8]:
sc = StandardScaler()
X_scaled = sc.fit_transform(X)

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.25, shuffle=True)

# Ridge Regression

In [10]:
ols_model = sm.OLS(y_train, X_train)
compiled_model = ols_model.fit()
compiled_model_ridge = ols_model.fit_regularized(method='elastic_net', L1_wt=0, alpha=0.1,refit=True)

In [11]:
print('OLS Error: ', MSE(y_train, compiled_model.predict(X_train)) )
print('Ridge Regression Error: ', MSE(y_train, compiled_model_ridge.predict(X_train)))

OLS Error:  4.806871781767541
Ridge Regression Error:  4.862847117128481


In [12]:
print('OLS Error: ', MSE(y_test, compiled_model.predict(X_test)) )
print('Ridge Regression Error: ', MSE(y_test, compiled_model_ridge.predict(X_test)))

OLS Error:  4.799965081300563
Ridge Regression Error:  4.802637071118009


In [14]:
df_compare = pd.DataFrame({'Before Ridge Regression':compiled_model.params,
                           'After Ridge Regression':compiled_model_ridge.params})
df_compare.index=list(X.columns)

In [15]:
df_compare.T

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,const
Before Ridge Regression,0.829497,0.118072,-0.261175,0.331459,-0.010918,-0.027147,-0.993947,-0.950686,0.0
After Ridge Regression,0.736616,0.159327,-0.074473,0.092443,0.004344,-0.027914,-0.438714,-0.388492,0.0


In [500]:
df_compare.T.to_clipboard()

# LASSO Regression

In [16]:
ols_model = sm.OLS(y_train, X_train)
compiled_model = ols_model.fit()
compiled_model_lasso = ols_model.fit_regularized(method='elastic_net', L1_wt=1, alpha=0.1,refit=True)

print('OLS Error: ', MSE(y_train, compiled_model.predict(X_train)) )
print('LASSO Regression Error: ', MSE(y_train, compiled_model_lasso.predict(X_train)))

OLS Error:  4.806871781767541
LASSO Regression Error:  4.93937023995957


In [17]:
print('OLS Error: ', MSE(y_test, compiled_model.predict(X_test)) )
print('LASSO Regression Error: ', MSE(y_test, compiled_model_lasso.predict(X_test)))

OLS Error:  4.799965081300563
LASSO Regression Error:  4.870307535710372


In [18]:
df_compare = pd.DataFrame({'Before LASSO Regression':compiled_model.params,
                           'After LASSO Regression':compiled_model_ridge.params})
df_compare.index=list(X.columns)

In [19]:
df_compare.T

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,const
Before LASSO Regression,0.829497,0.118072,-0.261175,0.331459,-0.010918,-0.027147,-0.993947,-0.950686,0.0
After LASSO Regression,0.736616,0.159327,-0.074473,0.092443,0.004344,-0.027914,-0.438714,-0.388492,0.0


In [20]:
df_compare.T.to_clipboard()

# Elastic Net Regression

In [21]:
ols_model = sm.OLS(y_train, X_train)
compiled_model = ols_model.fit()
compiled_model_elastic = ols_model.fit_regularized(method='elastic_net', L1_wt=0.5, alpha=8,refit=True)

In [22]:
print('OLS Error: ', MSE(y_train, compiled_model.predict(X_train)) )
print('Elastic Net Regression Error: ', MSE(y_train, compiled_model_elastic.predict(X_train)))

OLS Error:  4.806871781767541
Elastic Net Regression Error:  5.646749280108061


In [23]:
print('OLS Error: ', MSE(y_test, compiled_model.predict(X_test)) )
print('Elastic Net Regression Error: ', MSE(y_test, compiled_model_elastic.predict(X_test)))

OLS Error:  4.799965081300563
Elastic Net Regression Error:  5.501684955624825


In [24]:
df_compare = pd.DataFrame({'Before Elastic Net Regression':compiled_model.params,
                           'After Elastic Net Regression':compiled_model_ridge.params})
df_compare.index=list(X.columns)

In [25]:
df_compare.T

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,const
Before Elastic Net Regression,0.829497,0.118072,-0.261175,0.331459,-0.010918,-0.027147,-0.993947,-0.950686,0.0
After Elastic Net Regression,0.736616,0.159327,-0.074473,0.092443,0.004344,-0.027914,-0.438714,-0.388492,0.0


In [26]:
df_compare.T.to_clipboard()