### Regularization Assignments

Use the model below as a basis for comparison to your regularized models.

In [1]:
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
import numpy as np
from sklearn.metrics import r2_score as r2
from sklearn.metrics import mean_absolute_error as mae


computers = pd.read_csv("../Data/Computers.csv")

computers.tail()

Unnamed: 0,price,speed,hd,ram,screen,cd,multi,premium,ads,trend
6254,1690,100,528,8,15,no,no,yes,39,35
6255,2223,66,850,16,15,yes,yes,yes,39,35
6256,2654,100,1200,24,15,yes,no,yes,39,35
6257,2195,100,850,16,15,yes,no,yes,39,35
6258,2490,100,850,16,17,yes,no,yes,39,35


In [2]:
# Use the dataframe below for model fitting, afterwards, feel free to try more feature engineering!

computers_eng = (
    pd.get_dummies(
        computers.assign(
            hd2 = computers["hd"] ** 2,
            hd3 = computers["hd"] ** 3,
        ),
    drop_first=True    
    ).astype(int)
)

In [3]:
from sklearn.model_selection import train_test_split

X = sm.add_constant(computers_eng.drop("price", axis=1))
y = np.log(computers["price"])

# Test Split

X, X_test, y, y_test = train_test_split(X, y, test_size=.2, random_state=12345)

In [4]:
model = sm.OLS(y, X).fit()

#model.summary()

print(f"Training R2: {r2(y, model.predict(X))}")
print(f"Training MAE: {mae(y, model.predict(X))}")

Training R2: 0.8028102057477985
Training MAE: 0.08772703487258389


In [5]:
print(f"Test R2: {r2(y_test, model.predict(X_test))}")
print(f"Test MAE: {mae(y_test, model.predict(X_test))}")

Test R2: 0.8065658815043161
Test MAE: 0.08779069040050624


## Assignment 1: Ridge Regression

Fit a ridge regression model using ridgeCV and compare the accuracy to the model above. 

Don't forget to standardize your data!

In [6]:
# Standardization


In [7]:
from sklearn.preprocessing import StandardScaler

std = StandardScaler()
X_tr = std.fit_transform(X.values)
X_te = std.transform(X_test.values)

In [8]:
from sklearn.linear_model import RidgeCV

n_alphas = 200
alphas = 10 ** np.linspace(-3, 3, n_alphas)

ridge_model = RidgeCV(alphas=alphas, cv=5)
ridge_model.fit(X_tr, y)

print(ridge_model.score(X_tr, y))
print(mae(y, ridge_model.predict(X_tr)))
print(ridge_model.alpha_)

0.8028077420511628
0.08772450452527322
1.6831803533309566


In [9]:
print(ridge_model.score(X_te, y_test))
print(mae(y_test, ridge_model.predict(X_te)))

0.8066408224246605
0.08776922958338967


# Assignment 2: Lasso Regression

Fit a lasso regression model using LassoCV and compare the accuracy to the models above! 

Did any coefficient values drop to zero?

In [12]:
from sklearn.linear_model import LassoCV

n_alphas = 200
alphas = 10 ** np.linspace(-3, 3, n_alphas)

lasso_model = LassoCV(alphas=alphas, cv=5)
lasso_model.fit(X_tr, y)

print(lasso_model.score(X_tr, y))
print(mae(y, lasso_model.predict(X_tr)))
print(lasso_model.alpha_)

0.8019553710214781
0.08781513877601245
0.001


In [13]:
lasso_model.score(X_te, y_test)

0.8067896442779818

In [14]:
list(zip(X.columns, lasso_model.coef_))

[('const', 0.0),
 ('speed', 0.08463043405530733),
 ('hd', 0.18516885807645328),
 ('ram', 0.11690128503401381),
 ('screen', 0.04524634029207188),
 ('ads', 0.012638230254620176),
 ('trend', -0.18506363919329577),
 ('hd2', -0.08372632381168839),
 ('hd3', -0.020294334735824846),
 ('cd_yes', 0.019861712889363618),
 ('multi_yes', 0.01566346550935769),
 ('premium_yes', -0.06971597714604676)]

## Assignment 3: Elastic Net Regression

Fit an elastic net regression model using ElasticNetCV and compare the accuracy to the models above! 

What was the optimal L1 Ratio?

In [15]:
from sklearn.linear_model import ElasticNetCV

alphas = 10 ** np.linspace(-3, 3, 200)
l1_ratios = np.linspace(.01, 1, 10) # .9, 1, 10

enet_model = ElasticNetCV(alphas=alphas, l1_ratio=l1_ratios, cv=5)
enet_model.fit(X_tr, y)

print(enet_model.score(X_tr, y))
print(enet_model.score(X_te, y_test))
print(enet_model.alpha_)
print(enet_model.l1_ratio)

0.8027869091639886
0.8067750446023396
0.001
[0.01 0.12 0.23 0.34 0.45 0.56 0.67 0.78 0.89 1.  ]
