# Ridge Regression and Lasso

This notebook explores ridge regression and lasso, two regularized alternatives to ordinary least-squares fitting. By penalizing large coefficients, these techniques can improve a linear model's predictive performance and interpretability.

## Import libraries 

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

## Exploratory Data Analysis 

In [None]:
# Relative path to the advertising CSV file.
DATAPATH = './data/Advertising.csv'

# Read the CSV into a DataFrame and preview its first five rows.
data = pd.read_csv(filepath_or_buffer=DATAPATH)
data.head(n=5)

In [None]:
# Remove the 'Unnamed: 0' column (presumably a saved row index -- confirm
# against the CSV). Rebinding `data` instead of dropping in place, together
# with errors='ignore', keeps this cell idempotent: running it again after the
# column is gone does not raise a KeyError.
data = data.drop(columns=['Unnamed: 0'], errors='ignore')

In [None]:
# Preview the first rows again to check the frame's current columns.
data.head()

In [None]:
# Show the column labels of the DataFrame.
data.columns

In [None]:
def scatter_plot(feature, target):
    """Show a black scatter plot of one column of `data` against another.

    Both columns are read from the notebook-level DataFrame `data`.

    Parameters
    ----------
    feature
        Label of the column plotted on the x-axis; it is also inserted
        into the x-axis label text.
    target
        Label of the column plotted on the y-axis. The y-axis label is
        always "Sales ($k)", whatever this value is.
    """
    _, ax = plt.subplots(figsize=(16, 8))
    ax.scatter(data[feature], data[target], c='black')
    ax.set_xlabel("Money spent on {} ads ($)".format(feature))
    ax.set_ylabel("Sales ($k)")
    plt.show()

In [None]:
# Sales against the 'TV' column.
scatter_plot(feature='TV', target='sales')

In [None]:
# Sales against the 'radio' column.
scatter_plot(feature='radio', target='sales')

In [None]:
# Sales against the 'newspaper' column.
scatter_plot(feature='newspaper', target='sales')

## Modelling 

### Multiple linear regression - least squares fitting 

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression

# Predictors: every column except the response.
Xs = data.drop(columns=['sales'])
# Response reshaped into a column vector of shape (n_rows, 1).
y = data['sales'].values.reshape(-1, 1)

lin_reg = LinearRegression()

# Evaluate ordinary least squares with 5-fold cross-validation. With the
# 'neg_mean_squared_error' scorer each fold yields the negated MSE, so every
# entry of MSEs is non-positive.
MSEs = cross_val_score(
    estimator=lin_reg,
    X=Xs,
    y=y,
    scoring='neg_mean_squared_error',
    cv=5,
)

# Average of the negated fold MSEs (closer to zero is better).
mean_MSE = MSEs.mean()

print(mean_MSE)

### Ridge regression 

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge

# Candidate regularization strengths. Defined once and reused for the search
# grid so the list cannot drift out of sync with `parameters`.
alpha = [1e-15, 1e-10, 1e-8, 1e-4, 1e-3, 1e-2, 1, 5, 10, 20]

ridge = Ridge()

parameters = {'alpha': alpha}

# Grid search with 5-fold cross-validation: each candidate alpha is fitted and
# scored on every fold using the negated mean squared error, and the alpha
# with the best mean score is exposed through best_params_ / best_score_.
ridge_regressor = GridSearchCV(
    ridge,
    parameters,
    scoring='neg_mean_squared_error',
    cv=5,
)

ridge_regressor.fit(Xs, y)

In [None]:
# Alpha value that achieved the best mean cross-validated score for ridge.
ridge_regressor.best_params_

In [None]:
# Mean cross-validated score of the best alpha. The search was scored with
# 'neg_mean_squared_error', so this is a negated MSE: closer to zero is better.
ridge_regressor.best_score_

### Lasso 

In [None]:
from sklearn.linear_model import Lasso

lasso = Lasso()

# Regularization strengths to evaluate for the lasso penalty.
parameters = {
    'alpha': [1e-15, 1e-10, 1e-8, 1e-4, 1e-3, 1e-2, 1, 5, 10, 20],
}

# 5-fold cross-validated grid search over alpha, scored by negated MSE.
lasso_regressor = GridSearchCV(
    estimator=lasso,
    param_grid=parameters,
    scoring='neg_mean_squared_error',
    cv=5,
)

lasso_regressor.fit(Xs, y)

In [None]:
# Alpha value that achieved the best mean cross-validated score for lasso.
lasso_regressor.best_params_

In [None]:
# Mean cross-validated score of the best alpha. The search was scored with
# 'neg_mean_squared_error', so this is a negated MSE: closer to zero is better.
lasso_regressor.best_score_