# Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import sklearn
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt 
from sklearn import metrics 
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet

%matplotlib inline

In [2]:
# Make IPython echo the value of every top-level expression in a cell rather
# than only the last one. Later cells that list several bare expressions
# (e.g. the three model objects) rely on this setting to show all of them.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [3]:
# Read the prepared CSV (first row is the header) and work on a copy so the
# frame as loaded from disk stays available in `dataset`.
csv_path = "../Data/Mice Data/data_after_mice.csv"
dataset = pd.read_csv(csv_path, encoding='utf-8', header=0)
df = dataset.copy()

FileNotFoundError: [Errno 2] File b'../Data/Mice Data/data_after_mice.csv' does not exist: b'../Data/Mice Data/data_after_mice.csv'

In [None]:
# Separate the target (int_rate) from the feature columns.
# The drop is NOT done in place: `df` keeps all of its columns, so this cell
# can be re-run on its own without raising KeyError on the second execution.
dependent = df['int_rate']
independent = df.drop(columns=['int_rate'])

In [None]:
# Preview the first rows of the target series (the int_rate column).
dependent.head()

In [None]:
# Preview the first rows of the feature frame (every column except int_rate).
independent.head()

In [None]:
# Hold out 30% of the rows as the test set; the fixed random_state makes the
# split repeatable between runs.
train_x, test_x, train_y, test_y = train_test_split(
    independent,
    dependent,
    test_size=0.3,
    random_state=42,
)
# Displayed as the cell result: (rows, columns) of the training features.
train_x.shape

# Setup Basic Models

In [None]:
# Baseline estimators, each constructed with its default hyperparameters.
lm_lasso = Lasso()
lm_ridge = Ridge()
lm_elastic = ElasticNet()

# Bare expressions: echoed as cell output. All three are shown only while
# ast_node_interactivity is "all"; otherwise just the last one appears.
lm_lasso
lm_ridge
lm_elastic

In [None]:
# Fit each baseline model on the training split. The calls are left as
# top-level expressions, so their return values are echoed as cell output.
lm_lasso.fit(train_x, train_y)
lm_ridge.fit(train_x, train_y)
lm_elastic.fit(train_x, train_y)

In [None]:
# Horizontal bar chart of the fitted Lasso coefficients, one bar per feature.
# These are signed linear coefficients (lm_lasso.coef_), indexed by column name.
fig, ax = plt.subplots(figsize=(35, 20))
ft_importances_lm_lasso = pd.Series(lm_lasso.coef_, index=independent.columns)
# Draw on the explicit axes and title the chart so it can be told apart from
# the Ridge / Elastic Net charts. Assigning the result keeps the Axes repr
# out of the cell output.
ax = ft_importances_lm_lasso.plot(kind='barh', ax=ax, title="Lasso Regression Coefficients")
plt.show();

In [None]:
# Horizontal bar chart of the fitted Ridge coefficients, one bar per feature.
# These are signed linear coefficients (lm_ridge.coef_), indexed by column name.
fig, ax = plt.subplots(figsize=(35, 20))
ft_importances_lm_ridge = pd.Series(lm_ridge.coef_, index=independent.columns)
# Draw on the explicit axes and title the chart so it can be told apart from
# the Lasso / Elastic Net charts. Assigning the result keeps the Axes repr
# out of the cell output.
ax = ft_importances_lm_ridge.plot(kind='barh', ax=ax, title="Ridge Regression Coefficients")
plt.show();

In [None]:
# Horizontal bar chart of the fitted Elastic Net coefficients, one bar per feature.
# These are signed linear coefficients (lm_elastic.coef_), indexed by column name.
fig, ax = plt.subplots(figsize=(35, 20))
ft_importances_lm_elastic = pd.Series(lm_elastic.coef_, index=independent.columns)
# Draw on the explicit axes and title the chart so it can be told apart from
# the Lasso / Ridge charts. Assigning the result keeps the Axes repr
# out of the cell output.
ax = ft_importances_lm_elastic.plot(kind='barh', ax=ax, title="Elastic Net Regression Coefficients")
plt.show();

In [None]:
# Show the Elastic Net coefficients as a Series (feature name -> coefficient).
ft_importances_lm_elastic

In [None]:
# R2 Value
#
# Report each baseline model's test-set score as a percentage rounded to two
# decimals. The value is printed next to its label so the output no longer
# depends on the notebook-wide ast_node_interactivity = "all" setting.
for model_label, fitted_model in (
    ("Lasso", lm_lasso),
    ("Ridge", lm_ridge),
    ("Elastic Net", lm_elastic),
):
    r2_percent = np.round(fitted_model.score(test_x, test_y) * 100, 2)
    print("RSquare Value for {} Regression Test Data is- {}".format(model_label, r2_percent))

In [None]:
# Test-set predictions from each fitted baseline model, in the order
# Lasso, Ridge, Elastic Net.
predict_test_lasso, predict_test_ridge, predict_test_elastic = (
    fitted.predict(test_x) for fitted in (lm_lasso, lm_ridge, lm_elastic)
)


In [None]:
def _report_test_metrics(label, predictions, actual):
    """Print and return the test-set metrics for one model.

    Args:
        label: Model name used as the prefix of every printed line.
        predictions: Predicted target values for the test split.
        actual: True target values for the test split.

    Returns:
        Tuple ``(mse, abs_errors, mape, accuracy)`` where ``mse`` is rounded
        to two decimals, ``mape`` is in percent and ``accuracy`` is
        ``100 - mape``.
    """
    mse = np.round(metrics.mean_squared_error(actual, predictions), 2)
    abs_errors = np.abs(predictions - actual)
    # Divide by |actual| so the percentage error cannot turn negative when a
    # target value is negative.
    mape = 100 * np.mean(abs_errors / np.abs(actual))
    accuracy = 100 - mape

    print("{} Regression Mean Squared ERROR (MSE) for Test Data is {}".format(label, mse))
    print("{} MAPE for Test Data is {}%".format(label, mape))
    print("{} Accuracy for Test Data is {}%".format(label, accuracy))
    return mse, abs_errors, mape, accuracy


# Same three reports as before; every original result name is still defined.
mse_lasso, errors_test_lasso, mape_test_lasso, accuracy_test_lasso = _report_test_metrics(
    "Lasso", predict_test_lasso, test_y)

mse_ridge, errors_test_ridge, mape_test_ridge, accuracy_test_ridge = _report_test_metrics(
    "Ridge", predict_test_ridge, test_y)

mse_elastic, errors_test_elastic, mape_test_elastic, accuracy_test_elastic = _report_test_metrics(
    "Elastic", predict_test_elastic, test_y)

# Creating an Evaluation Function to Run Models

In [None]:
def evaluate(model,X_train,X_test,Y_train,Y_test):
    """Fit ``model`` on the training split and report its errors on both splits.

    The model is (re)fitted in place, then used to predict the training and
    test features. For each split the mean absolute error, the mean absolute
    percentage error (MAPE) and ``100 - MAPE`` ("accuracy") are printed.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        X_train: Training features.
        X_test: Test features.
        Y_train: Training targets.
        Y_test: Test targets.

    Returns:
        Tuple ``(accuracy_train, accuracy_test)``, each being ``100 - MAPE``
        in percent for the corresponding split.

    Note:
        MAPE is undefined for targets equal to zero; such values produce
        ``inf``/``nan`` here rather than an exception.
    """
    model.fit(X_train,Y_train)
    predictions_train = model.predict(X_train)
    predictions_test = model.predict(X_test)

    errors_train = np.abs(predictions_train - Y_train)
    errors_test = np.abs(predictions_test - Y_test)

    # Divide by the magnitude of the target: dividing by the signed value made
    # MAPE negative (and "accuracy" exceed 100%) whenever targets were negative.
    mape_train = 100 * np.mean(errors_train / np.abs(Y_train))
    mape_test = 100 * np.mean(errors_test / np.abs(Y_test))

    accuracy_train = 100 - mape_train
    accuracy_test = 100 - mape_test

    print('Model Performance')
    print('Average Error(Train Data): {:0.4f} of int rate.'.format(np.mean(errors_train)))
    print('Average Error(Test Data): {:0.4f} of int rate.'.format(np.mean(errors_test)))

    print('Accuracy(Train Data) = {:0.2f}%.'.format(accuracy_train))
    print('Accuracy(Test Data) = {:0.2f}%.'.format(accuracy_test))

    print('Mape(Train Data): {:0.4f} of int rate'.format(mape_train))
    print('Mape(Test Data): {:0.4f} of int rate'.format(mape_test))

    return accuracy_train,accuracy_test

# Grid Search For Hyperparameter Tuning for all Models

In [None]:
# Candidate regularization strengths for the grid searches, strongest first.
# NOTE(review): alpha=0 disables the penalty entirely; scikit-learn advises
# against alpha=0 for Lasso/ElasticNet (use LinearRegression) - confirm intent.
alphas = np.array([1, 1e-1, 1e-2, 1e-3, 1e-4, 0])

In [None]:
# Grid-search the Lasso penalty strength (alpha) over `alphas` on the
# training split, using GridSearchCV's default cross-validation and scoring.
model = Lasso()
lasso_search_space = {"alpha": alphas}
grid_lasso = GridSearchCV(model, lasso_search_space)
grid_lasso.fit(train_x, train_y)
print(grid_lasso)

# Best cross-validated score, then the estimator that achieved it.
print(grid_lasso.best_score_, grid_lasso.best_estimator_, sep="\n")

In [None]:
# Grid-search the Ridge penalty strength (alpha) over `alphas` on the
# training split, using GridSearchCV's default cross-validation and scoring.
model = Ridge()
ridge_search_space = {"alpha": alphas}
grid_ridge = GridSearchCV(model, ridge_search_space)
grid_ridge.fit(train_x, train_y)
print(grid_ridge)

# Best cross-validated score, then the estimator that achieved it.
print(grid_ridge.best_score_, grid_ridge.best_estimator_, sep="\n")

In [None]:
# Grid-search the Elastic Net penalty strength (alpha) over `alphas` on the
# training split, using GridSearchCV's default cross-validation and scoring.
model = ElasticNet()
elastic_search_space = {"alpha": alphas}
grid_elastic = GridSearchCV(model, elastic_search_space)
grid_elastic.fit(train_x, train_y)
print(grid_elastic)

# Best cross-validated score, then the estimator that achieved it.
print(grid_elastic.best_score_, grid_elastic.best_estimator_, sep="\n")

# Running the Models With the Best Hyperparameters

In [None]:
# Build the tuned Lasso from the alpha that grid_lasso actually selected,
# instead of a hand-copied parameter list. The copied list could drift from
# the search result, and it passed `normalize=`, a keyword that newer
# scikit-learn releases no longer accept. All other settings keep their
# library defaults.
lm_lasso_best = Lasso(**grid_lasso.best_params_)

# Refit on the training split and report train/test error metrics.
evaluate(lm_lasso_best, train_x, test_x, train_y, test_y)


In [None]:
# Build the tuned Ridge from the alpha that grid_ridge actually selected,
# instead of a hand-copied parameter list. The copied list could drift from
# the search result, and it passed `normalize=`, a keyword that newer
# scikit-learn releases no longer accept. All other settings keep their
# library defaults.
lm_ridge_best = Ridge(**grid_ridge.best_params_)

# Refit on the training split and report train/test error metrics.
evaluate(lm_ridge_best, train_x, test_x, train_y, test_y)

In [None]:
# Build the tuned Elastic Net from the alpha that grid_elastic actually
# selected, instead of a hand-copied parameter list. The copied list could
# drift from the search result, and it passed `normalize=`, a keyword that
# newer scikit-learn releases no longer accept. All other settings
# (including l1_ratio) keep their library defaults.
lm_elastic_best = ElasticNet(**grid_elastic.best_params_)

# Refit on the training split and report train/test error metrics.
evaluate(lm_elastic_best, train_x, test_x, train_y, test_y)

We Chose the Hyperparameter "Alpha" to Tune

By applying hyperparameter tuning to the alpha values of all three algorithms, we can see that the model performance improved significantly.