# Importing the necessary libraries

In [None]:
# Importing the necessary libraries

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import math
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Mounting Google drive

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Reading the dataset

df_unclean = pd.read_csv('/content/drive/MyDrive/Capstone/forestfires.csv')
df_unclean.head()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0


# Getting a description of the dataset

In [None]:
# Getting a description of the dataset

df_unclean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 517 entries, 0 to 516
Data columns (total 13 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   X       517 non-null    int64  
 1   Y       517 non-null    int64  
 2   month   517 non-null    object 
 3   day     517 non-null    object 
 4   FFMC    517 non-null    float64
 5   DMC     517 non-null    float64
 6   DC      517 non-null    float64
 7   ISI     517 non-null    float64
 8   temp    517 non-null    float64
 9   RH      517 non-null    int64  
 10  wind    517 non-null    float64
 11  rain    517 non-null    float64
 12  area    517 non-null    float64
dtypes: float64(8), int64(3), object(2)
memory usage: 52.6+ KB


In [None]:
# Checking the shape of the dataset

print("The shape of the dataset is:", df_unclean.shape)

The shape of the dataset is: (517, 13)


# Pre-processing the data

In [None]:
# Checking for NA values

print("The number of NA values in the dataset is:", df_unclean.isna().sum().sum())

The number of NA values in the dataset is: 0


In [None]:
# Checking for skewness in the predictor variable

df_unclean.skew(axis = 0)

  df_unclean.skew(axis = 0)


X        0.036246
Y        0.417296
FFMC    -6.575606
DMC      0.547498
DC      -1.100445
ISI      2.536325
temp    -0.331172
RH       0.862904
wind     0.571001
rain    19.816344
area    12.846934
dtype: float64

In [None]:
# Correcting the skewness in the response variable

df = df_unclean.copy()
df['area'] = np.log(df['area'] + 1)
print("Previous skew of area:", df_unclean['area'].skew())
print("Current skew of area:", df['area'].skew())

Previous skew of area: 12.846933533934868
Current skew of area: 1.2178376559535011


In [None]:
# Exploring the dataset


In [None]:
# Normalizing the FWI and other climatic factors

normalizer = MinMaxScaler()
def normalize(feature):
  transform = np.array(df[feature]).reshape(-1, 1)
  df[feature] = normalizer.fit_transform(transform)

feature_list = ['FFMC', 'DMC', 'DC', 'ISI', 'temp', 'RH', 'wind', 'rain']
for each in feature_list:
  normalize(each)

In [None]:
# Converting categorical values to numerical values

df['month'] = df['month'].replace(['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
df['day'] = df['day'].replace(['sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'], [1, 2, 3, 4, 5, 6, 7])
print("The dataset after cleaning:") 
df.head()

The dataset after cleaning:


Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,3,6,0.870968,0.086492,0.101325,0.090909,0.192926,0.423529,0.7,0.0,0.0
1,7,4,10,3,0.927742,0.118194,0.775419,0.11943,0.508039,0.211765,0.055556,0.0,0.0
2,7,4,10,7,0.927742,0.146795,0.796294,0.11943,0.398714,0.211765,0.1,0.0,0.0
3,8,6,3,6,0.941935,0.110958,0.081623,0.160428,0.196141,0.964706,0.4,0.03125,0.0
4,8,6,3,1,0.910968,0.172984,0.11059,0.171123,0.29582,0.988235,0.155556,0.0,0.0


# Modeling using Lasso Regression

In [None]:
# Splitting the dataset into train and test

X = df.drop(columns = ['area'])
y = df['area']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

In [None]:
# Printing the shapes of the train and test datasets

print("X_train:", X_train.shape)
print("y_train:", y_train.shape)
print("X_test:", X_test.shape)
print("y_test:", y_test.shape)

X_train: (413, 12)
y_train: (413,)
X_test: (104, 12)
y_test: (104,)


In [None]:
# Implementing Lasso Regressor

lar = Lasso()
lar.fit(X_train, y_train)

Lasso()

In [None]:
# Code used for evaluation later

def evaluate_model(model, X_train, y_train, X_test, y_test):
  train_preds = model.predict(X_train)
  test_preds = model.predict(X_test)
  train_rmse = math.sqrt(mean_squared_error(y_train, train_preds))
  train_r2 = r2_score(y_train, train_preds)
  test_rmse = math.sqrt(mean_squared_error(y_test, test_preds))
  test_r2 = r2_score(y_test, test_preds)
  print("The train rmse using linear regression:", round(train_rmse, 3))
  print("The train r2 score using linear regression:", round(train_r2, 3))
  print("The test rmse using linear regression:", round(test_rmse, 3))
  print("The test r2 score using linear regression:", round(test_r2, 3))

In [None]:
# Evaluating the model

evaluate_model(lar, X_train, y_train, X_test, y_test)

The train rmse using linear regression: 1.375
The train r2 score using linear regression: 0.0
The test rmse using linear regression: 1.483
The test r2 score using linear regression: -0.001


# Hyper-parameter tuning

We perform parameter tuning to improve the model as the performance is not great for both the sets.

Using RondomizedSearchCV

In [None]:
# Getting the current parameters

lar.get_params()

{'alpha': 1.0,
 'copy_X': True,
 'fit_intercept': True,
 'max_iter': 1000,
 'normalize': 'deprecated',
 'positive': False,
 'precompute': False,
 'random_state': None,
 'selection': 'cyclic',
 'tol': 0.0001,
 'warm_start': False}

In [None]:
# Implementing hyper-parameter tuning using RandomSearchCV

alpha = np.arange(0.00, 1.0, 0.01)
selection = ['cyclic', 'random']
max_iter = list(range(100, 1000, 100))
grid_search = {'alpha': alpha,
               'selection': selection,
               'max_iter': max_iter}

In [None]:
lar_tuning = Lasso(random_state = 42)
lar_random_search = RandomizedSearchCV(estimator = lar_tuning, param_distributions = grid_search, n_iter = 5, scoring = 'r2', cv = 10, verbose = 2, random_state = 42, return_train_score = True)
lar_random_search.fit(X_train, y_train)

Fitting 10 folds for each of 5 candidates, totalling 50 fits
[CV] END .........alpha=0.62, max_iter=600, selection=cyclic; total time=   0.0s
[CV] END .........alpha=0.62, max_iter=600, selection=cyclic; total time=   0.0s
[CV] END .........alpha=0.62, max_iter=600, selection=cyclic; total time=   0.0s
[CV] END .........alpha=0.62, max_iter=600, selection=cyclic; total time=   0.0s
[CV] END .........alpha=0.62, max_iter=600, selection=cyclic; total time=   0.0s
[CV] END .........alpha=0.62, max_iter=600, selection=cyclic; total time=   0.0s
[CV] END .........alpha=0.62, max_iter=600, selection=cyclic; total time=   0.0s
[CV] END .........alpha=0.62, max_iter=600, selection=cyclic; total time=   0.0s
[CV] END .........alpha=0.62, max_iter=600, selection=cyclic; total time=   0.0s
[CV] END .........alpha=0.62, max_iter=600, selection=cyclic; total time=   0.0s
[CV] END .........alpha=0.81, max_iter=100, selection=random; total time=   0.0s
[CV] END .........alpha=0.81, max_iter=100, sele

RandomizedSearchCV(cv=10, estimator=Lasso(random_state=42), n_iter=5,
                   param_distributions={'alpha': array([0.  , 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 ,
       0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 , 0.21,
       0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 , 0.31, 0.32,
       0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42, 0.43,
       0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5...
       0.55, 0.56, 0.57, 0.58, 0.59, 0.6 , 0.61, 0.62, 0.63, 0.64, 0.65,
       0.66, 0.67, 0.68, 0.69, 0.7 , 0.71, 0.72, 0.73, 0.74, 0.75, 0.76,
       0.77, 0.78, 0.79, 0.8 , 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87,
       0.88, 0.89, 0.9 , 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98,
       0.99]),
                                        'max_iter': [100, 200, 300, 400, 500,
                                                     600, 700, 800, 900],
                                        'selection': ['cyclic', 'random']},
    

In [None]:
# Getting the best parameters

lar_random_search.best_params_

{'selection': 'cyclic', 'max_iter': 600, 'alpha': 0.62}

In [None]:
# Evaluating the model

model = lar_random_search.best_estimator_
model.fit(X_train, y_train)
evaluate_model(model, X_train, y_train, X_test, y_test)

The train rmse using linear regression: 1.375
The train r2 score using linear regression: 0.0
The test rmse using linear regression: 1.483
The test r2 score using linear regression: -0.001


# Using GridSearchCV based on the results of random search

In [None]:
# Implementing Hyper-parameter tuning using GridSearchCV

alpha = np.arange(0.00, 1.0, 0.01)
selection = ['cyclic', 'random']
max_iter = list(range(100, 1000, 100))
parameters = {'alpha': alpha,
               'selection': selection,
               'max_iter': max_iter}
lar_grid_search = GridSearchCV(estimator = lar_tuning, param_grid = parameters, scoring = 'r2', cv = 10, verbose = 2, return_train_score = True)
lar_grid_search.fit(X_train, y_train)

Fitting 10 folds for each of 1800 candidates, totalling 18000 fits
[CV] END ..........alpha=0.0, max_iter=100, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=100, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=100, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=100, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=100, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=100, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=100, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=100, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=100, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=100, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=100, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=100

  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_f

[CV] END ..........alpha=0.0, max_iter=100, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=100, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=100, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=100, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=100, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=100, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=200, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=200, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=200, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=200, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=200, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=200, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0

  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_f

[CV] END ..........alpha=0.0, max_iter=200, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=200, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=200, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=200, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=200, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=200, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=200, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=200, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=200, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=200, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=200, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=300, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0

  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_f

[CV] END ..........alpha=0.0, max_iter=300, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=300, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=300, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=300, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=300, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=300, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=300, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=300, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=300, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=300, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=300, selection=random; total time=   0.0s

  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_f


[CV] END ..........alpha=0.0, max_iter=300, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=300, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=300, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=300, selection=random; total time=   0.0s


  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV] END ..........alpha=0.0, max_iter=300, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=300, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=400, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=400, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=400, selection=cyclic; total time=   0.0s


  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV] END ..........alpha=0.0, max_iter=400, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=400, selection=cyclic; total time=   0.1s
[CV] END ..........alpha=0.0, max_iter=400, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=400, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=400, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=400, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=400, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=400, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=400, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=400, selection=random; total time=   0.0s


  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_f

[CV] END ..........alpha=0.0, max_iter=400, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=400, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=400, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=400, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=400, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=400, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=400, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=500, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=500, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=500, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=500, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=500, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0

  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_f

[CV] END ..........alpha=0.0, max_iter=500, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=500, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=500, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=500, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=500, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=500, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=500, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=500, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=500, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=600, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=600, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=600, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.e

[CV] END ..........alpha=0.0, max_iter=600, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=600, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=600, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=600, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=600, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=600, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=600, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=600, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=600, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=600, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=600, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=700, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.e

[CV] END ..........alpha=0.0, max_iter=700, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=700, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=700, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=700, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=700, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=700, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=700, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=700, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=700, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=700, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=700, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=700, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.e

[CV] END ..........alpha=0.0, max_iter=800, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=800, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=800, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=800, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=800, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=800, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=800, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=800, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=800, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=800, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=800, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=800, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0

  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_t

[CV] END ..........alpha=0.0, max_iter=900, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=900, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=900, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=900, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=900, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=900, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=900, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=900, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=900, selection=cyclic; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=900, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=900, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0, max_iter=900, selection=random; total time=   0.0s
[CV] END ..........alpha=0.0

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  estimator.fit(X_train, y_train, **fit_params)
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[CV] END .........alpha=0.72, max_iter=300, selection=cyclic; total time=   0.0s
[CV] END .........alpha=0.72, max_iter=300, selection=cyclic; total time=   0.0s
[CV] END .........alpha=0.72, max_iter=300, selection=cyclic; total time=   0.0s
[CV] END .........alpha=0.72, max_iter=300, selection=cyclic; total time=   0.0s
[CV] END .........alpha=0.72, max_iter=300, selection=cyclic; total time=   0.0s
[CV] END .........alpha=0.72, max_iter=300, selection=cyclic; total time=   0.0s
[CV] END .........alpha=0.72, max_iter=300, selection=cyclic; total time=   0.0s
[CV] END .........alpha=0.72, max_iter=300, selection=cyclic; total time=   0.0s
[CV] END .........alpha=0.72, max_iter=300, selection=cyclic; total time=   0.0s
[CV] END .........alpha=0.72, max_iter=300, selection=cyclic; total time=   0.0s
[CV] END .........alpha=0.72, max_iter=300, selection=random; total time=   0.0s
[CV] END .........alpha=0.72, max_iter=300, 

GridSearchCV(cv=10, estimator=Lasso(random_state=42),
             param_grid={'alpha': array([0.  , 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 ,
       0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 , 0.21,
       0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 , 0.31, 0.32,
       0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42, 0.43,
       0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5 , 0.51, 0.52, 0.53, 0.54,
       0.55, 0.56, 0.57, 0.58, 0.59, 0.6 , 0.61, 0.62, 0.63, 0.64, 0.65,
       0.66, 0.67, 0.68, 0.69, 0.7 , 0.71, 0.72, 0.73, 0.74, 0.75, 0.76,
       0.77, 0.78, 0.79, 0.8 , 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87,
       0.88, 0.89, 0.9 , 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98,
       0.99]),
                         'max_iter': [100, 200, 300, 400, 500, 600, 700, 800,
                                      900],
                         'selection': ['cyclic', 'random']},
             return_train_score=True, scoring='r2', verbo

In [None]:
# Getting the best parameters

lar_grid_search.best_params_

{'alpha': 0.1, 'max_iter': 100, 'selection': 'random'}

In [None]:
# Evaluating the model

model = lar_grid_search.best_estimator_
model.fit(X_train, y_train)
evaluate_model(model, X_train, y_train, X_test, y_test)

The train rmse using linear regression: 1.365
The train r2 score using linear regression: 0.014
The test rmse using linear regression: 1.47
The test r2 score using linear regression: 0.017
