# Elastic Net Regression

In [1]:
# extension that autoformats all code to PEP8
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
import numpy as np
import pandas as pd
import plotly.express as px
import statsmodels.api as sm
from sklearn.linear_model import ElasticNet
from IPython.display import display
import plotly.express as px
import plotly.graph_objects as go

  from pandas import Int64Index as NumericIndex


<IPython.core.display.Javascript object>

The advantage of Elastic Net is that we get the best of both worlds: the dimnesion reduction by shrinkage without ignoring highly correlated variables in groups.Our goal in Elastic Net is to minimize the following loss function:
$$
1 / (2 * n_{samples}) * ||y - Xw||^2_2
+ alpha * l_1 ratio * ||w||_1
+ 0.5 * alpha * (1 - l_1 ratio) * ||w||^2_2
$$ 
or 
$$
a * ||w||_1 + 0.5 * b * ||w||_2^2
$$

## Data Loading and Cleaning

In [3]:
# Using updated dataframe from preprocessing notebook
df = pd.read_csv("../data/listings.csv")
# Random column of index
df = df.drop(["Unnamed: 0"], axis=1,)
display(df)

Unnamed: 0,price,minimum_nights,number_of_reviews,reviews_per_month,calculated_host_listings_count,availability_365,neighbourhood_group_Bronx,neighbourhood_group_Brooklyn,neighbourhood_group_Manhattan,neighbourhood_group_Queens,...,neighbourhood_Williamsburg,neighbourhood_Willowbrook,neighbourhood_Windsor Terrace,neighbourhood_Woodhaven,neighbourhood_Woodlawn,neighbourhood_Woodside,room_type_Entire home/apt,room_type_Private room,room_type_Shared room,days_since_review
0,149,1,9,0.21,6,365,0,1,0,0,...,0,0,0,0,0,0,0,1,0,1235
1,225,1,45,0.38,2,355,0,0,1,0,...,0,0,0,0,0,0,1,0,0,1021
2,89,1,270,4.64,1,194,0,1,0,0,...,0,0,0,0,0,0,1,0,0,976
3,80,10,9,0.10,1,0,0,0,1,0,...,0,0,0,0,0,0,1,0,0,1204
4,200,3,74,0.59,1,129,0,0,1,0,...,0,0,0,0,0,0,1,0,0,989
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38832,129,1,1,1.00,1,147,0,0,1,0,...,0,0,0,0,0,0,0,1,0,974
38833,45,1,1,1.00,6,339,0,0,0,1,...,0,0,0,0,0,0,0,1,0,974
38834,235,1,1,1.00,1,87,0,0,0,0,...,0,0,0,0,0,0,0,1,0,974
38835,100,1,2,2.00,1,40,1,0,0,0,...,0,0,0,0,0,0,1,0,0,974


<IPython.core.display.Javascript object>

In [4]:
# Who needs a black box?
def standardizeData(data):
    # Takes pandas dataframe and standardizes it iteratively
    for column in data:
        data[column] = (data[column] - np.mean(data[column])) / np.std(data[column])
    return data

<IPython.core.display.Javascript object>

In [5]:
# Create training data, holding out 10% of data for testing
from sklearn.model_selection import train_test_split  # Returns pandas dataframe

Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    df.drop("price", axis=1), df["price"], test_size=0.2, random_state=42
)

# # Standardize data for faster convergence in gradient descent
Xtrain, Xtest = standardizeData(Xtrain), standardizeData(Xtest)

display(Xtrain)
display(Xtest)

Unnamed: 0,minimum_nights,number_of_reviews,reviews_per_month,calculated_host_listings_count,availability_365,neighbourhood_group_Bronx,neighbourhood_group_Brooklyn,neighbourhood_group_Manhattan,neighbourhood_group_Queens,neighbourhood_group_Staten Island,...,neighbourhood_Williamsburg,neighbourhood_Willowbrook,neighbourhood_Windsor Terrace,neighbourhood_Woodhaven,neighbourhood_Woodlawn,neighbourhood_Woodside,room_type_Entire home/apt,room_type_Private room,room_type_Shared room,days_since_review
6263,-0.167611,2.561530,0.915898,-0.158841,0.947496,-0.152154,-0.860558,1.159744,-0.365082,-0.08897,...,-0.297550,-0.005673,-0.057109,-0.042112,-0.017022,-0.065071,-1.045753,1.092554,-0.14925,-0.640362
18571,-0.226746,0.967890,0.844265,-0.120813,1.194061,-0.152154,1.162037,-0.862259,-0.365082,-0.08897,...,-0.297550,-0.005673,-0.057109,-0.042112,-0.017022,-0.065071,0.956249,-0.915287,-0.14925,-0.637946
31589,-0.167611,-0.563660,-0.588380,-0.158841,-0.886333,-0.152154,-0.860558,1.159744,-0.365082,-0.08897,...,-0.297550,-0.005673,-0.057109,-0.042112,-0.017022,-0.065071,0.956249,-0.915287,-0.14925,-0.570287
25228,-0.285880,0.119329,0.486104,-0.158841,1.856705,-0.152154,-0.860558,-0.862259,2.739112,-0.08897,...,-0.297550,-0.005673,-0.057109,-0.042112,-0.017022,-0.065071,0.956249,-0.915287,-0.14925,-0.652444
32717,-0.285880,-0.025548,1.912780,-0.158841,-0.031060,-0.152154,-0.860558,1.159744,-0.365082,-0.08897,...,-0.297550,-0.005673,-0.057109,-0.042112,-0.017022,-0.065071,-1.045753,1.092554,-0.14925,-0.657277
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6265,-0.108477,1.857845,0.515951,-0.158841,1.217177,-0.152154,1.162037,-0.862259,-0.365082,-0.08897,...,-0.297550,-0.005673,-0.057109,-0.042112,-0.017022,-0.065071,0.956249,-0.915287,-0.14925,-0.628280
11284,-0.167611,-0.460177,-0.689859,0.297494,-0.316151,-0.152154,1.162037,-0.862259,-0.365082,-0.08897,...,-0.297550,-0.005673,-0.057109,-0.042112,-0.017022,-0.065071,0.956249,-0.915287,-0.14925,-0.521958
38158,-0.285880,-0.501570,2.169462,-0.158841,0.069107,-0.152154,-0.860558,1.159744,-0.365082,-0.08897,...,-0.297550,-0.005673,-0.057109,-0.042112,-0.017022,-0.065071,0.956249,-0.915287,-0.14925,-0.642779
860,-0.167611,-0.087637,-0.642104,-0.158841,-0.886333,-0.152154,1.162037,-0.862259,-0.365082,-0.08897,...,3.360776,-0.005673,-0.057109,-0.042112,-0.017022,-0.065071,0.956249,-0.915287,-0.14925,-0.193328


Unnamed: 0,minimum_nights,number_of_reviews,reviews_per_month,calculated_host_listings_count,availability_365,neighbourhood_group_Bronx,neighbourhood_group_Brooklyn,neighbourhood_group_Manhattan,neighbourhood_group_Queens,neighbourhood_group_Staten Island,...,neighbourhood_Williamsburg,neighbourhood_Willowbrook,neighbourhood_Windsor Terrace,neighbourhood_Woodhaven,neighbourhood_Woodlawn,neighbourhood_Woodside,room_type_Entire home/apt,room_type_Private room,room_type_Shared room,days_since_review
8022,-0.208915,-0.242654,-0.608102,-0.156562,1.289191,-0.150924,1.186565,-0.877428,-0.366532,-0.095359,...,3.348707,,-0.059059,-0.046832,-0.016048,-0.071035,-1.056703,1.103990,-0.14913,-0.539198
3875,-0.000242,-0.599271,-0.813864,-0.156562,-0.889135,-0.150924,-0.842769,1.139695,-0.366532,-0.095359,...,-0.298623,,-0.059059,-0.046832,-0.016048,-0.071035,0.946340,-0.905805,-0.14913,3.931065
18829,-0.261084,-0.305586,-0.461129,-0.156562,-0.889135,-0.150924,-0.842769,1.139695,-0.366532,-0.095359,...,-0.298623,,-0.059059,-0.046832,-0.016048,-0.071035,0.946340,-0.905805,-0.14913,0.394526
32406,-0.208915,-0.347541,0.250219,-0.118540,-0.134501,6.625838,-0.842769,-0.877428,-0.366532,-0.095359,...,-0.298623,,-0.059059,-0.046832,-0.016048,-0.071035,0.946340,-0.905805,-0.14913,-0.609348
26597,1.251799,-0.578293,-0.678649,0.033548,1.094697,-0.150924,1.186565,-0.877428,-0.366532,-0.095359,...,-0.298623,,-0.059059,-0.046832,-0.016048,-0.071035,0.946340,-0.905805,-0.14913,-0.161838
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2923,-0.052410,1.057951,-0.143668,-0.118540,1.429226,-0.150924,-0.842769,-0.877428,2.728274,-0.095359,...,-0.298623,,-0.059059,-0.046832,-0.016048,-0.071035,-1.056703,1.103990,-0.14913,-0.585159
14280,-0.208915,0.302761,-0.043726,-0.156562,-0.881356,-0.150924,1.186565,-0.877428,-0.366532,-0.095359,...,-0.298623,,-0.059059,-0.046832,-0.016048,-0.071035,-1.056703,1.103990,-0.14913,-0.638376
11098,-0.261084,-0.599271,-0.813864,-0.156562,-0.889135,-0.150924,-0.842769,1.139695,-0.366532,-0.095359,...,-0.298623,,-0.059059,-0.046832,-0.016048,-0.071035,-1.056703,1.103990,-0.14913,2.462748
17025,-0.261084,-0.536338,-0.743317,-0.156562,-0.889135,-0.150924,-0.842769,1.139695,-0.366532,-0.095359,...,-0.298623,,-0.059059,-0.046832,-0.016048,-0.071035,0.946340,-0.905805,-0.14913,1.383886


<IPython.core.display.Javascript object>

## Performing Elastic Net Regression

In [28]:
# doing Elastic Net by hand via a class
# https://math.stackexchange.com/questions/1111504/differentiation-with-respect-to-a-matrix-residual-sum-of-squares
class ElasticRegression:
    def __init__(self, learning_rate, iterations, l1_penalty, l2_penalty):

        self.learning_rate = learning_rate
        self.iterations = iterations
        self.l1_penalty = l1_penalty
        self.l2_penalty = l2_penalty

    # Function for model training
    def fit(self, X, Y):

        # no_of_training_examples, no_of_features
        self.m, self.n = X.shape
        print(self.m, self.n)
        # weight initialization
        self.W = np.zeros(self.n)
        self.b = 0
        self.X = X
        self.Y = Y

        # gradient descent learning
        for i in range(self.iterations):
            self.update_weights()

        return self

    # Helper function to update weights in gradient descent
    def update_weights(self):
        # print("Look here:")
        # display(self.X @ self.W)
        Y_pred = self.predict(self.X)

        # calculate gradients
        dW = np.zeros(self.n)

        for j in range(self.n):
            if self.W[j] > 0:
                dW[j] = (
                    -(2 * np.transpose(self.X[:, j]) @ (self.Y - Y_pred))
                    + self.l1_penalty
                    + 2 * self.l2_penalty * self.W[j]
                ) / self.m
            else:
                dW[j] = (
                    -(2 * np.transpose(self.X[:, j]) @ (self.Y - Y_pred))
                    - self.l1_penalty
                    + 2 * self.l2_penalty * self.W[j]
                ) / self.m
        db = -2 * np.sum(self.Y - Y_pred) / self.m

        # update weights
        self.W = self.W - self.learning_rate * dW
        self.b = self.b - self.learning_rate * db

        return self

    # Hypothetical function  h( x )
    def predict(self, X):
        print("I AM IN THE PREDICT FUNCTION")
        print(X.shape)
        display(X)
        print(self.W.shape)
        display(self.W)
        print("THIS IS MY SELF.B WHATEVER THIS IS ")
        display(self.b)
        print("I AM MULTIPLYING")
        display(X @ self.W + self.b)
        return X @ self.W + self.b

<IPython.core.display.Javascript object>

### Testing with the 'Iris' Data Set

In [7]:
# Load in iris from seaborn library
iris = pd.read_csv(
    "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv"
)
iris = iris.drop("species", axis=1)
display(iris)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


<IPython.core.display.Javascript object>

In [8]:
Xtrain_iris, Xtest_iris, Ytrain_iris, Ytest_iris = train_test_split(
    iris.drop("sepal_length", axis=1),
    iris["sepal_length"],
    test_size=0.2,
    random_state=42,
)

# Standardize our X matrices
Xtrain_iris, Xtest_iris = standardizeData(Xtrain_iris), standardizeData(Xtest_iris)

# Do the model
model = ElasticRegression(
    iterations=100, learning_rate=0.01, l1_penalty=0.99, l2_penalty=0.01
)
model.fit(Xtrain_iris.to_numpy(), Ytrain_iris.to_numpy())

Y_pred_iris = model.predict(Xtest_iris)
display(pd.DataFrame({"Predictions": Y_pred_iris, "Actual": Ytest_iris}).head())

120 3


Unnamed: 0,Predictions,Actual
73,5.112736,6.1
18,4.446238,5.7
118,5.964557,7.7
78,5.219452,6.0
76,5.212697,6.8


<IPython.core.display.Javascript object>

### On to Our AirBnB Data

In [29]:
enm = ElasticRegression(
    iterations=10, learning_rate=0.01, l1_penalty=0.99, l2_penalty=0.01
)

display(Xtrain)
enm.fit(Xtrain.to_numpy(), Ytrain.to_numpy())

Y_pred = model.predict(Xtest)

print("Predicted values", np.round(Y_pred, 2))
print("Real values:", Ytest)

display(pd.DataFrame({"Predictions": Y_pred, "Actual": Ytest}))

print("Trained W: ", round(model.W[0], 2))
print("Trained b: ", round(model.b, 2))

Unnamed: 0,minimum_nights,number_of_reviews,reviews_per_month,calculated_host_listings_count,availability_365,neighbourhood_group_Bronx,neighbourhood_group_Brooklyn,neighbourhood_group_Manhattan,neighbourhood_group_Queens,neighbourhood_group_Staten Island,...,neighbourhood_Williamsburg,neighbourhood_Willowbrook,neighbourhood_Windsor Terrace,neighbourhood_Woodhaven,neighbourhood_Woodlawn,neighbourhood_Woodside,room_type_Entire home/apt,room_type_Private room,room_type_Shared room,days_since_review
6263,-0.167611,2.561530,0.915898,-0.158841,0.947496,-0.152154,-0.860558,1.159744,-0.365082,-0.08897,...,-0.297550,-0.005673,-0.057109,-0.042112,-0.017022,-0.065071,-1.045753,1.092554,-0.14925,-0.640362
18571,-0.226746,0.967890,0.844265,-0.120813,1.194061,-0.152154,1.162037,-0.862259,-0.365082,-0.08897,...,-0.297550,-0.005673,-0.057109,-0.042112,-0.017022,-0.065071,0.956249,-0.915287,-0.14925,-0.637946
31589,-0.167611,-0.563660,-0.588380,-0.158841,-0.886333,-0.152154,-0.860558,1.159744,-0.365082,-0.08897,...,-0.297550,-0.005673,-0.057109,-0.042112,-0.017022,-0.065071,0.956249,-0.915287,-0.14925,-0.570287
25228,-0.285880,0.119329,0.486104,-0.158841,1.856705,-0.152154,-0.860558,-0.862259,2.739112,-0.08897,...,-0.297550,-0.005673,-0.057109,-0.042112,-0.017022,-0.065071,0.956249,-0.915287,-0.14925,-0.652444
32717,-0.285880,-0.025548,1.912780,-0.158841,-0.031060,-0.152154,-0.860558,1.159744,-0.365082,-0.08897,...,-0.297550,-0.005673,-0.057109,-0.042112,-0.017022,-0.065071,-1.045753,1.092554,-0.14925,-0.657277
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6265,-0.108477,1.857845,0.515951,-0.158841,1.217177,-0.152154,1.162037,-0.862259,-0.365082,-0.08897,...,-0.297550,-0.005673,-0.057109,-0.042112,-0.017022,-0.065071,0.956249,-0.915287,-0.14925,-0.628280
11284,-0.167611,-0.460177,-0.689859,0.297494,-0.316151,-0.152154,1.162037,-0.862259,-0.365082,-0.08897,...,-0.297550,-0.005673,-0.057109,-0.042112,-0.017022,-0.065071,0.956249,-0.915287,-0.14925,-0.521958
38158,-0.285880,-0.501570,2.169462,-0.158841,0.069107,-0.152154,-0.860558,1.159744,-0.365082,-0.08897,...,-0.297550,-0.005673,-0.057109,-0.042112,-0.017022,-0.065071,0.956249,-0.915287,-0.14925,-0.642779
860,-0.167611,-0.087637,-0.642104,-0.158841,-0.886333,-0.152154,1.162037,-0.862259,-0.365082,-0.08897,...,3.360776,-0.005673,-0.057109,-0.042112,-0.017022,-0.065071,0.956249,-0.915287,-0.14925,-0.193328


31069 232
I AM IN THE PREDICT FUNCTION
(31069, 232)


array([[-0.1676111 ,  2.56153032,  0.91589773, ...,  1.09255373,
        -0.14925012, -0.6403622 ],
       [-0.22674567,  0.96789036,  0.84426547, ..., -0.91528679,
        -0.14925012, -0.6379458 ],
       [-0.1676111 , -0.56365972, -0.58837961, ..., -0.91528679,
        -0.14925012, -0.57028651],
       ...,
       [-0.28588025, -0.50156985,  2.16946218, ..., -0.91528679,
        -0.14925012, -0.6427786 ],
       [-0.1676111 , -0.0876374 , -0.64210381, ..., -0.91528679,
        -0.14925012, -0.19332764],
       [-0.28588025,  1.00928361,  0.57564452, ...,  1.09255373,
        -0.14925012, -0.65727702]])

(232,)


array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.

THIS IS MY SELF.B WHATEVER THIS IS 


0

I AM MULTIPLYING


array([0., 0., 0., ..., 0., 0., 0.])

I AM IN THE PREDICT FUNCTION
(31069, 232)


array([[-0.1676111 ,  2.56153032,  0.91589773, ...,  1.09255373,
        -0.14925012, -0.6403622 ],
       [-0.22674567,  0.96789036,  0.84426547, ..., -0.91528679,
        -0.14925012, -0.6379458 ],
       [-0.1676111 , -0.56365972, -0.58837961, ..., -0.91528679,
        -0.14925012, -0.57028651],
       ...,
       [-0.28588025, -0.50156985,  2.16946218, ..., -0.91528679,
        -0.14925012, -0.6427786 ],
       [-0.1676111 , -0.0876374 , -0.64210381, ..., -0.91528679,
        -0.14925012, -0.19332764],
       [-0.28588025,  1.00928361,  0.57564452, ...,  1.09255373,
        -0.14925012, -0.65727702]])

(232,)


array([ 1.16082523e-01, -1.36209211e-01, -1.17239697e-01,  2.15949119e-01,
        2.95310200e-01, -1.87293171e-01, -3.38615153e-01,  6.27984754e-01,
       -3.33850381e-01, -8.78884811e-02, -2.68716118e-02, -1.70148848e-02,
       -1.15157186e-02,  1.34127920e-02, -6.17822900e-02, -2.29433857e-02,
        2.35793331e-02, -4.03859517e-02, -9.01500553e-03,  8.82603547e-04,
       -1.61436318e-02, -2.39418092e-02, -1.02061517e-02, -2.07832990e-01,
        6.13460551e-03, -1.60928841e-02, -2.29992364e-02, -5.22161537e-02,
       -1.00606512e-02,  2.77224167e-02, -8.27192708e-02,  5.98866293e-03,
       -1.66745324e-02, -4.23775693e-02, -3.67615414e-02,  7.93793206e-02,
       -5.29814521e-02, -1.32745066e-02, -2.65652056e-01, -2.62714257e-02,
       -5.36497389e-02,  5.33877524e-02, -1.09476836e-02, -8.76217529e-03,
        2.34720101e-01,  1.97134822e-02, -1.88514490e-02,  2.04274581e-02,
       -3.22921358e-02, -1.75843985e-02, -1.91361592e-02,  9.42995362e-02,
       -7.06015549e-03,  

THIS IS MY SELF.B WHATEVER THIS IS 


2.844487431201519

I AM MULTIPLYING


array([1.68039004, 2.95376605, 8.06585684, ..., 6.6705784 , 3.86375909,
       1.20136762])

I AM IN THE PREDICT FUNCTION
(31069, 232)


array([[-0.1676111 ,  2.56153032,  0.91589773, ...,  1.09255373,
        -0.14925012, -0.6403622 ],
       [-0.22674567,  0.96789036,  0.84426547, ..., -0.91528679,
        -0.14925012, -0.6379458 ],
       [-0.1676111 , -0.56365972, -0.58837961, ..., -0.91528679,
        -0.14925012, -0.57028651],
       ...,
       [-0.28588025, -0.50156985,  2.16946218, ..., -0.91528679,
        -0.14925012, -0.6427786 ],
       [-0.1676111 , -0.0876374 , -0.64210381, ..., -0.91528679,
        -0.14925012, -0.19332764],
       [-0.28588025,  1.00928361,  0.57564452, ...,  1.09255373,
        -0.14925012, -0.65727702]])

(232,)


array([ 2.22204738e-01, -2.66032275e-01, -2.25638970e-01,  4.17551700e-01,
        5.88381260e-01, -3.64745557e-01, -6.54072532e-01,  1.21655160e+00,
       -6.48274605e-01, -1.72116012e-01, -5.21850353e-02, -3.36642075e-02,
       -2.24371614e-02,  2.70210618e-02, -1.18106282e-01, -4.50346177e-02,
        4.56478151e-02, -7.89429938e-02, -1.73955986e-02,  1.65103758e-03,
       -3.16820024e-02, -4.64300978e-02, -1.98246652e-02, -4.04610174e-01,
        1.24962645e-02, -3.10834192e-02, -4.43045101e-02, -1.02237416e-01,
       -2.01217296e-02,  5.45093628e-02, -1.61658438e-01,  1.22448732e-02,
       -3.20538745e-02, -8.31168856e-02, -7.18777688e-02,  1.56528664e-01,
       -1.03394990e-01, -2.57358380e-02, -5.17246131e-01, -5.08273126e-02,
       -1.05285094e-01,  1.05208750e-01, -2.17427465e-02, -1.74168990e-02,
        4.58570346e-01,  3.68390531e-02, -3.68561616e-02,  3.95528669e-02,
       -6.31357778e-02, -3.40389262e-02, -3.76602181e-02,  1.87581108e-01,
       -1.36671684e-02,  

THIS IS MY SELF.B WHATEVER THIS IS 


5.632085113779008

I AM MULTIPLYING


array([ 3.33824697,  5.88818527, 15.80768731, ..., 13.08889862,
        7.64856632,  2.38813872])

I AM IN THE PREDICT FUNCTION
(31069, 232)


array([[-0.1676111 ,  2.56153032,  0.91589773, ...,  1.09255373,
        -0.14925012, -0.6403622 ],
       [-0.22674567,  0.96789036,  0.84426547, ..., -0.91528679,
        -0.14925012, -0.6379458 ],
       [-0.1676111 , -0.56365972, -0.58837961, ..., -0.91528679,
        -0.14925012, -0.57028651],
       ...,
       [-0.28588025, -0.50156985,  2.16946218, ..., -0.91528679,
        -0.14925012, -0.6427786 ],
       [-0.1676111 , -0.0876374 , -0.64210381, ..., -0.91528679,
        -0.14925012, -0.19332764],
       [-0.28588025,  1.00928361,  0.57564452, ...,  1.09255373,
        -0.14925012, -0.65727702]])

(232,)


array([ 3.18943790e-01, -3.89887274e-01, -3.25764620e-01,  6.05603490e-01,
        8.79089173e-01, -5.32884912e-01, -9.47875023e-01,  1.76811957e+00,
       -9.44387716e-01, -2.52855900e-01, -7.60290517e-02, -4.99551454e-02,
       -3.27988550e-02,  4.07801175e-02, -1.69316870e-01, -6.63194663e-02,
        6.63010156e-02, -1.15777182e-01, -2.51787747e-02,  2.31445541e-03,
       -4.66457001e-02, -6.75512414e-02, -2.88902371e-02, -5.90982601e-01,
        1.90566971e-02, -4.50376387e-02, -6.40156392e-02, -1.50176499e-01,
       -3.01804914e-02,  8.03822332e-02, -2.37012381e-01,  1.87448687e-02,
       -4.62203511e-02, -1.22303029e-01, -1.05430019e-01,  2.31510951e-01,
       -1.51378798e-01, -3.74271761e-02, -7.55543168e-01, -7.37646966e-02,
       -1.55024157e-01,  1.55500041e-01, -3.23872416e-02, -2.59623940e-02,
        6.72120722e-01,  5.15690459e-02, -5.40589347e-02,  5.74576801e-02,
       -9.26002421e-02, -4.94312193e-02, -5.55969165e-02,  2.79790826e-01,
       -1.98476789e-02,  

THIS IS MY SELF.B WHATEVER THIS IS 


8.363930842704946

I AM MULTIPLYING


array([ 4.97402566,  8.80073775, 23.24092997, ..., 19.26660339,
       11.35533517,  3.56088981])

I AM IN THE PREDICT FUNCTION
(31069, 232)


array([[-0.1676111 ,  2.56153032,  0.91589773, ...,  1.09255373,
        -0.14925012, -0.6403622 ],
       [-0.22674567,  0.96789036,  0.84426547, ..., -0.91528679,
        -0.14925012, -0.6379458 ],
       [-0.1676111 , -0.56365972, -0.58837961, ..., -0.91528679,
        -0.14925012, -0.57028651],
       ...,
       [-0.28588025, -0.50156985,  2.16946218, ..., -0.91528679,
        -0.14925012, -0.6427786 ],
       [-0.1676111 , -0.0876374 , -0.64210381, ..., -0.91528679,
        -0.14925012, -0.19332764],
       [-0.28588025,  1.00928361,  0.57564452, ...,  1.09255373,
        -0.14925012, -0.65727702]])

(232,)


array([ 4.06844563e-01, -5.08165418e-01, -4.18148488e-01,  7.80856233e-01,
        1.16732180e+00, -6.92210625e-01, -1.22143092e+00,  2.28496139e+00,
       -1.22324185e+00, -3.30272614e-01, -9.84874824e-02, -6.58947456e-02,
       -4.26333809e-02,  5.46482821e-02, -2.15738969e-01, -8.68409711e-02,
        8.56280692e-02, -1.50988068e-01, -3.23994791e-02,  2.88085099e-03,
       -6.10635914e-02, -8.73866382e-02, -3.74355946e-02, -7.67562013e-01,
        2.57889885e-02, -5.80178934e-02, -8.22269658e-02, -1.96139825e-01,
       -4.02340744e-02,  1.05362166e-01, -3.08965379e-01,  2.54656856e-02,
       -5.92514561e-02, -1.60016124e-01, -1.37495440e-01,  3.04387105e-01,
       -1.97063313e-01, -4.83894807e-02, -9.81261971e-01, -9.51753610e-02,
       -2.02977508e-01,  2.04298091e-01, -4.28831143e-02, -3.43970939e-02,
        8.75909063e-01,  6.40826893e-02, -7.05020440e-02,  7.42177912e-02,
       -1.20751354e-01, -6.38250818e-02, -7.29697573e-02,  3.70880630e-01,
       -2.56269082e-02,  

THIS IS MY SELF.B WHATEVER THIS IS 


11.041139657052366

I AM MULTIPLYING


array([ 6.58813908, 11.68915169, 30.38021065, ..., 25.21473204,
       14.98503377,  4.72013678])

I AM IN THE PREDICT FUNCTION
(31069, 232)


array([[-0.1676111 ,  2.56153032,  0.91589773, ...,  1.09255373,
        -0.14925012, -0.6403622 ],
       [-0.22674567,  0.96789036,  0.84426547, ..., -0.91528679,
        -0.14925012, -0.6379458 ],
       [-0.1676111 , -0.56365972, -0.58837961, ..., -0.91528679,
        -0.14925012, -0.57028651],
       ...,
       [-0.28588025, -0.50156985,  2.16946218, ..., -0.91528679,
        -0.14925012, -0.6427786 ],
       [-0.1676111 , -0.0876374 , -0.64210381, ..., -0.91528679,
        -0.14925012, -0.19332764],
       [-0.28588025,  1.00928361,  0.57564452, ...,  1.09255373,
        -0.14925012, -0.65727702]])

(232,)


array([ 4.86422066e-01, -6.21232702e-01, -5.03289560e-01,  9.44020720e-01,
        1.45297869e+00, -8.43195242e-01, -1.47605979e+00,  2.76921286e+00,
       -1.48582977e+00, -4.04522252e-01, -1.19639464e-01, -8.14899284e-02,
       -5.19714847e-02,  6.85873012e-02, -2.57678650e-01, -1.06639608e-01,
        1.03712670e-01, -1.84669107e-01, -3.90907197e-02,  3.35773703e-03,
       -7.49629200e-02, -1.06012991e-01, -4.54916052e-02, -9.34923241e-01,
        3.26681169e-02, -7.00830528e-02, -9.90276509e-02, -2.40227892e-01,
       -5.02795251e-02,  1.29470595e-01, -3.77691706e-01,  3.23857295e-02,
       -7.12201286e-02, -1.96331638e-01, -1.68147213e-01,  3.75216689e-01,
       -2.40571677e-01, -5.86616052e-02, -1.19508160e+00, -1.15146119e-01,
       -2.49248686e-01,  2.51639333e-01, -5.32322134e-02, -4.27196322e-02,
        1.07044348e+00,  7.45475245e-02, -8.62253887e-02,  8.99044765e-02,
       -1.47651629e-01, -7.72806984e-02, -8.98010769e-02,  4.60808448e-01,
       -3.10287317e-02,  

THIS IS MY SELF.B WHATEVER THIS IS 


13.664804295112837

I AM MULTIPLYING


array([ 8.18096671, 14.55138372, 37.2393901 , ..., 30.94375513,
       18.53867517,  5.86633746])

I AM IN THE PREDICT FUNCTION
(31069, 232)


array([[-0.1676111 ,  2.56153032,  0.91589773, ...,  1.09255373,
        -0.14925012, -0.6403622 ],
       [-0.22674567,  0.96789036,  0.84426547, ..., -0.91528679,
        -0.14925012, -0.6379458 ],
       [-0.1676111 , -0.56365972, -0.58837961, ..., -0.91528679,
        -0.14925012, -0.57028651],
       ...,
       [-0.28588025, -0.50156985,  2.16946218, ..., -0.91528679,
        -0.14925012, -0.6427786 ],
       [-0.1676111 , -0.0876374 , -0.64210381, ..., -0.91528679,
        -0.14925012, -0.19332764],
       [-0.28588025,  1.00928361,  0.57564452, ...,  1.09255373,
        -0.14925012, -0.65727702]])

(232,)


array([ 5.58163075e-01, -7.29431461e-01, -5.81655929e-01,  1.09576904e+00,
        1.73597024e+00, -9.86285915e-01, -1.71299793e+00,  3.22288097e+00,
       -1.73308816e+00, -4.75752989e-01, -1.39559707e-01, -9.67474894e-02,
       -6.08421778e-02,  8.25621163e-02, -2.95424189e-01, -1.25753442e-01,
        1.20633398e-01, -2.16908022e-01, -4.52836729e-02,  3.75217151e-03,
       -8.83694001e-02, -1.23502573e-01, -5.30873797e-02, -1.09360643e+00,
        3.96708393e-02, -8.12886933e-02, -1.14501953e-01, -2.82535612e-01,
       -6.03138212e-02,  1.52729161e-01, -4.43356188e-01,  3.94847019e-02,
       -8.21950102e-02, -2.31320664e-01, -1.97454759e-01,  4.44057906e-01,
       -2.82020143e-01, -6.82804056e-02, -1.39764359e+00, -1.33759081e-01,
       -2.93934635e-01,  2.97560085e-01, -6.34363011e-02, -5.09288310e-02,
        1.25620402e+00,  8.31201282e-02, -1.01266626e-01,  1.04584672e-01,
       -1.73360445e-01, -8.98548346e-02, -1.06112110e-01,  5.49537739e-01,
       -3.60757475e-02,  

THIS IS MY SELF.B WHATEVER THIS IS 


16.2359956404121

I AM MULTIPLYING


array([ 9.752858  , 17.3856028 , 43.83160617, ..., 36.46360576,
       22.01730922,  6.99989721])

I AM IN THE PREDICT FUNCTION
(31069, 232)


array([[-0.1676111 ,  2.56153032,  0.91589773, ...,  1.09255373,
        -0.14925012, -0.6403622 ],
       [-0.22674567,  0.96789036,  0.84426547, ..., -0.91528679,
        -0.14925012, -0.6379458 ],
       [-0.1676111 , -0.56365972, -0.58837961, ..., -0.91528679,
        -0.14925012, -0.57028651],
       ...,
       [-0.28588025, -0.50156985,  2.16946218, ..., -0.91528679,
        -0.14925012, -0.6427786 ],
       [-0.1676111 , -0.0876374 , -0.64210381, ..., -0.91528679,
        -0.14925012, -0.19332764],
       [-0.28588025,  1.00928361,  0.57564452, ...,  1.09255373,
        -0.14925012, -0.65727702]])

(232,)


array([ 6.22527672e-01, -8.33081831e-01, -6.53686651e-01,  1.23673669e+00,
        2.01621698e+00, -1.12190577e+00, -1.93340355e+00,  3.64785156e+00,
       -1.96590077e+00, -5.44105497e-01, -1.58318741e-01, -1.11674102e-01,
       -6.92728337e-02,  9.65406530e-02, -3.29247074e-01, -1.44218277e-01,
        1.36464023e-01, -2.47787167e-01, -5.10077836e-02,  4.07078282e-03,
       -1.01307304e-01, -1.39923479e-01, -6.02503697e-02, -1.24411912e+00,
        4.67755834e-02, -9.16872775e-02, -1.28729490e-01, -3.23152634e-01,
       -7.03338929e-02,  1.75159636e-01, -5.06114734e-01,  4.67435294e-02,
       -9.22406902e-02, -2.65050179e-01, -2.25483937e-01,  5.10967592e-01,
       -3.21518477e-01, -7.72808431e-02, -1.58955409e+00, -1.51091903e-01,
       -3.37126117e-01,  3.42096469e-01, -7.34970612e-02, -5.90236905e-02,
        1.43364431e+00,  8.99467923e-02, -1.15661299e-01,  1.18321231e-01,
       -1.97934206e-01, -1.01601025e-01, -1.21923053e-01,  6.37037071e-01,
       -4.07893423e-02,  

THIS IS MY SELF.B WHATEVER THIS IS 


18.755763158805376

I AM MULTIPLYING


array([11.30413491, 20.19017446, 50.16931279, ..., 41.78370823,
       25.42202368,  8.12117329])

I AM IN THE PREDICT FUNCTION
(31069, 232)


array([[-0.1676111 ,  2.56153032,  0.91589773, ...,  1.09255373,
        -0.14925012, -0.6403622 ],
       [-0.22674567,  0.96789036,  0.84426547, ..., -0.91528679,
        -0.14925012, -0.6379458 ],
       [-0.1676111 , -0.56365972, -0.58837961, ..., -0.91528679,
        -0.14925012, -0.57028651],
       ...,
       [-0.28588025, -0.50156985,  2.16946218, ..., -0.91528679,
        -0.14925012, -0.6427786 ],
       [-0.1676111 , -0.0876374 , -0.64210381, ..., -0.91528679,
        -0.14925012, -0.19332764],
       [-0.28588025,  1.00928361,  0.57564452, ...,  1.09255373,
        -0.14925012, -0.65727702]])

(232,)


array([ 6.79950714e-01, -9.32483121e-01, -7.19793483e-01,  1.36752462e+00,
        2.29364886e+00, -1.25045518e+00, -2.13836154e+00,  4.04589654e+00,
       -2.18510134e+00, -6.09713345e-01, -1.75983140e-01, -1.26276321e-01,
       -7.72892780e-02,  1.10493621e-01, -3.59402944e-01, -1.62067786e-01,
        1.51273790e-01, -2.77383846e-01, -5.62908595e-02,  4.31979364e-03,
       -1.13799543e-01, -1.55339859e-01, -6.70064596e-02, -1.38693825e+00,
        5.39623461e-02, -1.01328323e-01, -1.41785492e-01, -3.62163645e-01,
       -8.03366418e-02,  1.96783850e-01, -5.66114826e-01,  5.41442978e-02,
       -1.01417935e-01, -2.97583291e-01, -2.52297230e-01,  5.76001226e-01,
       -3.59170318e-01, -8.56960823e-02, -1.77138584e+00, -1.67218029e-01,
       -3.78908100e-01,  3.85284346e-01, -8.34161077e-02, -6.70033792e-02,
        1.60319299e+00,  9.51641627e-02, -1.29442955e-01,  1.31173173e-01,
       -2.21426493e-01, -1.12569752e-01, -1.37253119e-01,  7.23279727e-01,
       -4.51897529e-02,  

THIS IS MY SELF.B WHATEVER THIS IS 


21.225135326830788

I AM MULTIPLYING


array([12.83509525, 22.96364707, 56.26431799, ..., 46.91300631,
       28.75393254,  9.23047999])

I AM IN THE PREDICT FUNCTION
(31069, 232)


array([[-0.1676111 ,  2.56153032,  0.91589773, ...,  1.09255373,
        -0.14925012, -0.6403622 ],
       [-0.22674567,  0.96789036,  0.84426547, ..., -0.91528679,
        -0.14925012, -0.6379458 ],
       [-0.1676111 , -0.56365972, -0.58837961, ..., -0.91528679,
        -0.14925012, -0.57028651],
       ...,
       [-0.28588025, -0.50156985,  2.16946218, ..., -0.91528679,
        -0.14925012, -0.6427786 ],
       [-0.1676111 , -0.0876374 , -0.64210381, ..., -0.91528679,
        -0.14925012, -0.19332764],
       [-0.28588025,  1.00928361,  0.57564452, ...,  1.09255373,
        -0.14925012, -0.65727702]])

(232,)


array([ 7.30843208e-01, -1.02791511e+00, -7.80362529e-01,  1.48870107e+00,
        2.56820460e+00, -1.37231303e+00, -2.32888806e+00,  4.41868069e+00,
       -2.39147644e+00, -6.72703371e-01, -1.92615751e-01, -1.40560581e-01,
       -8.49158746e-02,  1.24394328e-01, -3.86132480e-01, -1.79333648e-01,
        1.65127697e-01, -3.05770630e-01, -6.11591611e-02,  4.50504340e-03,
       -1.25867748e-01, -1.69812144e-01, -7.33800533e-02, -1.52251198e+00,
        6.12125977e-02, -1.10258562e-01, -1.53741033e-01, -3.99648654e-01,
       -9.03189578e-02,  2.17623629e-01, -6.23495994e-01,  6.16701889e-02,
       -1.09783904e-01, -3.28979474e-01, -2.77953924e-01,  6.39212934e-01,
       -3.95073532e-01, -9.35575830e-02, -1.94368003e+00, -1.82206913e-01,
       -4.19360126e-01,  4.27159253e-01, -9.31949920e-02, -7.48672242e-02,
        1.76525516e+00,  9.88998393e-02, -1.42643261e-01,  1.43195911e-01,
       -2.43888212e-01, -1.22808616e-01, -1.52120594e-01,  8.08243330e-01,
       -4.92961256e-02,  

THIS IS MY SELF.B WHATEVER THIS IS 


23.64512005149569

I AM MULTIPLYING


array([14.34601502, 25.7047384 , 62.12781899, ..., 51.85998915,
       32.01417368, 10.32809267])

ValueError: Dot product shape mismatch, (7768, 232) vs (3,)

<IPython.core.display.Javascript object>

In [None]:
### WARNING: BE CAREFUL WHEN RUNNING THIS, IT WILL USE YOUR ENTIRE CPU!!!

# # Performing Elastic Net with sklearn and CV
# import time
# from sklearn.linear_model import ElasticNetCV
# from sklearn.model_selection import RepeatedKFold
# import warnings

# # Muting the 1000 warnings this cell will output
# warnings.filterwarnings("ignore")

# # Measure run time
# start = time.time()

# # Method of model eval
# cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=42)

# # Defining lists for model eval
# ratios = np.arange(0, 1, 0.01)
# alphas = np.arange(
#     0, 1, 0.01
# )  # [0.0, 1e-5, 1e-4, 1e-3, 1e-2, 0.1, 0.5, 0.75, 1.0, 10.0, 100.0]


# enm = ElasticNetCV(
#     l1_ratio=ratios, alphas=alphas, fit_intercept=False, cv=cv, n_jobs=-1
# )  # n_jobs = -1 will use all processors on CPU
# enm.fit(Xtrain, Ytrain)


# print("--- %s seconds ---" % (time.time() - start))

In [None]:
# Lets take the coefficients of training and the names of our columns and put them into a dataframe
variables = Xtrain.columns.tolist()
model_coef = pd.DataFrame({"coef": enm.coef_, "names": variables})

print("alpha: %f" % enm.alpha_)
print("l1_ratio_: %f" % enm.l1_ratio_)

# show all the coef that are non zero in descending order
display(model_coef[model_coef["coef"] != 0].sort_values(by="coef", ascending=False))


# print(enm.mse_path_[0][0])
enm.alphas_

In [None]:
# These next three cells will be dedicated to plotting values used in CV
MSE = []
l1col = np.concatenate([np.repeat(x, 100) for x in np.arange(0, 1, 0.01)])
alphacol = np.flip(
    np.concatenate([np.arange(0, 1, 0.01) for x in np.arange(0, 1, 0.01)])
)
for i in range(len(enm.mse_path_)):
    for j in range(len(enm.mse_path_[i])):
        MSE.append(enm.mse_path_[i][j])

In [None]:
kFolds = pd.DataFrame(np.array(MSE))
minkFolds = pd.DataFrame(kFolds.min(axis=1), columns=["min"])
graphdf = pd.DataFrame({"l1": l1col, "alpha": alphacol})
graphdf = graphdf.join(minkFolds)
# display(graphdf)

In [None]:
# Plotting 3 dimensional plot
fig = go.Figure(
    data=px.scatter_3d(
        graphdf,
        x="l1",
        y="alpha",
        z="min",
        opacity=0.05,
        labels={
            "l1": "LASSO parameter",
            "alpha": "Ridge parameter",
            "min": "Minimumn MSE",
        },
        color_discrete_sequence=px.colors.qualitative.Safe,
        title="Minimum MSE in 3 Trial 10-Fold CV with Combined LASSO and Ridge Penalty Values",
    )
)
fig.show()

In [None]:
# Now lets predict with our testing data
predictions = enm.predict(Xtest)

# Compared to the actual values
pred_v_act = pd.DataFrame({"predictions": enm.predict(Xtest), "actual": Ytest})
display(pred_v_act)

from sklearn.metrics import *

# Metrics
score = r2_score(Ytest, predictions)
MAE = mean_absolute_error(Ytest, predictions)
MSE = mean_squared_error(Ytest, predictions)

# Printing of Metrics
print("The r-squared value is ", score)
print(
    "The adjusted r-squared value is ",
    1
    - (
        ((1 - score) * (len(pred_v_act.index) - 1))
        / (len(pred_v_act) - len(pred_v_act.columns) - 1)
    ),
)
print("The Mean Absolute Error is ", MAE)
print("The root Mean Squared Error is ", np.sqrt(MSE))