#### Calculating Estimates without regularization (Normal Linear Regression)

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import datasets

In [2]:
data = datasets.load_boston()

In [3]:
dataframe = pd.DataFrame(data.data)

In [4]:
dataframe

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.0900,1.0,296.0,15.3,396.90,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.90,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.90,5.33
...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.7,2.2875,1.0,273.0,21.0,396.90,9.08
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.90,5.64
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48


In [5]:
dataframe.columns = data.feature_names 

In [6]:
dataframe

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.0900,1.0,296.0,15.3,396.90,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.90,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.90,5.33
...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.7,2.2875,1.0,273.0,21.0,396.90,9.08
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.90,5.64
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48


In [7]:
dataframe_target = np.asarray(data.target)

In [8]:
dataframe['HOUSE PRICE'] = pd.Series(dataframe_target)

In [9]:
dataframe

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,HOUSE PRICE
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.0900,1.0,296.0,15.3,396.90,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.90,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.90,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67,22.4
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.7,2.2875,1.0,273.0,21.0,396.90,9.08,20.6
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.90,5.64,23.9
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48,22.0


In [10]:
x = dataframe.iloc[:,:13]
y = dataframe.iloc[:,-1]

In [11]:
x

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.0900,1.0,296.0,15.3,396.90,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.90,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.90,5.33
...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.7,2.2875,1.0,273.0,21.0,396.90,9.08
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.90,5.64
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48


In [12]:
y

0      24.0
1      21.6
2      34.7
3      33.4
4      36.2
       ... 
501    22.4
502    20.6
503    23.9
504    22.0
505    11.9
Name: HOUSE PRICE, Length: 506, dtype: float64

In [13]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25)

In [14]:
model = LinearRegression()
model.fit(x_train, y_train)
y_predict = model.predict(x_test)

In [15]:
mean_square_error = np.mean((y_predict - y_test)**2)
mean_square_error

27.608629053813832

In [16]:
model_coefficients = pd.DataFrame()
model_coefficients['Columns'] = x_train.columns
model_coefficients['Coefficients Estimates'] = model.coef_
print(model_coefficients)

    Columns  Coefficients Estimates
0      CRIM               -0.117085
1        ZN                0.037995
2     INDUS                0.021390
3      CHAS                3.890230
4       NOX              -16.905906
5        RM                4.979261
6       AGE               -0.012849
7       DIS               -1.558779
8       RAD                0.308903
9       TAX               -0.012083
10  PTRATIO               -0.923314
11        B                0.010573
12    LSTAT               -0.450546


#### Calculating Estimates with ridge regularization

In [17]:
from sklearn.linear_model import Ridge

In [18]:
ridge = Ridge(alpha=1)
ridge.fit(x_train, y_train)
y_predict = ridge.predict(x_test)

In [19]:
y_predict

array([19.12422281, 19.36267819, 19.28062571, 35.93032192, 27.68743448,
       31.98981087, 19.40919435, 16.56683621,  7.30689641, 21.67523392,
       23.60421907, 16.71916176, 20.29094205, 43.72642313, 20.51384567,
       33.5558255 , 22.50240287, 21.46240439, 20.58267627, 26.61586889,
       20.23127411, 13.62006792, 26.76388625, 24.21191929, 19.035755  ,
       15.26990104, 26.03083694, 32.91913822, 13.5328194 , 27.60630548,
       16.0756514 , 22.9177829 , 20.9716858 , 41.8364428 , 19.17400917,
       34.48020347, 15.90147266, 28.52697913, 18.60918213, 19.64884004,
       24.83623744, 16.65109337, 28.90873785, 18.08393676, 38.74334278,
       25.85172418,  7.63111322, 33.04965619, 32.73085291, 42.78112172,
       19.11703528, 28.55806406, 14.73773409, 21.3978907 , 16.10816104,
       12.16327403, 32.20476843,  9.73954113, 21.32067886, 22.61084444,
       16.4517379 , 41.39080664, 11.47164343, 17.64278083, 15.09553514,
       19.81130807, 28.2508685 , 22.9906265 , 19.56794024, 22.55

In [20]:
mean_square_error = np.mean((y_predict - y_test) ** 2)
mean_square_error

27.726175090288113

In [21]:
ridge_coefficient = pd.DataFrame() 
ridge_coefficient["Columns"]= x_train.columns 
ridge_coefficient['Coefficient Estimate'] = pd.Series(ridge.coef_) 
print(ridge_coefficient) 

    Columns  Coefficient Estimate
0      CRIM             -0.113395
1        ZN              0.039781
2     INDUS             -0.009576
3      CHAS              3.649505
4       NOX             -8.954470
5        RM              4.993909
6       AGE             -0.018635
7       DIS             -1.437700
8       RAD              0.294841
9       TAX             -0.012863
10  PTRATIO             -0.836440
11        B              0.010970
12    LSTAT             -0.468305


#### Calculating Estimates with lasso regularization

In [22]:
from sklearn.linear_model import Lasso 

lasso = Lasso(alpha = 1) 
lasso.fit(x_train, y_train) 
y_pred = lasso.predict(x_test) 

mean_squared_error = np.mean((y_pred - y_test)**2) 
print("Mean squared error on test set", mean_squared_error) 

lasso_coeff = pd.DataFrame() 
lasso_coeff["Columns"] = x_train.columns 
lasso_coeff['Coefficient Estimate'] = pd.Series(lasso.coef_) 

print(lasso_coeff) 

Mean squared error on test set 22.960049314673846
    Columns  Coefficient Estimate
0      CRIM             -0.080156
1        ZN              0.042731
2     INDUS             -0.000000
3      CHAS              0.000000
4       NOX             -0.000000
5        RM              1.640848
6       AGE              0.013451
7       DIS             -0.774895
8       RAD              0.285971
9       TAX             -0.014763
10  PTRATIO             -0.738577
11        B              0.009659
12    LSTAT             -0.743757


#### Calculating Estimates with ElasticNet regularization

In [23]:
from sklearn.linear_model import ElasticNet 

e_net = ElasticNet(alpha = .4,l1_ratio=.5) 
e_net.fit(x_train, y_train) 
y_pred_elastic = e_net.predict(x_test) 

mean_squared_error = np.mean((y_pred_elastic - y_test)**2) 
print("Mean Squared Error on test set", mean_squared_error) 

e_net_coeff = pd.DataFrame() 
e_net_coeff["Columns"] = x_train.columns 
e_net_coeff['Coefficient Estimate'] = pd.Series(e_net.coef_) 
e_net_coeff 

Mean Squared Error on test set 23.535431446482548


Unnamed: 0,Columns,Coefficient Estimate
0,CRIM,-0.107158
1,ZN,0.049856
2,INDUS,-0.022335
3,CHAS,0.07873
4,NOX,-0.0
5,RM,2.43193
6,AGE,0.000121
7,DIS,-1.148747
8,RAD,0.345999
9,TAX,-0.016542
