In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge,Lasso
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_diabetes

In [2]:
diabetes = load_diabetes()

In [3]:
diabetes

{'data': array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
          0.01990749, -0.01764613],
        [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
         -0.06833155, -0.09220405],
        [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
          0.00286131, -0.02593034],
        ...,
        [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
         -0.04688253,  0.01549073],
        [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
          0.04452873, -0.02593034],
        [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
         -0.00422151,  0.00306441]], shape=(442, 10)),
 'target': array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
         69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
         68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
         87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
        259.,  53., 190., 142.,  75., 142.

In [5]:
df = pd.DataFrame(diabetes.data,columns=diabetes.feature_names)

In [6]:
df

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,0.038076,0.050680,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204
2,0.085299,0.050680,0.044451,-0.005670,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.025930
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641
...,...,...,...,...,...,...,...,...,...,...
437,0.041708,0.050680,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207
438,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018114,0.044485
439,0.041708,0.050680,-0.015906,0.017293,-0.037344,-0.013840,-0.024993,-0.011080,-0.046883,0.015491
440,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044529,-0.025930


In [7]:
df['target'] = diabetes.target

In [8]:
df

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.050680,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.050680,0.044451,-0.005670,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.025930,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0
...,...,...,...,...,...,...,...,...,...,...,...
437,0.041708,0.050680,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207,178.0
438,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018114,0.044485,104.0
439,0.041708,0.050680,-0.015906,0.017293,-0.037344,-0.013840,-0.024993,-0.011080,-0.046883,0.015491,132.0
440,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044529,-0.025930,220.0


In [9]:
x = df.drop(columns=['target'])

In [10]:
x

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,0.038076,0.050680,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204
2,0.085299,0.050680,0.044451,-0.005670,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.025930
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641
...,...,...,...,...,...,...,...,...,...,...
437,0.041708,0.050680,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207
438,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018114,0.044485
439,0.041708,0.050680,-0.015906,0.017293,-0.037344,-0.013840,-0.024993,-0.011080,-0.046883,0.015491
440,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044529,-0.025930


In [11]:
y = df['target']

In [12]:
y

0      151.0
1       75.0
2      141.0
3      206.0
4      135.0
       ...  
437    178.0
438    104.0
439    132.0
440    220.0
441     57.0
Name: target, Length: 442, dtype: float64

In [13]:
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=.2)

In [15]:
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled  = scaler.transform(x_test)

In [16]:
lasso_model = Lasso()
ridge_model = Ridge()

In [None]:
lasso_model.fit(x_train_scaled,y_train)


In [19]:
ridge_model.fit(x_train_scaled,y_train)

In [20]:
lasso_model.predict(x_test_scaled)

array([ 57.41763967, 177.52317814, 108.80062611, 125.57714177,
       127.57092415, 106.32224068, 105.53880396, 210.26063934,
       152.25595332, 265.74866676, 146.62790994, 174.56336314,
       101.60122593, 199.60852714, 185.88348182, 202.0007006 ,
       164.21630532, 193.63122859, 126.72193208,  98.66769605,
       152.68324073,  54.98554513,  71.3398991 , 205.93495347,
       263.51697456,  55.26382331, 150.02753765,  94.85029091,
       114.01285676,  61.57016925, 141.5410891 , 114.63754852,
       143.28668364, 102.80489401, 142.26209737, 170.0839821 ,
       155.45756438,  88.17336348, 123.69184729, 139.16740517,
       255.54240211, 100.94917742, 204.63581136, 282.39533307,
       183.96008828,  94.15143761, 168.95341606, 133.40083798,
        45.4344151 ,  53.73423612,  69.57334419,  79.77900011,
       215.6246588 , 223.52475192, 126.48978685, 162.69537301,
       109.58727442, 156.37342921, 104.37761451,  86.56004226,
       167.39063369, 122.64625199, 136.00230848,  87.08

In [21]:
ridge_model.predict(x_test_scaled)

array([ 52.28603009, 184.0184653 , 113.85829446, 130.6158662 ,
       126.19814994, 110.11991199, 105.75488764, 211.63980721,
       155.75971046, 258.84310792, 141.10056826, 179.21073357,
       104.61169717, 204.66786375, 184.20073525, 205.46482838,
       164.94327585, 197.60686969, 132.26885976,  90.69738696,
       152.49625978,  44.98308213,  68.71442686, 209.44853793,
       256.31841494,  52.03467117, 154.36052096,  84.53262203,
       112.84437505,  64.86659328, 140.77511063, 115.19804423,
       148.58210149,  97.24027257, 137.5112301 , 165.55122744,
       157.60254495,  86.22957276, 123.39548728, 141.37194264,
       259.83495013, 101.79423382, 204.53645411, 280.29055384,
       189.52653148,  87.85255777, 170.86839693, 137.57286341,
        38.15718515,  40.86983445,  62.69520462,  77.7184743 ,
       218.41325688, 230.10096216, 122.29975164, 164.60732691,
       107.87788611, 158.25544557,  95.47611058,  81.56200111,
       163.92471488, 116.37983616, 138.76905075,  84.61

In [22]:
mean_squared_error(y_test,ridge_model.predict(x_test_scaled))

2631.383111945432

In [23]:
mean_squared_error(y_test,lasso_model.predict(x_test_scaled))

2692.6114720973596

In [24]:
from sklearn.metrics import r2_score

In [25]:
r2_score(y_test,ridge_model.predict(x_test_scaled))

0.5858931293576692

In [26]:
import pickle

In [27]:
with open("Ridge_model.pkl",'wb') as file:
    pickle.dump(ridge_model,file)

In [30]:
with open("Lasso_model.pkl",'wb') as file:
    pickle.dump(lasso_model,file)