# Ridge regression machine learning model

In [22]:
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split

In [3]:
# Read the semicolon-delimited winequality-red.csv file and swap the spaces in
# its column headers for underscores, all in a single chained expression.
wine_quality = (
    pd.read_csv("winequality-red.csv", sep=';')
    .rename(columns=lambda colnm: colnm.replace(" ", "_"))
)

In [4]:
# Preview the first five rows, transposed so that each column reads as a row.
wine_quality.head(5).transpose()

Unnamed: 0,0,1,2,3,4
fixed_acidity,7.4,7.8,7.8,11.2,7.4
volatile_acidity,0.7,0.88,0.76,0.28,0.7
citric_acid,0.0,0.0,0.04,0.56,0.0
residual_sugar,1.9,2.6,2.3,1.9,1.9
chlorides,0.076,0.098,0.092,0.075,0.076
free_sulfur_dioxide,11.0,25.0,15.0,17.0,11.0
total_sulfur_dioxide,34.0,67.0,54.0,60.0,34.0
density,0.9978,0.9968,0.997,0.998,0.9978
pH,3.51,3.2,3.26,3.16,3.51
sulphates,0.56,0.68,0.65,0.58,0.56


In [5]:
# Names of the predictor columns used to fit the models below.
all_colnms = [
    'fixed_acidity',
    'volatile_acidity',
    'citric_acid',
    'residual_sugar',
    'chlorides',
    'free_sulfur_dioxide',
    'total_sulfur_dioxide',
    'density',
    'pH',
    'sulphates',
    'alcohol',
]

In [6]:
# Predictors: every column named in all_colnms.
pdx = wine_quality.loc[:, all_colnms]
# Response: the "quality" column.
pdy = wine_quality.loc[:, "quality"]

In [9]:
# Hold out 30% of the rows for testing; random_state pins the shuffle so the
# split is the same on every run.
split_parts = train_test_split(
    pdx,
    pdy,
    train_size=0.7,
    random_state=42,
)
x_train, x_test, y_train, y_test = split_parts

# Several values of alpha are tried in a grid search to assess how well the model fits:

In [10]:
# Candidate regularisation strengths, from very weak (0.0001) to strong (10).
alphas = [0.0001, 0.001, 0.01, 0.1, 0.5, 1.0, 5.0, 10.0]

In [21]:

# Grid search for Ridge: fit one model per value in `alphas`, score it on the
# train and test splits, and report the alpha with the highest test R-squared.
#
# NOTE(review): the winning alpha is picked using the test split, so the
# reported test R-squared is an optimistic estimate; a separate validation
# split or cross-validation would avoid that.

# R-squared can be negative, so start from -inf rather than 0; otherwise a run
# in which every model scores <= 0 would silently report nothing.
initrsq = float("-inf")
best_alph = None
best_tr_rsqrd = None

for alph in alphas:
    ridge_reg = Ridge(alpha=alph)
    ridge_reg.fit(x_train, y_train)
    tr_rsqrd = ridge_reg.score(x_train, y_train)
    ts_rsqrd = ridge_reg.score(x_test, y_test)

    # Remember the best model seen so far (ties keep the earlier alpha).
    if ts_rsqrd > initrsq:
        initrsq = ts_rsqrd
        best_alph = alph
        best_tr_rsqrd = tr_rsqrd

# Report once, after the whole grid has been evaluated, so the
# "Best Parameters" header really refers to the best model.
if best_alph is not None:
    print("\nRidge Regression: Best Parameters\n")
    print("Lambda: ", best_alph, "Train R-Squaredvalue:", round(best_tr_rsqrd, 5), "Test R-squared value:", round(initrsq, 5))


Ridge Regression: Best Parameters

Lambda:  0.0001 Train R-Squaredvalue: 0.3612 Test R-squared value: 0.35135


# Lasso regression machine learning model

In [23]:
# Candidate regularisation strengths for the Lasso search (the same eight
# values that were tried for Ridge).
alphas = [0.0001, 0.001, 0.01, 0.1, 0.5, 1.0, 5.0, 10.0]

In [25]:
# Grid search for Lasso: fit one model per value in `alphas`, score it on the
# train and test splits, and report the alpha with the highest test R-squared.
#
# NOTE(review): the winning alpha is picked using the test split, so the
# reported test R-squared is an optimistic estimate; a separate validation
# split or cross-validation would avoid that.

# R-squared can be negative, so start from -inf rather than 0; otherwise a run
# in which every model scores <= 0 would silently report nothing.
initrsq = float("-inf")
best_alph = None
best_tr_rsqrd = None

for alph in alphas:
    lasso_reg = Lasso(alpha=alph)
    lasso_reg.fit(x_train, y_train)
    tr_rsqrd = lasso_reg.score(x_train, y_train)
    ts_rsqrd = lasso_reg.score(x_test, y_test)

    # Remember the best model seen so far (ties keep the earlier alpha).
    if ts_rsqrd > initrsq:
        initrsq = ts_rsqrd
        best_alph = alph
        best_tr_rsqrd = tr_rsqrd

# Report once, after the whole grid has been evaluated. The header names
# Lasso: this cell fits Lasso models, not Ridge.
if best_alph is not None:
    print("\nLasso Regression: Best Parameters\n")
    print("Lambda: ", best_alph, "Train R-Squaredvalue:", round(best_tr_rsqrd, 5), "Test R-squared value:", round(initrsq, 5))


Ridge Regression: Best Parameters

Lambda:  0.0001 Train R-Squaredvalue: 0.36101 Test R-squared value: 0.35057


In [26]:
# Fit Ridge and Lasso with the same alpha (0.001) on the training split and
# print each model's coefficient next to the feature it belongs to, so the
# two penalties can be compared feature by feature.

ridge_reg = Ridge(alpha=0.001)
ridge_reg.fit(x_train, y_train)
print("\nRidge Regression coefficient values of Alpha = 0.001\n")
# Pair names with coefficients directly instead of indexing up to a
# hard-coded 11, so the loop follows the feature list if it ever changes.
for colnm, coef in zip(all_colnms, ridge_reg.coef_):
    print(colnm, ": ", coef)

lasso_reg = Lasso(alpha=0.001)
lasso_reg.fit(x_train, y_train)

print("\nLasso Regression coefficient values of Alpha = 0.001\n")
for colnm, coef in zip(all_colnms, lasso_reg.coef_):
    print(colnm, ": ", coef)


Ridge Regression coefficient values of Alpha = 0.001

fixed_acidity :  0.015506587508042666
volatile_acidity :  -1.1050982354876902
citric_acid :  -0.24879865532350992
residual_sugar :  0.00401889539283461
chlorides :  -1.6843839620863434
free_sulfur_dioxide :  0.004636901710963169
total_sulfur_dioxide :  -0.0032837679041055204
density :  -5.567271746801942
pH :  -0.36248001720401146
sulphates :  0.8009191228025597
alcohol :  0.2999182442952113

Lasso Regression coefficient values of Alpha = 0.001

fixed_acidity :  0.014149546369062405
volatile_acidity :  -1.0906236090493846
citric_acid :  -0.18529515004737002
residual_sugar :  -0.00013661024678721814
chlorides :  -1.058775797041006
free_sulfur_dioxide :  0.004831648175148986
total_sulfur_dioxide :  -0.003267228855959229
density :  -0.0
pH :  -0.25690192587072996
sulphates :  0.6944875403164108
alcohol :  0.3077561491242808
