### Imports

In [11]:
import numpy as np
# Imports
import pandas as pd
from sklearn.linear_model import ElasticNet

### Train and Test data split

In [3]:
# Train and Test data
train = pd.read_csv("../data/processed/train_winequality.csv", sep=",")
test = pd.read_csv("../data/processed/test_winequality.csv", sep=",")

In [4]:
print("Train Dataset:\n")
train.head()

Train Dataset:



Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,TARGET
0,8.7,0.69,0.31,3.0,0.086,23.0,81.0,1.0002,3.48,0.74,11.6,6
1,6.1,0.21,0.4,1.4,0.066,40.5,165.0,0.9912,3.25,0.59,11.9,6
2,10.9,0.39,0.47,1.8,0.118,6.0,14.0,0.9982,3.3,0.75,9.8,6
3,8.8,0.685,0.26,1.6,0.088,16.0,23.0,0.99694,3.32,0.47,9.4,5
4,8.4,1.035,0.15,6.0,0.073,11.0,54.0,0.999,3.37,0.49,9.9,5


In [5]:
print("Test Dataset:\n")
test.head()

Test Dataset:



Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,TARGET
0,7.7,0.56,0.08,2.5,0.114,14.0,46.0,0.9971,3.24,0.66,9.6,6
1,7.8,0.5,0.17,1.6,0.082,21.0,102.0,0.996,3.39,0.48,9.5,5
2,10.7,0.67,0.22,2.7,0.107,17.0,34.0,1.0004,3.28,0.98,9.9,6
3,8.5,0.46,0.31,2.25,0.078,32.0,58.0,0.998,3.33,0.54,9.8,5
4,6.7,0.46,0.24,1.7,0.077,18.0,34.0,0.9948,3.39,0.6,10.6,6


In [6]:
train_y = train["TARGET"]
test_y = test["TARGET"]
train_x = train.drop("TARGET", axis=1)
test_x = test.drop("TARGET", axis=1)

### Modeling

In [7]:
lr = ElasticNet(alpha=0.9, l1_ratio=0.4, random_state=42)
lr.fit(train_x, train_y)

In [8]:
predicted_qualities = lr.predict(test_x)

In [9]:
predicted_qualities

array([5.61940061, 5.34754958, 5.69403173, 5.61985775, 5.69873937,
       5.55568692, 5.11675003, 5.74885071, 5.69558124, 5.73100239,
       5.75889485, 5.77167817, 5.69723269, 5.67879833, 5.59090194,
       5.77914807, 5.44028024, 5.58477369, 5.74814051, 5.53648326,
       5.77149048, 5.6730885 , 5.7223836 , 5.718262  , 5.75732474,
       5.70744403, 5.6498246 , 5.6207195 , 5.41094452, 5.70662963,
       5.5491627 , 5.66252422, 5.71993567, 5.74529455, 5.69049596,
       5.46536752, 5.73853539, 5.75690633, 5.69832301, 5.78310225,
       5.70277942, 5.49022417, 5.76376969, 5.26860906, 5.7425324 ,
       5.74261621, 5.72545997, 5.70706843, 5.5510042 , 5.76362266,
       5.21318161, 5.54077226, 5.77343597, 5.41318653, 5.51298832,
       5.42264421, 5.74207116, 5.57994168, 5.68865467, 5.6991374 ,
       5.61301909, 5.74276324, 5.66285677, 5.60138693, 5.74150572,
       5.59939635, 5.74830792, 5.66714805, 5.71142108, 5.48166428,
       5.67735754, 5.50083149, 5.70903431, 5.72091769, 5.73458

In [12]:
# Evaluation
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

rmse = np.sqrt(mean_squared_error(test_y, predicted_qualities))
print("rmse: ", rmse)

mae = mean_absolute_error(test_y, predicted_qualities)
print("mae: ", mae)

r2 = r2_score(test_y, predicted_qualities)
print("r2: ", r2)

rmse:  0.8031192316982253
mae:  0.6551462834843993
r2:  0.013014969842178092
