# 0.0. Imports

In [48]:
import requests
import pandas as pd
import numpy  as np
import pickle

from sklearn.model_selection import train_test_split
from sklearn.linear_model    import ElasticNet
from sklearn.metrics         import mean_squared_error, mean_absolute_error, r2_score

## 0.1. Loading Data

In [10]:
# Source of the red-wine quality data; the file uses ';' as its field separator.
csv_url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
df_raw = pd.read_csv( filepath_or_buffer=csv_url, sep=';' )

# 1.0. Data Preparation

In [11]:
# Work on an independent (deep) copy so the raw frame stays untouched.
df1 = df_raw.copy( deep=True )

In [12]:
# Hold out 25% of the rows for testing (75% train / 25% test).
# The fixed random_state makes the partition, and therefore every metric
# computed below, reproducible across kernel restarts.
train, test = train_test_split( df1, test_size=0.25, random_state=42 )

In [13]:
# Split each partition into predictors (every column except 'quality')
# and the 'quality' target.
x_train, y_train = train.drop( columns='quality' ), train['quality']
x_test,  y_test  = test.drop( columns='quality' ),  test['quality']

# 2.0. ML Modelling

In [21]:
# Elastic-net regressor; hyper-parameters are collected in one place so
# they are easy to spot and tweak.
elastic_params = { 'alpha': 0.5, 'l1_ratio': 0.5, 'random_state': 42 }
model = ElasticNet( **elastic_params )

In [22]:
# Fit the regressor on the training partition; fit() returns the estimator,
# so its repr is shown as the cell output.
model.fit( X=x_train, y=y_train )

ElasticNet(alpha=0.5, copy_X=True, fit_intercept=True, l1_ratio=0.5,
           max_iter=1000, normalize=False, positive=False, precompute=False,
           random_state=42, selection='cyclic', tol=0.0001, warm_start=False)

In [24]:
# Predict the target for the held-out test features.
pred = model.predict( X=x_test )

In [28]:
# Error metrics of the predictions against the held-out targets.
mse  = mean_squared_error( y_true=y_test, y_pred=pred )
rmse = np.sqrt( mse )  # root of the mean squared error
mae  = mean_absolute_error( y_true=y_test, y_pred=pred )
r2   = r2_score( y_true=y_test, y_pred=pred )

In [29]:
# Report the three metrics, one per line.
print(
    'RMSE: {}'.format( rmse ),
    'MAE: {}'.format( mae ),
    'R2: {}'.format( r2 ),
    sep='\n'
)

RMSE: 0.7662253029095459
MAE: 0.6191837511265321
R2: 0.12398285597437231


In [30]:
# Shell escape: print the kernel's current working directory.
!pwd

/Users/meigarom/repos/SejaUmDataScientist


In [37]:
# save trained model
# The path is relative to the current working directory instead of an
# absolute path inside one user's home directory, so the notebook can run
# on other machines. The `with` block guarantees the file handle is flushed
# and closed even if pickling fails.
with open( 'deploy/model_wine_quality.pkl', 'wb' ) as model_file:
    pickle.dump( model, model_file )

In [38]:
# Shell escape: list the contents of the deploy directory.
!ls deploy

model_wine_quality     model_wine_quality.pkl


# 5.0. Prediction 

In [83]:
# Draw one random row of features (target column removed) to use as the
# request payload.
df = df1.drop( columns='quality' ).sample( n=1 )

In [84]:
# Display the sampled row.
df

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
741,9.2,0.53,0.24,2.6,0.078,28.0,139.0,0.99788,3.21,0.57,9.5


In [85]:
# Serialise the sampled row to a JSON string: a list with one object per row.
df_json = df.to_json( path_or_buf=None, orient='records' )

In [86]:
# Endpoint, payload, and headers for the prediction request.
# NOTE(review): presumably a locally running prediction service on port 5000 — confirm it is up before running.
url = 'http://0.0.0.0:5000/predict'
data = df_json
header = {'Content-type': 'application/json'}

# Request
# The timeout (seconds) keeps the cell from hanging indefinitely when the
# service is unreachable. raise_for_status() surfaces HTTP error responses
# here instead of as a confusing failure when the body is parsed later.
r = requests.post( url=url, data=data, headers=header, timeout=10 )
r.raise_for_status()

In [87]:
# Decode the JSON body once and reuse it for both the rows and the column
# order (taken from the keys of the first record).
records = r.json()
pd.DataFrame( records, columns=records[0].keys() )

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,prediction
0,9.2,0.53,0.24,2.6,0.078,28.0,139.0,0.99788,3.21,0.57,9.5,5.231065
