# 0.0. Imports

In [2]:
import requests
import pandas as pd
import numpy  as np
import pickle
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model    import ElasticNet
from sklearn.metrics         import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing   import MinMaxScaler

## 0.1. Loading Data

In [3]:
# Red-wine quality dataset hosted by the UCI archive; the file is ';'-separated.
# Fetched over HTTPS so the training data cannot be altered in transit.
csv_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
df_raw = pd.read_csv( csv_url, sep=';' )

# 1.0. Data Preparation

In [4]:
# Work on a copy so df_raw stays untouched (it is sampled again in the prediction section)
df1 = df_raw.copy()

## 1.1. Rescaling

In [5]:
# Min-max scaler (default settings), fitted per column further below:
# x_new = ( x_old - min ) / ( max - min )
mms = MinMaxScaler()

In [6]:
# Fit the scaler on one column at a time and pickle it right after each fit, so the
# serving code can apply the exact same transformation. `fit_transform` refits `mms`
# from scratch, so each pickle holds only the state of its own column.
# Inputs are read from df_raw (df1 is its copy) so re-running this cell does not
# transform already-transformed values a second time.
# NOTE(review): the scalers are fitted on the full dataset before the train/test
# split, which leaks test-set min/max into training -- consider fitting on the
# training rows only.

# free sulfur dioxide
df1['free sulfur dioxide'] = mms.fit_transform( df_raw[['free sulfur dioxide']].values )
with open( '/Users/meigarom/repos/SejaUmDataScientist/deploy/free_sulfur_scaler.pkl', 'wb' ) as scaler_file:
    pickle.dump( mms, scaler_file )

# total sulfur dioxide: log1p first, then min-max on the log scale
df1['total sulfur dioxide'] = mms.fit_transform( np.log1p( df_raw[['total sulfur dioxide']].values ) )
with open( '/Users/meigarom/repos/SejaUmDataScientist/deploy/total_sulfur_scaler.pkl', 'wb' ) as scaler_file:
    pickle.dump( mms, scaler_file )

In [7]:
# 75% train / 25% test; the fixed seed makes the split (and the metrics below) reproducible
train, test = train_test_split( df1, test_size=0.25, random_state=42 )

In [8]:
# separate the predictors from the 'quality' target in both partitions
x_train, y_train = train.drop( columns='quality' ), train['quality']
x_test, y_test = test.drop( columns='quality' ), test['quality']

# 2.0. ML Modelling

In [9]:
# ElasticNet regressor: alpha is the penalty strength, l1_ratio the L1/L2 mix
model = ElasticNet(
    alpha=0.5,
    l1_ratio=0.5,
    random_state=42,
)

In [10]:
# Fit the model on the training features / target.
# Left as a bare expression so the cell displays the fitted estimator's repr.
model.fit( x_train, y_train )

ElasticNet(alpha=0.5, copy_X=True, fit_intercept=True, l1_ratio=0.5,
           max_iter=1000, normalize=False, positive=False, precompute=False,
           random_state=42, selection='cyclic', tol=0.0001, warm_start=False)

In [11]:
# Predict quality for the held-out test features
pred = model.predict( x_test )

In [12]:
# hold-out metrics: RMSE (square root of the MSE), MAE and R2
mse  = mean_squared_error( y_test, pred )
rmse = np.sqrt( mse )
mae  = mean_absolute_error( y_test, pred )
r2   = r2_score( y_test, pred )

In [13]:
# report the three hold-out metrics, one per line
print( 'RMSE: {}'.format( rmse ), 'MAE: {}'.format( mae ), 'R2: {}'.format( r2 ), sep='\n' )

RMSE: 0.7125122318665796
MAE: 0.6001158070422773
R2: 0.13246906591279461


In [14]:
# save trained model; the context manager guarantees the file is flushed and closed
with open( '/Users/meigarom/repos/SejaUmDataScientist/deploy/model_wine_quality.pkl', 'wb' ) as model_file:
    pickle.dump( model, model_file )

In [15]:
# list the deploy directory (relative path, so it depends on the kernel's working directory)
!ls deploy

[34m__pycache__[m[m             handler.py              total_sulfur_scaler.pkl
free_sulfur_scaler.pkl  model_wine_quality.pkl  wine_quality.py


# 3.0. Prediction

In [16]:
# draw one row (without the target) to use as the example request payload;
# the fixed seed keeps the sampled row stable across runs
df = df_raw.drop( 'quality', axis=1 ).sample( n=1, random_state=42 )

In [17]:
# display the sampled row
df

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
648,8.7,0.48,0.3,2.8,0.066,10.0,28.0,0.9964,3.33,0.67,11.2


In [18]:
# serialize the sampled row as a JSON array of records, one object per row
df_json = df.to_json( orient='records' )

In [19]:
# Endpoint of the deployed model (for a locally running server use 'http://0.0.0.0:5000/predict')
url = 'https://wine-quality-model.herokuapp.com/predict'
data = df_json
header = {'Content-type': 'application/json'}

# Request -- requests waits indefinitely unless a timeout is given, so bound the wait
r = requests.post( url=url, data=data, headers=header, timeout=30 )
print( r.status_code )

200


In [20]:
# parse the response body once; the first record's keys define the column order
records = r.json()
pd.DataFrame( records, columns=records[0].keys() )

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,prediction
0,8.7,0.48,0.3,2.8,0.066,0.126761,6.99632,0.9964,3.33,0.67,11.2,5.725362


## Class In Production

In [17]:
import pickle 

class WineQuality( object ):
    """Serving-side feature preparation for the wine-quality model.

    Loads the two pickled scalers written by the rescaling cell of this notebook
    and applies the same transformations to incoming rows.
    """

    def __init__( self, model_dir='/Users/meigarom/repos/SejaUmDataScientist/deploy' ):
        """Load the fitted scalers.

        Parameters
        ----------
        model_dir : str
            Directory containing `free_sulfur_scaler.pkl` and
            `total_sulfur_scaler.pkl`. Defaults to the directory the notebook
            writes its artifacts to.
        """
        # NOTE: unpickling can execute arbitrary code -- only load trusted artifacts.
        # Context managers make sure the file handles are closed after loading.
        with open( '{}/free_sulfur_scaler.pkl'.format( model_dir ), 'rb' ) as scaler_file:
            self.free_sulfur_scaler = pickle.load( scaler_file )
        with open( '{}/total_sulfur_scaler.pkl'.format( model_dir ), 'rb' ) as scaler_file:
            self.total_sulfur_scaler = pickle.load( scaler_file )

    def data_preparation( self, df ):
        """Apply the training-time rescaling to `df`.

        The two sulfur columns are overwritten in place and the same DataFrame
        is returned.

        Parameters
        ----------
        df : pandas.DataFrame
            Must contain 'free sulfur dioxide' and 'total sulfur dioxide'.

        Returns
        -------
        pandas.DataFrame
            `df`, with both columns rescaled.
        """
        # The scalers were fitted on 2-D (n, 1) arrays, so feed them the same shape
        # and flatten the result before assigning it back to the column.
        # rescaling free sulfur
        free_sulfur = df[['free sulfur dioxide']].values
        df['free sulfur dioxide'] = self.free_sulfur_scaler.transform( free_sulfur ).ravel()

        # rescaling total sulfur: training applied log1p before fitting the scaler,
        # so the same log transform has to happen here first.
        # Relies on `numpy as np` from the notebook's import cell; import it
        # explicitly if this class is moved into its own module.
        total_sulfur = np.log1p( df[['total sulfur dioxide']].values )
        df['total sulfur dioxide'] = self.total_sulfur_scaler.transform( total_sulfur ).ravel()

        return df