Wine Quality (Red) Data set from https://archive.ics.uci.edu/ml/datasets/wine+quality

We're looking for Wine Quality here

In [19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import confusion_matrix
from sklearn import metrics
from sklearn.model_selection import KFold

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


In [20]:
winedf = pd.read_csv('winequality-red.csv',sep=";")
winedf.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [21]:
winedf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1599 entries, 0 to 1598
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   fixed acidity         1599 non-null   float64
 1   volatile acidity      1599 non-null   float64
 2   citric acid           1599 non-null   float64
 3   residual sugar        1599 non-null   float64
 4   chlorides             1599 non-null   float64
 5   free sulfur dioxide   1599 non-null   float64
 6   total sulfur dioxide  1599 non-null   float64
 7   density               1599 non-null   float64
 8   pH                    1599 non-null   float64
 9   sulphates             1599 non-null   float64
 10  alcohol               1599 non-null   float64
 11  quality               1599 non-null   int64  
dtypes: float64(11), int64(1)
memory usage: 150.0 KB


In [22]:
#no null values found in dataframe
winedf.isnull().values.all()

False

In [23]:
def calc(labs, pred):
    cm = confusion_matrix(labs,pred)
    fp = int((cm.sum(axis=0) - np.diag(cm)).sum() ) 
    fn = int((cm.sum(axis=1) - np.diag(cm)).sum() )
    tp = int(np.diag(cm).sum())
    tn = int(abs(((cm.ravel().sum())*(cm.shape[1])) - (fp + fn + tp)))
    posi = tp + fn
    negi = tn +fp
    tpr = tp/posi
    tnr = tn/negi
    fpr= fp/negi
    fnr = fn / posi
    preci = tp/(tp+fp)
    f1 = (2 *tp)/(2 * tp + fp + fn)    
    acc = (tp+tn)/(posi+negi)
    err = (fp+fn)/(posi + negi)
    bacc = (tpr+tnr)/2
    tss = (tp/(tp+fn))-(fp/(fp+tn))
    hss = (2*((tp*tn)-(fp*fn))/((tp+fn)*(fn+tn)+(tp+fp)*(fp+tn)))
    indval = [fp,fn,tp,tn,posi,negi,tpr,tnr,fpr,fnr,preci,f1,acc,err,bacc,tss,hss]
    return indval

In [24]:
def aveCalc(df):
    rst=[]
    for ind,row in df.iterrows():
        val=(row.sum()/len(row))
        rst.append(val)
    return rst

In [25]:
X= winedf.drop('quality', axis=1)
y = winedf['quality']

# Random Forest & SVM

In [26]:
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=50, random_state=8)

In [27]:
from sklearn.svm import SVC
model_svm = SVC()

In [28]:
k = 10
kf = KFold(n_splits=k, random_state=None)
fc = 0
ind = ['FP','FN','TP','TN','Positive','Negative','TPR','TNR','FPR','FNR','Precision','F1','Accuracy','Error','BACC','TSS','HSS']

rf_df = pd.DataFrame(index=ind)
svm_df = pd.DataFrame(index=ind)
gru_df = pd.DataFrame(index=ind)

In [29]:
fc = 0
for train_ind , test_ind in kf.split(X):
    fc=fc+1
    cn= 'fold '+str(fc)
    X_train , X_test = X.iloc[train_ind,:],X.iloc[test_ind,:]
    y_train , y_test = y[train_ind] , y[test_ind]
   
    #Random Forest
    classifier.fit(X_train,y_train)
    RF_pred = classifier.predict(X_test)

    Rf_cal = calc(y_test, RF_pred)
    rf_df[cn]=Rf_cal


  # SVM 
    model_svm.fit(X_train, y_train)
    SVM_pred = model_svm.predict(X_test)
    
    svm_cal = calc(y_test, SVM_pred)
    svm_df[cn]=svm_cal
     

rf_df['Average']=aveCalc(rf_df)
svm_df['Average']=aveCalc(svm_df)

## Random Forest Results

In [30]:
rf_df

Unnamed: 0,fold 1,fold 2,fold 3,fold 4,fold 5,fold 6,fold 7,fold 8,fold 9,fold 10,Average
FP,57.0,68.0,79.0,69.0,58.0,80.0,70.0,62.0,57.0,70.0,67.0
FN,57.0,68.0,79.0,69.0,58.0,80.0,70.0,62.0,57.0,70.0,67.0
TP,103.0,92.0,81.0,91.0,102.0,80.0,90.0,98.0,103.0,89.0,92.9
TN,583.0,572.0,721.0,731.0,582.0,720.0,570.0,578.0,743.0,725.0,652.5
Positive,160.0,160.0,160.0,160.0,160.0,160.0,160.0,160.0,160.0,159.0,159.9
Negative,640.0,640.0,800.0,800.0,640.0,800.0,640.0,640.0,800.0,795.0,719.5
TPR,0.64375,0.575,0.50625,0.56875,0.6375,0.5,0.5625,0.6125,0.64375,0.559748,0.580975
TNR,0.910937,0.89375,0.90125,0.91375,0.909375,0.9,0.890625,0.903125,0.92875,0.91195,0.906351
FPR,0.089063,0.10625,0.09875,0.08625,0.090625,0.1,0.109375,0.096875,0.07125,0.08805,0.093649
FNR,0.35625,0.425,0.49375,0.43125,0.3625,0.5,0.4375,0.3875,0.35625,0.440252,0.419025


# SVM Results

In [31]:
svm_df

Unnamed: 0,fold 1,fold 2,fold 3,fold 4,fold 5,fold 6,fold 7,fold 8,fold 9,fold 10,Average
FP,75.0,69.0,81.0,82.0,81.0,101.0,86.0,82.0,70.0,83.0,81.0
FN,75.0,69.0,81.0,82.0,81.0,101.0,86.0,82.0,70.0,83.0,81.0
TP,85.0,91.0,79.0,78.0,79.0,59.0,74.0,78.0,90.0,76.0,78.9
TN,405.0,571.0,719.0,718.0,559.0,699.0,394.0,558.0,730.0,712.0,606.5
Positive,160.0,160.0,160.0,160.0,160.0,160.0,160.0,160.0,160.0,159.0,159.9
Negative,480.0,640.0,800.0,800.0,640.0,800.0,480.0,640.0,800.0,795.0,687.5
TPR,0.53125,0.56875,0.49375,0.4875,0.49375,0.36875,0.4625,0.4875,0.5625,0.477987,0.493424
TNR,0.84375,0.892188,0.89875,0.8975,0.873437,0.87375,0.820833,0.871875,0.9125,0.895597,0.878018
FPR,0.15625,0.107813,0.10125,0.1025,0.126562,0.12625,0.179167,0.128125,0.0875,0.104403,0.121982
FNR,0.46875,0.43125,0.50625,0.5125,0.50625,0.63125,0.5375,0.5125,0.4375,0.522013,0.506576


# LSTM

In [32]:
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras import metrics 

In [42]:
lstm_mod = Sequential()
lstm_mod.add(Dense(12, activation ='softmax', input_shape =(11, )))
lstm_mod.add(Dense(9, activation ='softmax'))
lstm_mod.add(Dense(1, activation ='sigmoid'))
lstm_mod.output_shape
lstm_mod.summary()
lstm_mod.get_config()
  
# List all weight tensors
lstm_mod.get_weights()
lstm_mod.compile(loss ='binary_crossentropy', 
  optimizer ='adamax', metrics = [metrics.categorical_accuracy])

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_15 (Dense)             (None, 12)                144       
_________________________________________________________________
dense_16 (Dense)             (None, 9)                 117       
_________________________________________________________________
dense_17 (Dense)             (None, 1)                 10        
Total params: 271
Trainable params: 271
Non-trainable params: 0
_________________________________________________________________


In [43]:
for train_ind , test_ind in kf.split(X):
    fc=fc+1
    cn= 'fold '+str(fc)
    X_train , X_test = X.iloc[train_ind,:],X.iloc[test_ind,:]
    y_train , y_test = y[train_ind] , y[test_ind]
    lstm_mod.fit(X_train, y_train, epochs = 3,batch_size = 32, verbose = 1,validation_split=0)
    y_pred = lstm_mod.predict(X_test)
    print(y_pred)

Epoch 1/3
Epoch 2/3
Epoch 3/3
[[0.5612089 ]
 [0.56389713]
 [0.56380725]
 [0.5638626 ]
 [0.5612089 ]
 [0.5632008 ]
 [0.56386447]
 [0.55638343]
 [0.5563855 ]
 [0.56391186]
 [0.5638838 ]
 [0.56391186]
 [0.5638538 ]
 [0.55891204]
 [0.56391484]
 [0.56391484]
 [0.56391454]
 [0.5638392 ]
 [0.5582424 ]
 [0.5638253 ]
 [0.56386745]
 [0.56389695]
 [0.56281346]
 [0.56388116]
 [0.56134063]
 [0.55656034]
 [0.5562399 ]
 [0.56281346]
 [0.56102717]
 [0.5563806 ]
 [0.5639095 ]
 [0.55774677]
 [0.56391466]
 [0.5638609 ]
 [0.56367266]
 [0.55638105]
 [0.5564216 ]
 [0.5569162 ]
 [0.5565051 ]
 [0.56389666]
 [0.56389666]
 [0.5634314 ]
 [0.55635655]
 [0.556446  ]
 [0.55628276]
 [0.5638484 ]
 [0.5639147 ]
 [0.5613487 ]
 [0.5563371 ]
 [0.56391346]
 [0.55682456]
 [0.55637836]
 [0.5563737 ]
 [0.5639148 ]
 [0.56390625]
 [0.5618978 ]
 [0.5559503 ]
 [0.56391466]
 [0.56379294]
 [0.56342494]
 [0.5637456 ]
 [0.56391466]
 [0.56135005]
 [0.5637985 ]
 [0.55587965]
 [0.55587965]
 [0.5631508 ]
 [0.5563832 ]
 [0.56388974]
 [0.

 [0.77954155]]
Epoch 1/3
Epoch 2/3
Epoch 3/3
[[0.8289343 ]
 [0.8289343 ]
 [0.8289343 ]
 [0.8289343 ]
 [0.8289343 ]
 [0.82880586]
 [0.8289327 ]
 [0.82889503]
 [0.8289342 ]
 [0.8289343 ]
 [0.8289343 ]
 [0.8289343 ]
 [0.8289343 ]
 [0.8289343 ]
 [0.82893413]
 [0.8289343 ]
 [0.8289343 ]
 [0.8289343 ]
 [0.82893217]
 [0.82890666]
 [0.82893217]
 [0.8289343 ]
 [0.82893413]
 [0.82868266]
 [0.8289343 ]
 [0.8289343 ]
 [0.8289214 ]
 [0.8288263 ]
 [0.8289343 ]
 [0.8288263 ]
 [0.8289343 ]
 [0.82884336]
 [0.8289343 ]
 [0.82884336]
 [0.8289175 ]
 [0.8289343 ]
 [0.8289175 ]
 [0.8289343 ]
 [0.8289343 ]
 [0.8289343 ]
 [0.8289343 ]
 [0.8289343 ]
 [0.8289343 ]
 [0.82891417]
 [0.8289343 ]
 [0.82891417]
 [0.8289343 ]
 [0.8289343 ]
 [0.82774925]
 [0.82892597]
 [0.82891786]
 [0.8289343 ]
 [0.8289343 ]
 [0.8289343 ]
 [0.8289343 ]
 [0.8289343 ]
 [0.8289339 ]
 [0.8289339 ]
 [0.8289343 ]
 [0.8289103 ]
 [0.8289343 ]
 [0.8289339 ]
 [0.8289337 ]
 [0.8289343 ]
 [0.82881844]
 [0.8289343 ]
 [0.8289331 ]
 [0.8289343 ]
 [0

 [0.91986334]]
Epoch 1/3
Epoch 2/3
Epoch 3/3
[[0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.93783784]
 [0.9381422 ]
 [0.9381422 ]
 [0.93814003]
 [0.9381422 ]
 [0.9381422 ]
 [0.93813956]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381275 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381412 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.93814206]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381364 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381422 ]
 [0.9381412 ]
 [0.9381395 ]
 [0