In [1]:
import math

import pandas as pd
import numpy as np

from sklearn import preprocessing
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

from sklearn import svm
from sklearn import tree
from sklearn import ensemble
from sklearn import neighbors
from sklearn import naive_bayes 
from sklearn import linear_model
from sklearn import neural_network

# Reading Data

In [2]:
col_names = ["fixed acidity","volatile acidity","citric acid","residual sugar","chlorides","free sulfur dioxide","total sulfur dioxide","density","pH","sulphates","alcohol","quality"]
data1 = pd.read_csv('winequality-red.csv', names=col_names,delimiter=";")
data2 = pd.read_csv('winequality-white.csv', names=col_names,delimiter=";")

In [4]:
np_data1 =data1.as_matrix()
np_data2 =data2.as_matrix()
print("Red wine data shape: ", np_data1.shape) 
print("White wine data shape: ", np_data2.shape)

Red wine data shape:  (1599, 12)
White wine data shape:  (4898, 12)


# Feature Scaling

In [5]:
normalized_np_data1 = preprocessing.scale(np_data1[:,:10])
normalized_np_data2 = preprocessing.scale(np_data2[:,:10])

# Test Train splitting

In [7]:
x_train1, x_test1, y_train1, y_test1 = train_test_split( normalized_np_data1 , np_data1[:,-1] , test_size=0.20)
x_train2, x_test2, y_train2, y_test2 = train_test_split( normalized_np_data2 , np_data2[:,-1] , test_size=0.20)
print(x_train1.shape)
print(y_train1.shape)

(1279, 10)
(1279,)


# 1) Naive Bayes

In [8]:
def fit_predict_naivebayes( x , y, test ):
    clf = naive_bayes.GaussianNB()
    clf = clf.fit(x, y)
    
    pred = clf.predict( test )    
    return pred

In [9]:
pred1 = fit_predict_naivebayes( x_train1 , y_train1, x_test1 )
pred2 = fit_predict_naivebayes( x_train2 , y_train2, x_test2 )

### Results

In [11]:
print("Red Wine Accuracy: {:.2%}".format( accuracy_score(y_test1, pred1)))  
print ("White Wine Accuracy: {:.2%}".format( accuracy_score(y_test2, pred2))) 

Red Wine Accuracy: 49.38%
White Wine Accuracy: 40.31%


In [462]:
print "Red Wine Confusion matrix"
confusion_matrix(y_test1, pred1 )

Red Wine Confusion matrix


array([[ 0,  0,  1,  0,  0,  0],
       [ 0,  0,  8,  6,  0,  0],
       [ 1,  7, 57, 76,  4,  0],
       [ 0,  6, 23, 80,  7,  0],
       [ 0,  0,  1, 26, 16,  0],
       [ 0,  0,  0,  1,  0,  0]])

In [463]:
print "White Wine Confusion matrix"
confusion_matrix(y_test2, pred2 )

White Wine Confusion matrix


array([[  4,   0,   0,   1,   0,   0],
       [  0,   7,   7,  18,   3,   0],
       [  2,  15, 103, 120,  38,   0],
       [  2,   7,  92, 196, 162,   0],
       [  0,   0,  13,  54, 103,   0],
       [  0,   2,   3,   9,  18,   1]])

# 2) KNN

In [494]:
def fit_predict_knn( x , y, test ):
    clf = neighbors.KNeighborsClassifier( n_neighbors=1 )
    clf = clf.fit(x, y)
    
    pred = clf.predict( test )  
    return pred

In [495]:
pred1 = fit_predict_knn( x_train1 , y_train1, x_test1 )
pred2 = fit_predict_knn( x_train2 , y_train2, x_test2 )

### Results

In [496]:
print "Red Wine Accuracy: {:.2%}".format( accuracy_score(y_test1, pred1))  
print "White Wine Accuracy: {:.2%}".format( accuracy_score(y_test2, pred2)) 

Red Wine Accuracy: 60.94%
White Wine Accuracy: 60.41%


In [497]:
print "Red Wine Confusion matrix"
confusion_matrix(y_test1, pred1 )

Red Wine Confusion matrix


array([[  0,   0,   0,   1,   0,   0],
       [  0,   2,   7,   5,   0,   0],
       [  2,   6, 104,  27,   4,   2],
       [  1,   2,  34,  66,  13,   0],
       [  0,   3,   3,  12,  23,   2],
       [  0,   0,   0,   1,   0,   0]])

In [498]:
print "White Wine Confusion matrix"
confusion_matrix(y_test2, pred2 )

White Wine Confusion matrix


array([[  0,   1,   2,   2,   0,   0,   0],
       [  0,   8,  14,   9,   3,   1,   0],
       [  0,   7, 177,  76,  16,   2,   0],
       [  0,   9,  93, 294,  55,   8,   0],
       [  0,   0,  14,  49,  96,  10,   1],
       [  0,   0,   2,   7,   7,  17,   0],
       [  0,   0,   0,   0,   0,   0,   0]])

# 3) Decision Tree

In [499]:
def fit_predict_decisiontree( x , y, test ):
    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(x, y)
    
    pred = clf.predict( test )    
    return pred

In [500]:
pred1 = fit_predict_decisiontree( x_train1 , y_train1, x_test1 )
pred2 = fit_predict_decisiontree( x_train2 , y_train2, x_test2 )

### Results

In [501]:
print "Red Wine Accuracy: {:.2%}".format( accuracy_score(y_test1, pred1))  
print "White Wine Accuracy: {:.2%}".format( accuracy_score(y_test2, pred2))  

Red Wine Accuracy: 61.56%
White Wine Accuracy: 60.61%


In [502]:
print "Red Wine Confusion matrix"
confusion_matrix(y_test1, pred1 )

Red Wine Confusion matrix


array([[  0,   0,   0,   1,   0,   0],
       [  0,   0,  10,   3,   1,   0],
       [  2,   3, 104,  30,   5,   1],
       [  0,   1,  33,  71,  11,   0],
       [  1,   0,   8,  10,  22,   2],
       [  0,   0,   0,   1,   0,   0]])

In [503]:
print "White Wine Confusion matrix"
confusion_matrix(y_test2, pred2 )

White Wine Confusion matrix


array([[  0,   0,   2,   2,   1,   0],
       [  0,   5,  18,   7,   4,   1],
       [  1,  11, 171,  78,  16,   1],
       [  1,   8,  88, 300,  52,  10],
       [  0,   3,  12,  48, 100,   7],
       [  0,   0,   3,  11,   1,  18]])

# 4) Random Forest

In [504]:
def fit_predict_randomforest( x , y, test ):
    clf = ensemble.RandomForestClassifier( n_estimators=10 , max_features='auto',max_depth=20)
    clf = clf.fit(x, y)
    
    pred = clf.predict( test ) 
    return pred

In [505]:
pred1 = fit_predict_randomforest( x_train1 , y_train1, x_test1 )
pred2 = fit_predict_randomforest( x_train2 , y_train2, x_test2 )

### Results

In [506]:
print "Red Wine Accuracy: {:.2%}".format( accuracy_score(y_test1, pred1))  
print "White Wine Accuracy: {:.2%}".format( accuracy_score(y_test2, pred2))  

Red Wine Accuracy: 64.69%
White Wine Accuracy: 64.49%


In [507]:
print "Red Wine Confusion matrix"
confusion_matrix(y_test1, pred1 )

Red Wine Confusion matrix


array([[  0,   0,   0,   1,   0,   0],
       [  0,   0,  11,   3,   0,   0],
       [  0,   0, 115,  28,   2,   0],
       [  0,   0,  39,  73,   4,   0],
       [  0,   0,   6,  17,  19,   1],
       [  0,   0,   0,   1,   0,   0]])

In [508]:
print "White Wine Confusion matrix"
confusion_matrix(y_test2, pred2 )

White Wine Confusion matrix


array([[  0,   0,   2,   3,   0,   0],
       [  0,   7,  20,   7,   1,   0],
       [  0,   4, 198,  72,   4,   0],
       [  0,   3,  99, 326,  29,   2],
       [  0,   0,  12,  65,  88,   5],
       [  0,   0,   1,  15,   4,  13]])

# 5) Logistic Regression 

In [520]:
def fit_predict_logisiticregression( x , y, test ):
    clf = linear_model.LogisticRegression( )
    clf = clf.fit(x, y)
    
    pred = clf.predict( test ) 
    return pred


In [521]:
pred1 = fit_predict_logisiticregression( x_train1 , y_train1, x_test1 )
pred2 = fit_predict_logisiticregression( x_train2 , y_train2, x_test2 )

### Results

In [522]:
print "Red Wine Accuracy: {:.2%}".format( accuracy_score(y_test1, pred1))  
print "White Wine Accuracy: {:.2%}".format( accuracy_score(y_test2, pred2))  

Red Wine Accuracy: 55.00%
White Wine Accuracy: 51.94%


In [523]:
print "Red Wine Confusion matrix"
confusion_matrix(y_test1, pred1 )

Red Wine Confusion matrix


array([[  0,   0,   1,   0,   0,   0],
       [  0,   0,  10,   4,   0,   0],
       [  0,   0, 108,  37,   0,   0],
       [  0,   0,  52,  64,   0,   0],
       [  0,   0,   2,  37,   4,   0],
       [  0,   0,   0,   0,   1,   0]])

In [524]:
print "White Wine Confusion matrix"
confusion_matrix(y_test2, pred2 )

White Wine Confusion matrix


array([[  0,   0,   2,   3,   0,   0],
       [  0,   0,  17,  18,   0,   0],
       [  0,   0, 135, 143,   0,   0],
       [  0,   0,  98, 347,  14,   0],
       [  0,   0,  15, 128,  27,   0],
       [  0,   0,   1,  23,   9,   0]])

# 6) SVM

In [568]:
def fit_predict_svm( x , y, test ):
    clf = svm.SVC( kernel='rbf')
    clf = clf.fit(x, y)
    
    pred = clf.predict( test ) 
    return pred

In [569]:
pred1 = fit_predict_svm( x_train1 , y_train1, x_test1 )
pred2 = fit_predict_svm( x_train2 , y_train2, x_test2 )

### Results

In [570]:
print "Red Wine Accuracy: {:.2%}".format( accuracy_score(y_test1, pred1))  
print "White Wine Accuracy: {:.2%}".format( accuracy_score(y_test2, pred2))  

Red Wine Accuracy: 58.13%
White Wine Accuracy: 55.20%


In [571]:
print "Red Wine Confusion matrix"
confusion_matrix(y_test1, pred1 )

Red Wine Confusion matrix


array([[ 0,  0,  0,  1,  0,  0],
       [ 0,  0, 11,  3,  0,  0],
       [ 0,  0, 96, 48,  1,  0],
       [ 0,  0, 40, 75,  1,  0],
       [ 0,  0,  3, 25, 15,  0],
       [ 0,  0,  0,  1,  0,  0]])

In [572]:
print "White Wine Confusion matrix"
confusion_matrix(y_test2, pred2 )

White Wine Confusion matrix


array([[  0,   0,   2,   3,   0,   0],
       [  0,   4,  19,  12,   0,   0],
       [  0,   1, 162, 115,   0,   0],
       [  0,   0, 102, 342,  15,   0],
       [  0,   0,   7, 130,  33,   0],
       [  0,   0,   1,  25,   7,   0]])

# 7) Neural Network

In [585]:
def fit_predict_nn( x , y, test ):
    clf = neural_network.MLPClassifier( )
    clf = clf.fit(x, y)
    
    pred = clf.predict( test ) 
    return pred

In [586]:
pred1 = fit_predict_nn( x_train1 , y_train1, x_test1 )
pred2 = fit_predict_nn( x_train2 , y_train2, x_test2 )



### Results

In [587]:
print "Red Wine Accuracy: {:.2%}".format( accuracy_score(y_test1, pred1))  
print "White Wine Accuracy: {:.2%}".format( accuracy_score(y_test2, pred2))  

Red Wine Accuracy: 54.37%
White Wine Accuracy: 57.76%


In [588]:
print "Red Wine Confusion matrix"
confusion_matrix(y_test1, pred1 )

Red Wine Confusion matrix


array([[ 0,  0,  1,  0,  0,  0],
       [ 0,  0, 10,  4,  0,  0],
       [ 0,  2, 91, 48,  4,  0],
       [ 0,  1, 41, 63, 11,  0],
       [ 0,  0,  2, 21, 20,  0],
       [ 0,  0,  0,  0,  1,  0]])

In [589]:
print "White Wine Confusion matrix"
confusion_matrix(y_test2, pred2 )

White Wine Confusion matrix


array([[  0,   1,   2,   2,   0,   0],
       [  0,   6,  17,  12,   0,   0],
       [  0,   5, 163, 107,   3,   0],
       [  0,   1,  88, 330,  40,   0],
       [  0,   0,   8,  95,  66,   1],
       [  0,   0,   1,  18,  13,   1]])