Compare the accuracy, precision, recall and F1 score of following three classification algorithms for Wine Quality Prediction. 

Logistic Regression Classifier <br>
Support Vector Classifier <br>
Naïve Bayes Classifier <br>
KNN Algorithm <br>
Decision Tree Classifier <br>

Dataset: https://archive.ics.uci.edu/dataset/186/wine+quality 

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, f1_score
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

In [10]:
def letsgo(file_path):
    data = pd.read_csv(file_path)    #reading csv file and getting data
    data = pd.DataFrame(data)        #placing data in the data frame
    a = 0.3   #proportion of size of test data
    b = 18    #random state for train test spliting
    
    x = data.iloc[:,range(data.shape[1]-1)]  #getting all rows and all columns except for quality
    xCopy = x.copy()
    y = data.iloc[:,data.shape[1]-1]     #getting all rows and quality column only
    
    x = (x - np.mean(x, axis = 0))/np.std(x)    #normalizing the data
    
    #spliting data to train, test set
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = a, random_state = b)    
    
    #logistic regression with max 10000 iterations or tolerance of 1*10**(-15) for difference between two weight values as stopping criteria
    lr = LogisticRegression(max_iter = 10000, multi_class= "multinomial", solver= "saga", tol = 1e-15)
    lr.fit(x_train,y_train)
    report = lr.predict(x_test)
    print("\n\nLogistic Regression:\n\n")
    print("Classification Report:\n",classification_report(y_test, report, zero_division = 0),"\n\nAccuracy:\n",accuracy_score(y_test, report))
    print("-------------------------------------------------------------------------------------------")
    
    #support vector classifier with biases 3 and kernal rbf with stopping criteria of 1*10**(-10)
    svc = SVC(C= 3, kernel="rbf", tol= 1e-10)
    svc.fit(x_train,y_train)
    report1 = svc.predict(x_test)
    print("\n\nSupport Vector Classifier:\n\n")
    print("Classification Report:\n",classification_report(y_test, report1, zero_division = 0),"\n\nAccuracy:\n",accuracy_score(y_test, report1))
    print("-------------------------------------------------------------------------------------------")

    #training multinomal classifier with non normalized data due to issue of negative data
    x_train2, x_test2, y_train2, y_test2 = train_test_split(xCopy, y, test_size = a, random_state = b)
    nv = MultinomialNB()
    nv.fit(x_train2,y_train2)
    report2 = nv.predict(x_test2)
    print("\n\nNaive Bayes Classifier:\n\n")
    print("Classification Report:\n",classification_report(y_test2, report2, zero_division = 0),"\n\nAccuracy:\n",accuracy_score(y_test2, report2))
    print("-------------------------------------------------------------------------------------------")
    
    #k neighbors classifier with L1 distance calculator
    knn = KNeighborsClassifier(p = 1)
    knn.fit(x_train,y_train)
    report3 = knn.predict(x_test)
    print("\n\nKNN Classifier:\n\n")
    print("Classification Report:\n",classification_report(y_test, report3, zero_division = 0),"\n\nAccuracy:\n",accuracy_score(y_test, report3))
    print("-------------------------------------------------------------------------------------------")
    
    #decission tree classifier
    tree = DecisionTreeClassifier()
    tree.fit(x_train,y_train)
    report4 = tree.predict(x_test)
    print("\n\nDecission Tree Classifier:\n\n")
    print("Classification Report:\n",classification_report(y_test, report4, zero_division = 0),"\n\nAccuracy:\n",accuracy_score(y_test, report4))
    print("-------------------------------------------------------------------------------------------")   

In [11]:
letsgo(file_path = "E:\Milan\MDS\machine learning\Dataset\wine+quality\winequality-white.csv")



Logistic Regression:


Classification Report:
               precision    recall  f1-score   support

           3       0.00      0.00      0.00         8
           4       0.50      0.05      0.10        38
           5       0.59      0.54      0.56       425
           6       0.54      0.74      0.62       683
           7       0.54      0.29      0.38       265
           8       0.00      0.00      0.00        50
           9       0.00      0.00      0.00         1

    accuracy                           0.55      1470
   macro avg       0.31      0.23      0.24      1470
weighted avg       0.53      0.55      0.52      1470
 

Accuracy:
 0.5530612244897959
-------------------------------------------------------------------------------------------


Support Vector Classifier:


Classification Report:
               precision    recall  f1-score   support

           3       0.00      0.00      0.00         8
           4       0.33      0.11      0.16        38
           5

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


In [12]:
#using another data set
letsgo(file_path = "E:\Milan\MDS\machine learning\Dataset\wine+quality\winequality-red.csv")



Logistic Regression:


Classification Report:
               precision    recall  f1-score   support

           3       0.00      0.00      0.00         3
           4       0.00      0.00      0.00        17
           5       0.61      0.71      0.65       195
           6       0.54      0.57      0.56       199
           7       0.56      0.36      0.44        64
           8       0.00      0.00      0.00         2

    accuracy                           0.57       480
   macro avg       0.28      0.27      0.27       480
weighted avg       0.55      0.57      0.55       480
 

Accuracy:
 0.5729166666666666
-------------------------------------------------------------------------------------------


Support Vector Classifier:


Classification Report:
               precision    recall  f1-score   support

           3       0.00      0.00      0.00         3
           4       0.00      0.00      0.00        17
           5       0.64      0.78      0.71       195
           6

  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
