Compare the accuracy, precision, recall and F1 score of the following five classification algorithms for Wine Quality Prediction.



1.  Logistic Regression Classifier
2.  Support Vector Classifier
3.  Naïve Bayes Classifier
4.  KNN Algorithm
5.  Decision Tree Classifier

Dataset: https://archive.ics.uci.edu/dataset/186/wine+quality

In [1]:
import pandas as pd
import warnings
# Ignore all warnings for the rest of the session.
# NOTE(review): this is a blanket filter, so warnings raised by any later cell
# are hidden as well.
warnings.filterwarnings("ignore")

# Raw GitHub URL of the white-wine quality workbook (.xlsx)
github_xlsx = 'https://raw.githubusercontent.com/Rk-Pudasaini/Applied_Machine_Learning/main/Datasets/winequality-white.xlsx'

# Read the Excel workbook from GitHub into a DataFrame
df = pd.read_excel(github_xlsx)

# Preview the first rows as the cell output
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8,6
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5,6
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9,6


In [2]:
# Dimensions of the loaded frame as (rows, columns)
df.shape


(4898, 12)

In [3]:
# "quality" is treated as the label to predict; every other column is a feature
y = df["quality"]
X = df.drop(columns=["quality"])


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply the same
# transformation to both partitions
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Initialize classifiers with scikit-learn defaults.
# The decision tree is seeded: it permutes features at every split, so ties
# between equally good splits would otherwise make its scores change per run.
classifiers = {
    'Logistic Regression': LogisticRegression(),
    'Support Vector Classifier': SVC(),
    'Naïve Bayes': GaussianNB(),
    'KNN': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(random_state=42)
}

# Evaluate each classifier on the held-out test split
results = {'Classifier': [], 'Accuracy': [], 'Precision': [], 'Recall': [], 'F1 Score': []}

for name, clf in classifiers.items():
    # Train the model
    clf.fit(X_train, y_train)

    # Make predictions
    y_pred = clf.predict(X_test)

    # Evaluate performance. Precision/recall/F1 are support-weighted averages
    # over the quality classes. zero_division=0 scores a class with an
    # undefined metric (e.g. one that is never predicted) as 0 explicitly,
    # instead of depending on a global warning filter to hide the warning.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

    # Store results
    results['Classifier'].append(name)
    results['Accuracy'].append(accuracy)
    results['Precision'].append(precision)
    results['Recall'].append(recall)
    results['F1 Score'].append(f1)

# Create a DataFrame for results (one row per classifier)
results_df = pd.DataFrame(results)

# Display the results
print(results_df)


                  Classifier  Accuracy  Precision    Recall  F1 Score
0        Logistic Regression  0.530612   0.517446  0.530612  0.497007
1  Support Vector Classifier  0.561224   0.553582  0.561224  0.527080
2                Naïve Bayes  0.432653   0.450030  0.432653  0.422993
3                        KNN  0.542857   0.537180  0.542857  0.536376
4              Decision Tree  0.609184   0.615139  0.609184  0.611845


In [6]:
# Repeat the same evaluation for the red-wine dataset

def Red_wine(url='https://raw.githubusercontent.com/Rk-Pudasaini/Applied_Machine_Learning/main/Datasets/winequality-red.xlsx',
             test_size=0.2, random_state=42):
    """Train five classifiers on a wine-quality workbook and score them on a held-out split.

    With no arguments this evaluates the red-wine workbook using an 80/20
    split seeded with 42.

    Parameters
    ----------
    url : str
        Location of an Excel workbook readable by ``pd.read_excel``. It must
        contain a "quality" column, which is used as the target; all other
        columns are used as features.
    test_size : float
        Fraction of rows held out for evaluation.
    random_state : int
        Seed for the train/test split and for the decision tree.

    Returns
    -------
    pandas.DataFrame
        One row per classifier with the columns "Classifier", "Accuracy",
        "Precision", "Recall" and "F1 Score". The last three are
        support-weighted averages over the quality classes.
    """
    # Read the Excel workbook into a DataFrame
    df = pd.read_excel(url)
    X = df.drop("quality", axis=1)
    y = df["quality"]

    # Split into train and test partitions
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Standardize the features (mean=0 and variance=1); the scaler is fitted on
    # the training rows only so no test-set statistics leak into training
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Initialize classifiers. The decision tree is seeded because it permutes
    # features at every split, so ties between equally good splits would
    # otherwise make its scores change from run to run.
    classifiers = {
        'Logistic Regression': LogisticRegression(),
        'Support Vector Classifier': SVC(),
        'Naïve Bayes': GaussianNB(),
        'KNN': KNeighborsClassifier(),
        'Decision Tree': DecisionTreeClassifier(random_state=random_state)
    }

    # Evaluate each classifier, collecting one record per model
    records = []
    for name, clf in classifiers.items():
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        # zero_division=0 scores a class with an undefined metric (e.g. one
        # that is never predicted) as 0 explicitly, rather than relying on a
        # global warning filter to hide the resulting warning.
        records.append({
            'Classifier': name,
            'Accuracy': accuracy_score(y_test, y_pred),
            'Precision': precision_score(y_test, y_pred, average='weighted', zero_division=0),
            'Recall': recall_score(y_test, y_pred, average='weighted', zero_division=0),
            'F1 Score': f1_score(y_test, y_pred, average='weighted', zero_division=0),
        })

    # Fix the column order explicitly so the table layout does not depend on
    # the key order of the records
    results_df = pd.DataFrame(
        records,
        columns=['Classifier', 'Accuracy', 'Precision', 'Recall', 'F1 Score'],
    )

    return results_df


In [7]:
# Run the red-wine evaluation; the returned results table is the cell output
Red_wine()

Unnamed: 0,Classifier,Accuracy,Precision,Recall,F1 Score
0,Logistic Regression,0.575,0.561804,0.575,0.551084
1,Support Vector Classifier,0.603125,0.5691,0.603125,0.572891
2,Naïve Bayes,0.546875,0.542588,0.546875,0.543498
3,KNN,0.546875,0.522388,0.546875,0.530905
4,Decision Tree,0.58125,0.572636,0.58125,0.576875
