In [12]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
import warnings
warnings.filterwarnings('ignore')

In [3]:
data = pd.read_csv("WineQT.csv")

In [4]:
# Display basic information about the dataset
print(data)
data.info()
data.describe()
data.isna().sum()
data.shape
data.size

      fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0               7.4             0.700         0.00             1.9      0.076   
1               7.8             0.880         0.00             2.6      0.098   
2               7.8             0.760         0.04             2.3      0.092   
3              11.2             0.280         0.56             1.9      0.075   
4               7.4             0.700         0.00             1.9      0.076   
...             ...               ...          ...             ...        ...   
1138            6.3             0.510         0.13             2.3      0.076   
1139            6.8             0.620         0.08             1.9      0.068   
1140            6.2             0.600         0.08             2.0      0.090   
1141            5.9             0.550         0.10             2.2      0.062   
1142            5.9             0.645         0.12             2.0      0.075   

      free sulfur dioxide  

14859

In [5]:
# Data preprocessing
# Separate features (X) and target variable (y)
X = data.drop('quality', axis=1).values
y = data['quality'].values

In [6]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# Model Building
# Random Forest Classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

In [8]:
# Stochastic Gradient Descent Classifier
sgd_classifier = SGDClassifier(random_state=42)
sgd_classifier.fit(X_train, y_train)

In [9]:
# Support Vector Classifier
svc_classifier = SVC(random_state=42)
svc_classifier.fit(X_train, y_train)

In [13]:
# Model Evaluation
# Random Forest Classifier
rf_predictions = rf_classifier.predict(X_test)
print("Random Forest Classifier:")
print(classification_report(y_test, rf_predictions))
print("Confusion Matrix:")
print(confusion_matrix(y_test, rf_predictions))

Random Forest Classifier:
              precision    recall  f1-score   support

           4       0.00      0.00      0.00         6
           5       0.73      0.75      0.74        96
           6       0.63      0.70      0.66        99
           7       0.75      0.58      0.65        26
           8       0.00      0.00      0.00         2

    accuracy                           0.68       229
   macro avg       0.42      0.40      0.41       229
weighted avg       0.66      0.68      0.67       229

Confusion Matrix:
[[ 0  3  3  0  0]
 [ 0 72 24  0  0]
 [ 0 24 69  5  1]
 [ 0  0 11 15  0]
 [ 0  0  2  0  0]]


In [14]:
# Stochastic Gradient Descent Classifier
sgd_predictions = sgd_classifier.predict(X_test)
print("Stochastic Gradient Descent Classifier:")
print(classification_report(y_test, sgd_predictions))
print("Confusion Matrix:")
print(confusion_matrix(y_test, sgd_predictions))

Stochastic Gradient Descent Classifier:
              precision    recall  f1-score   support

           4       0.00      0.00      0.00         6
           5       0.74      0.33      0.46        96
           6       0.48      0.91      0.63        99
           7       0.00      0.00      0.00        26
           8       0.00      0.00      0.00         2

    accuracy                           0.53       229
   macro avg       0.25      0.25      0.22       229
weighted avg       0.52      0.53      0.47       229

Confusion Matrix:
[[ 0  1  5  0  0]
 [ 0 32 64  0  0]
 [ 0  9 90  0  0]
 [ 0  1 25  0  0]
 [ 0  0  2  0  0]]


In [15]:
# Support Vector Classifier
svc_predictions = svc_classifier.predict(X_test)
print("Support Vector Classifier:")
print(classification_report(y_test, svc_predictions))
print("Confusion Matrix:")
print(confusion_matrix(y_test, svc_predictions))

Support Vector Classifier:
              precision    recall  f1-score   support

           4       0.00      0.00      0.00         6
           5       0.49      0.64      0.55        96
           6       0.50      0.53      0.51        99
           7       0.00      0.00      0.00        26
           8       0.00      0.00      0.00         2

    accuracy                           0.49       229
   macro avg       0.20      0.23      0.21       229
weighted avg       0.42      0.49      0.45       229

Confusion Matrix:
[[ 0  2  4  0  0]
 [ 0 61 35  0  0]
 [ 0 47 52  0  0]
 [ 0 14 12  0  0]
 [ 0  1  1  0  0]]


In [None]:
# Data Visualization
# Example: Pairplot using Seaborn
sns.pairplot(data, hue='quality')
plt.show()