In [3]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix

In [4]:
# Read the red-wine quality table; the file is semicolon-delimited, not comma-delimited.
data = pd.read_csv(
    'wine+quality/winequality-red.csv',
    delimiter=';',
)

In [5]:
# Bucket the numeric quality score into three ordered classes.
# pd.cut uses right-closed intervals by default, so the mapping is:
#   (2, 5] -> 'low',  (5, 6] -> 'medium',  (6, 8] -> 'high'
# Any score outside (2, 8] (or a missing score) would be labelled NaN.
QUALITY_BINS = [2, 5, 6, 8]
QUALITY_LABELS = ['low', 'medium', 'high']
data['quality_label'] = pd.cut(data['quality'], bins=QUALITY_BINS, labels=QUALITY_LABELS)

# Fail fast with a clear message instead of letting NaN labels leak into the target.
unlabeled = data['quality_label'].isna()
assert not unlabeled.any(), (
    f"{int(unlabeled.sum())} rows have a quality score outside (2, 8] and received no label"
)

In [6]:
# Features are every column except the raw score and the class derived from it;
# the derived class is the prediction target.
target_columns = ['quality', 'quality_label']
X = data.drop(columns=target_columns)
y = data['quality_label']

In [7]:
# Hold out 20% of the rows for testing. The split is a plain random shuffle
# (no stratification), seeded so that it is reproducible.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [8]:
# Learn the scaling statistics from the training rows only, then apply the
# same fitted transform to both splits so no test information leaks into the fit.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [9]:
# Build a 100-tree random forest with a fixed seed and fit it on the scaled training data.
forest_params = {'n_estimators': 100, 'random_state': 42}
rf_model = RandomForestClassifier(**forest_params)
rf_model.fit(X_train_scaled, y_train)

In [10]:
# Score the held-out rows, then print the per-class report followed by the
# confusion matrix. Both list the classes in sorted label order
# (high, low, medium), so matrix rows/columns follow that same order.
y_pred = rf_model.predict(X_test_scaled)
for metric in (classification_report, confusion_matrix):
    print(metric(y_test, y_pred))

              precision    recall  f1-score   support

        high       0.76      0.55      0.64        47
         low       0.76      0.82      0.78       141
      medium       0.65      0.66      0.65       132

    accuracy                           0.71       320
   macro avg       0.72      0.68      0.69       320
weighted avg       0.71      0.71      0.71       320

[[ 26   0  21]
 [  0 115  26]
 [  8  37  87]]


In [11]:
# Tabulate the fitted forest's importance score for each input column,
# ordered from most to least important.
importance_table = pd.DataFrame(
    {'importance': rf_model.feature_importances_},
    index=X_train.columns,
)
feature_importances = importance_table.sort_values(by='importance', ascending=False)
print(feature_importances)

                      importance
alcohol                 0.155416
sulphates               0.121708
total sulfur dioxide    0.101606
volatile acidity        0.098228
density                 0.094087
chlorides               0.078587
citric acid             0.075703
fixed acidity           0.071779
pH                      0.070610
residual sugar          0.067490
free sulfur dioxide     0.064784
