In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Load the dataset
data = pd.read_csv('WineQT.csv')

# Data Preprocessing
data.fillna(data.mean(), inplace=True)
scaler = StandardScaler()
numerical_features = data.columns[:-1]  # Exclude the target column
data[numerical_features] = scaler.fit_transform(data[numerical_features])
data['quality'] = data['quality'].apply(lambda x: 1 if x >= 7 else 0)

# Split data into features and target
X = data.drop('quality', axis=1)
y = data['quality']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Hyperparameter Tuning for Decision Tree
dt_param_grid = {
    'max_depth': [3, 5, 7, 10],
    'min_samples_split': [2, 5, 10]
}
dt_grid_search = GridSearchCV(DecisionTreeClassifier(random_state=42), dt_param_grid, cv=5)
dt_grid_search.fit(X_train, y_train)
dt_best_model = dt_grid_search.best_estimator_
y_pred_dt = dt_best_model.predict(X_test)

# Evaluate Decision Tree
print("Best Decision Tree Parameters:", dt_grid_search.best_params_)
print("Decision Tree Performance:")
print(classification_report(y_test, y_pred_dt))

# Hyperparameter Tuning for Random Forest
rf_param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [3, 5, 7, 10]
}
rf_grid_search = GridSearchCV(RandomForestClassifier(random_state=42), rf_param_grid, cv=5)
rf_grid_search.fit(X_train, y_train)
rf_best_model = rf_grid_search.best_estimator_
y_pred_rf = rf_best_model.predict(X_test)

# Evaluate Random Forest
print("Best Random Forest Parameters:", rf_grid_search.best_params_)
print("Random Forest Performance:")
print(classification_report(y_test, y_pred_rf))


Best Decision Tree Parameters: {'max_depth': 3, 'min_samples_split': 2}
Decision Tree Performance:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       229

    accuracy                           1.00       229
   macro avg       1.00      1.00      1.00       229
weighted avg       1.00      1.00      1.00       229

Best Random Forest Parameters: {'max_depth': 3, 'n_estimators': 50}
Random Forest Performance:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       229

    accuracy                           1.00       229
   macro avg       1.00      1.00      1.00       229
weighted avg       1.00      1.00      1.00       229

