In [None]:
# Task 1
# Import the required libraries
import pandas as pd
from sklearn.model_selection import train_test_split
# Read the wine-quality CSV and preview its first rows
data = pd.read_csv('winequality-red.csv')
display(data.head())
# Separate the label column ('quality') from the remaining predictor columns
y = data['quality']
X = data.drop('quality', axis=1)
# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
# Candidate hyperparameter values searched for the Random Forest
rf_param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[5, 10, 20],
    min_samples_split=[2, 5, 10],
)
# Candidate hyperparameter values searched for Gradient Boosting
gb_param_grid = dict(
    learning_rate=[0.01, 0.1, 0.2],
    n_estimators=[50, 100, 200],
    max_depth=[3, 5, 7],
)


In [None]:
# Task 2
# Implement Hyperparameter tuning
from sklearn.model_selection import GridSearchCV
# Exhaustive grid search: every combination in rf_param_grid is scored by
# 5-fold cross-validated accuracy, using all available cores (n_jobs=-1).
# The forest is seeded so that re-running the notebook selects the same
# best parameters instead of drifting with the forest's internal randomness.
rf_grid_search = GridSearchCV(
    RandomForestClassifier(random_state=42),
    rf_param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)
rf_grid_search.fit(X_train, y_train)
# Display best parameters
print("Best parameters (Grid Search - Random Forest):", rf_grid_search.best_params_)

In [None]:
from sklearn.model_selection import RandomizedSearchCV
# Randomized search: draws n_iter=10 candidate settings from gb_param_grid and
# scores each by 5-fold cross-validated accuracy on all available cores.
# Two seeds are needed for a reproducible result:
#   - random_state on the search fixes WHICH candidates get sampled;
#   - random_state on the estimator fixes how each candidate is fitted.
gb_random_search = RandomizedSearchCV(
    GradientBoostingClassifier(random_state=42),
    gb_param_grid,
    scoring='accuracy',
    cv=5,
    n_iter=10,
    n_jobs=-1,
    random_state=42,
)
gb_random_search.fit(X_train, y_train)
# Display best parameters
print("Best parameters (Random Search - Gradient Boosting):", gb_random_search.best_params_)

In [None]:
# Use Optuna
import optuna
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

def rf_objective(trial):
    """Optuna objective: mean 5-fold CV accuracy of a Random Forest.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample ``n_estimators`` (50-200),
        ``max_depth`` (5-20) and ``min_samples_split`` (2-10), all inclusive
        integer ranges.

    Returns
    -------
    float
        Mean cross-validated accuracy on ``X_train`` / ``y_train``; the study
        is expected to maximize this value.
    """
    n_estimators = trial.suggest_int('n_estimators', 50, 200)
    max_depth = trial.suggest_int('max_depth', 5, 20)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 10)
    # Seed the forest so the same hyperparameters always yield the same score;
    # otherwise the optimizer is chasing noise from the forest's own randomness.
    model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        random_state=42,
    )
    # Accuracy is the classifier's default score; it is spelled out here to
    # match the scoring used by the grid and randomized searches.
    return cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy').mean()
# Start Optuna Study
# A seeded TPE sampler removes the sampler's own run-to-run randomness from the
# sequence of proposed hyperparameters; direction='maximize' because the
# objective returns an accuracy.
rf_study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
rf_study.optimize(rf_objective, n_trials=20)
print("Best parameters (Optuna - Random Forest):", rf_study.best_params)

In [None]:
# Task 3
# Analyze and Interpret Results
from sklearn.metrics import accuracy_score, classification_report


def report_test_performance(label, model):
    """Evaluate a fitted model on the held-out test set and print the results.

    Parameters
    ----------
    label : str
        Text placed before "Accuracy:" in the printed accuracy line.
    model : object
        Already-fitted estimator exposing ``predict``.

    Returns
    -------
    The predictions for ``X_test``, so callers can reuse them (e.g. for plots).
    """
    predictions = model.predict(X_test)
    print(f"{label} Accuracy:", accuracy_score(y_test, predictions))
    # zero_division=0 reports 0 (instead of warning) for classes never predicted
    print("Classification Report:\n", classification_report(y_test, predictions, zero_division=0))
    return predictions


# Best Random Forest from Grid Search. No extra fit is needed here: with the
# default refit=True the search has already refit best_estimator_ on X_train.
best_rf = rf_grid_search.best_estimator_
y_pred_rf = report_test_performance("Random Forest (Grid Search)", best_rf)

# Best Gradient Boosting model from Random Search (likewise already refit)
best_gb = gb_random_search.best_estimator_
y_pred_gb = report_test_performance("Gradient Boosting (Random Search)", best_gb)

# Optuna only returns the best parameters, so the model must be trained here.
# The forest is seeded so the reported test accuracy is the same on every run.
best_rf_optuna = RandomForestClassifier(random_state=42, **rf_study.best_params).fit(X_train, y_train)
y_pred_rf_optuna = report_test_performance("Random Forest (Optuna)", best_rf_optuna)

In [None]:
import matplotlib.pyplot as plt
# One bar per tuned model: labels and accuracies are kept in the same order
labels = ["RF (Grid Search)", "GB (Random Search)", "RF (Optuna)"]
accuracies = [
    accuracy_score(y_test, preds)
    for preds in (y_pred_rf, y_pred_gb, y_pred_rf_optuna)
]
# Draw the comparison on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(labels, accuracies, color=['blue', 'green', 'orange'])
ax.set_xlabel("Model and Tuning Method")
ax.set_ylabel("Accuracy")
ax.set_title("Model Accuracy Comparison for Best Hyperparameter Configurations")
plt.show()