# In [None]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_rel
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Load the dataset.
# Copies of this file circulate with either ',' or ';' as the field
# delimiter. sep=None (which requires the Python parsing engine) makes pandas
# detect the delimiter from the file, so both layouts parse into real columns
# instead of one wide string column.
data = pd.read_csv("winequality-white.csv", sep=None, engine="python")

# Fail early with a readable message if the target column is missing, rather
# than letting the drop() below raise a less informative KeyError.
if 'quality' not in data.columns:
    raise KeyError(
        f"'quality' column not found; parsed columns: {list(data.columns)}"
    )

# Separate features and target variable
X = data.drop('quality', axis=1)
y = data['quality']

# Split the dataset into training and testing sets: 20% is held out for
# testing, and the fixed random_state keeps the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Define the models and their respective hyperparameter grids.
# Each entry maps a display name to a (estimator, param_grid) pair that is
# handed to GridSearchCV.
models = {
    # Seed the forest so that repeated runs build the same trees and the
    # reported scores are reproducible.
    'RandomForest': (RandomForestClassifier(random_state=42),
                     {'n_estimators': [10, 50, 100],
                      'max_depth': [None, 10, 20],
                      'min_samples_split': [2, 5, 10]}),
    # SVMs are not scale invariant, so the features are standardized before
    # the classifier. Doing this inside a Pipeline means the scaler is re-fit
    # on the training part of every cross-validation split, so held-out folds
    # never influence the scaling. Grid keys carry the 'svc__' prefix to
    # address the SVC step of the pipeline.
    'SVM': (Pipeline([('scaler', StandardScaler()), ('svc', SVC())]),
            {'svc__C': [0.1, 1, 10],
             'svc__kernel': ['linear', 'rbf']})
}

# Tune every candidate model with a grid search, then score the tuned
# estimator. `results` maps each model name to a dict holding the tuned
# estimator, its cross-validation scores, its test-set accuracy and the
# t-test outcome.
results = {}
for model_name in models:
    estimator, grid = models[model_name]

    # Exhaustive 5-fold grid search on the training set; best_estimator_ is
    # the winning configuration refit on all of the training data.
    searcher = GridSearchCV(estimator=estimator, param_grid=grid, cv=5)
    searcher.fit(X_train, y_train)
    tuned = searcher.best_estimator_

    # Accuracy of the tuned configuration on each of 5 training-set folds
    fold_accuracy = cross_val_score(
        tuned, X_train, y_train, cv=5, scoring='accuracy'
    )

    # Single accuracy figure on the held-out test set
    holdout_accuracy = accuracy_score(y_test, tuned.predict(X_test))

    # Paired t-test of the fold accuracies against the test accuracy, which
    # is repeated once per fold so both samples have the same length.
    repeated_holdout = [holdout_accuracy] * len(fold_accuracy)
    t_stat, p_val = ttest_rel(fold_accuracy, repeated_holdout)

    results[model_name] = dict(
        best_model=tuned,
        cv_scores=fold_accuracy,
        test_score=holdout_accuracy,
        t_statistic=t_stat,
        p_value=p_val,
    )

# Print the results
for model_name, result in results.items():
    # get_params() returns every parameter of the estimator, defaults
    # included. Keep only the names present in this model's search grid so
    # the "Best Hyperparameters" line shows the values the search selected.
    searched_names = models[model_name][1]
    all_params = result['best_model'].get_params()
    best_params = {name: all_params[name] for name in searched_names}

    print(f"Model: {model_name}")
    print(f"Best Hyperparameters: {best_params}")
    print(f"Cross-Validation Scores: {result['cv_scores']}")
    print(f"Test Score: {result['test_score']}")
    print(f"T-Statistic: {result['t_statistic']}, P-Value: {result['p_value']}")
    print("\n")
