In [25]:
# Q-2)
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import roc_auc_score

# Load the CSV file
file_path = "/content/winequality-red.csv"
data = pd.read_csv(file_path, delimiter=';')

data['quality'] = (data['quality'] > 6).astype(int)
X = data.drop(columns=['quality']).values
y = data['quality'].values




In [35]:
#Q-2(a)

def holdout(model, X, y, testSize):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=testSize, random_state=42)
    start_time = time.time()
    model.fit(X_train, y_train)
    train_auc = roc_auc_score(y_train, model.predict_proba(X_train)[:, 1])
    test_auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
    time_elapsed = time.time() - start_time
    return train_auc, test_auc, time_elapsed

model = DecisionTreeClassifier(max_depth=5, min_samples_leaf=5)
holdout_results = holdout(model, X, y, testSize=0.2)

print("Holdout Results:")
print("-" * 15)
print("Train AUC: {:.4f}".format(holdout_results[0]))
print("Test AUC:  {:.4f}".format(holdout_results[1]))
print("Time Elapsed: {:.4f} seconds".format(holdout_results[2]))
print()

Holdout Results:
---------------
Train AUC: 0.9255
Test AUC:  0.8352
Time Elapsed: 0.0120 seconds



The model shows strong performance on the training data (AUC = 0.93) and good
generalization on the holdout test set (AUC = 0.84). The drop in AUC between
training and test indicates mild overfitting, but overall the model maintains
solid discriminatory power. The low runtime suggests the model is efficient
and suitable for fast evaluation.

In [39]:
#Q_2(b)
def k_fold_cv(model, X, y, k):
    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    train_aucs, test_aucs = [], []
    start_time = time.time()

    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        model.fit(X_train, y_train)
        train_aucs.append(roc_auc_score(y_train, model.predict_proba(X_train)[:, 1]))
        test_aucs.append(roc_auc_score(y_test, model.predict_proba(X_test)[:, 1]))

    time_elapsed = time.time() - start_time
    return np.mean(train_aucs), np.mean(test_aucs), time_elapsed

model = DecisionTreeClassifier(max_depth=5, min_samples_leaf=5)
k_fold_results = k_fold_cv(model, X, y, k=5)

print("K-Fold Cross Validation Results:")
print("-" * 40)

print("Train AUC: {:.4f}".format(k_fold_results[0]))
print("Test AUC:  {:.4f}".format(k_fold_results[1]))
print("Time Elapsed: {:.4f} seconds".format(k_fold_results[2]))
print()

K-Fold Cross Validation Results:
----------------------------------------
Train AUC: 0.9341
Test AUC:  0.8379
Time Elapsed: 0.0797 seconds



K-Fold cross-validation results indicate consistently strong model performance.
The high training AUC (0.93) and stable test AUC (0.84) suggest good
generalization with limited overfitting. The slightly higher runtime reflects
the added robustness gained from multiple validation folds.

In [40]:
#Q-2(c)
def monte_carlo_cv(model, X, y, testSize, s):
    train_aucs, test_aucs = [], []
    start_time = time.time()

    for _ in range(s):
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=testSize,
                                                            random_state=np.random.randint(10000))
        model.fit(X_train, y_train)
        train_aucs.append(roc_auc_score(y_train, model.predict_proba(X_train)[:, 1]))
        test_aucs.append(roc_auc_score(y_test, model.predict_proba(X_test)[:, 1]))

    time_elapsed = time.time() - start_time
    return np.mean(train_aucs), np.mean(test_aucs), time_elapsed

model = DecisionTreeClassifier(max_depth=5, min_samples_leaf=5)
monte_carlo_results = monte_carlo_cv(model, X, y, testSize=0.2, s=10)

print("Monte Carlo Cross Validation Results:")
print("-" * 40)
print("Train AUC: {:.4f}".format(monte_carlo_results[0]))
print("Test AUC:  {:.4f}".format(monte_carlo_results[1]))
print("Time Elapsed: {:.4f} seconds".format(monte_carlo_results[2]))
print()

Monte Carlo Cross Validation Results:
----------------------------------------
Train AUC: 0.9320
Test AUC:  0.8479
Time Elapsed: 0.0873 seconds



Monte Carlo cross-validation shows strong and stable model performance.
The training AUC (0.93) and higher test AUC (0.85) indicate good
generalization with minimal overfitting. The increased runtime reflects
the repeated random sampling used to obtain a more reliable performance
estimate.

In [42]:
#Q-2(d)
print("Summary Table:")
print("-" * 50)
print("Technique          | Train AUC | Test AUC | Time (s)")
print("-" * 50)
print("Holdout            | {:9.4f} | {:8.4f} | {:8.4f}".format(*holdout_results))
print("K-Fold (k=5)       | {:9.4f} | {:8.4f} | {:8.4f}".format(*k_fold_results))
print("Monte Carlo (s=10) | {:9.4f} | {:8.4f} | {:8.4f}".format(*monte_carlo_results))
print("-" * 50)

# Additional comparison insights
print("\nComparison Insights:")
print("-" * 40)
print("1. Performance Comparison:")
print(f"   - Highest Test AUC: {max(holdout_results[1], k_fold_results[1], monte_carlo_results[1]):.4f}")
print(f"   - Lowest Test AUC:  {min(holdout_results[1], k_fold_results[1], monte_carlo_results[1]):.4f}")
print()

print("2. Time Efficiency Comparison:")
print(f"   - Fastest: {'Holdout' if holdout_results[2] <= k_fold_results[2] and holdout_results[2] <= monte_carlo_results[2] else 'K-Fold' if k_fold_results[2] <= monte_carlo_results[2] else 'Monte Carlo'}")
print(f"   - Slowest: {'Holdout' if holdout_results[2] >= k_fold_results[2] and holdout_results[2] >= monte_carlo_results[2] else 'K-Fold' if k_fold_results[2] >= monte_carlo_results[2] else 'Monte Carlo'}")
print()

print("3. Overfitting Assessment:")
print(f"   - Train-Test Difference (Holdout): {holdout_results[0] - holdout_results[1]:.4f}")
print(f"   - Train-Test Difference (K-Fold):  {k_fold_results[0] - k_fold_results[1]:.4f}")
print(f"   - Train-Test Difference (Monte Carlo): {monte_carlo_results[0] - monte_carlo_results[1]:.4f}")

Summary Table:
--------------------------------------------------
Technique          | Train AUC | Test AUC | Time (s)
--------------------------------------------------
Holdout            |    0.9255 |   0.8352 |   0.0120
K-Fold (k=5)       |    0.9341 |   0.8379 |   0.0797
Monte Carlo (s=10) |    0.9320 |   0.8479 |   0.0873
--------------------------------------------------

Comparison Insights:
----------------------------------------
1. Performance Comparison:
   - Highest Test AUC: 0.8479
   - Lowest Test AUC:  0.8352

2. Time Efficiency Comparison:
   - Fastest: Holdout
   - Slowest: Monte Carlo

3. Overfitting Assessment:
   - Train-Test Difference (Holdout): 0.0902
   - Train-Test Difference (K-Fold):  0.0962
   - Train-Test Difference (Monte Carlo): 0.0841


Among the validation techniques, Monte Carlo cross-validation achieves the
highest test AUC (0.848), indicating the best generalization performance,
while Holdout yields the lowest test AUC (0.835) but is the most time-efficient.
Trainâ€“test AUC gaps are comparable across methods, with Monte Carlo showing
the smallest gap, suggesting slightly reduced overfitting. Overall, increased
computational cost provides marginal but more reliable performance gains.