In [14]:
# Install required libraries
!pip install scikit-learn pandas numpy

# Import libraries
import numpy as np
import pandas as pd
from google.colab import files
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Read the wine dataset from its fixed location in the Colab filesystem.
# Any failure is reported on stdout and then re-raised so the cell stops
# instead of continuing without data.
file_path = "/content/red_wine.csv"

try:
    red_wine = pd.read_csv(file_path)
    print("Data loaded successfully from:", file_path)
except FileNotFoundError:
    # The CSV is not at the expected path (e.g. it was never uploaded).
    print(f"Error: File not found at {file_path}. Please check the path.")
    raise
except Exception as load_error:
    # Anything else: parse errors, permission problems, ...
    print(f"Error loading data: {load_error}")
    raise

# Feature matrix: the three columns every model is trained on.
X = red_wine[['citric acid', 'sulphates', 'alcohol']]

# Binary target derived from the 'type' column (0 = low, 1 = high).
y = red_wine['type'].map({'low': 0, 'high': 1})

# Series.map yields NaN for every label that is not a key of the mapping.
# Fail fast and name the offending labels here, rather than letting NaN
# reach scikit-learn and surface as a far less specific error.
if y.isna().any():
    unexpected = red_wine.loc[y.isna(), 'type'].unique().tolist()
    raise ValueError(
        "Column 'type' must contain only 'low' or 'high'; "
        f"found unexpected value(s): {unexpected}"
    )

# Seed shared by every randomized estimator so that repeated runs of this
# cell produce the same scores.
RANDOM_STATE = 42

# Classifiers to compare, keyed by the label used in the results table.
# Apart from the seed, the SVM kernel and probability=True, each estimator
# keeps its scikit-learn default hyper-parameters.
models = {
    'Baseline': DummyClassifier(strategy='most_frequent'),  # Predicts majority class
    'Logistic Regression': LogisticRegression(),
    'Naive Bayes': GaussianNB(),
    'Decision Tree': DecisionTreeClassifier(random_state=RANDOM_STATE),
    # probability=True makes SVC fit an additional, randomized probability
    # model; random_state pins that randomness.
    'SVM-Linear': SVC(kernel='linear', probability=True, random_state=RANDOM_STATE),
    'SVM-RBF': SVC(kernel='rbf', probability=True, random_state=RANDOM_STATE),
    'Random Forest': RandomForestClassifier(random_state=RANDOM_STATE),
}

# Evaluate every model with 10-fold cross-validation and keep the mean of
# each metric.
#
# A single cross_validate call computes both metrics on the same fitted
# folds. Each model is therefore trained 10 times instead of 20, and for
# randomized models the AUC and accuracy describe the same fits rather than
# two independent training runs.
scoring = {'AUC': 'roc_auc', 'Accuracy': 'accuracy'}

results = {}
for name, model in models.items():
    # cross_validate returns one array per metric under the key "test_<name>".
    cv_scores = cross_validate(model, X, y, cv=10, scoring=scoring)
    auc_scores = cv_scores['test_AUC']
    acc_scores = cv_scores['test_Accuracy']
    # Store mean scores across the 10 folds
    results[name] = {
        'AUC': np.mean(auc_scores),
        'Accuracy': np.mean(acc_scores),
    }

# Build the summary table: one row per model, with the metric columns in a
# fixed order (AUC first, then Accuracy), and print it under a header line.
results_df = pd.DataFrame(results).transpose()
results_df = results_df.loc[:, ['AUC', 'Accuracy']]
print("10-Fold Cross-Validation Results:", results_df, sep="\n")

# Optional export: write the score table to a CSV in the current working
# directory, then hand that file to Colab's download helper.
results_df.to_csv(path_or_buf='model_results.csv')
files.download('model_results.csv')

Data loaded successfully from: /content/red_wine.csv
10-Fold Cross-Validation Results:
                          AUC  Accuracy
Baseline             0.500000  0.528887
Logistic Regression  0.879902  0.784785
Naive Bayes          0.895408  0.821627
Decision Tree        0.753474  0.760284
SVM-Linear           0.881489  0.793587
SVM-RBF              0.868920  0.535844
Random Forest        0.892384  0.786540


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>