## Import The Necessary Libraries

In [2]:
from sklearn.datasets import load_breast_cancer, load_iris, load_wine
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from tabpfn import TabPFNClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import pandas as pd
import time
import plotly.express as px

## Load Datasets

In [3]:
# Fetch every benchmark dataset as an (X, y) pair, keyed by a short name.
# Each loader is called with return_X_y=True so the loop further down can
# unpack features and labels directly.
datasets = {
    name: loader(return_X_y=True)
    for name, loader in (
        ("breast_cancer", load_breast_cancer),
        ("iris", load_iris),
        ("wine", load_wine),
    )
}

## Define Models

In [4]:
# Classifiers under comparison, keyed by the display name used in the results
# table and plots. Seeds are fixed wherever the estimator accepts one here.
models = dict([
    ("LogisticRegression", LogisticRegression(random_state=42, max_iter=1000)),
    ("GaussianNB", GaussianNB()),
    ("k-Nearest Neighbor", KNeighborsClassifier(n_neighbors=5)),
    ("DecisionTree", DecisionTreeClassifier(random_state=42)),
    ("SVM", SVC(kernel="rbf", random_state=42)),
    ("TabPFNClassifier", TabPFNClassifier()),
])

## Model Evaluation Loop: Measuring Accuracy, Fit Time, and Predict Time

In [None]:
# Benchmark every model on every dataset, recording test accuracy plus the
# elapsed time (in milliseconds) of fit() and predict().
results = []

# Models listed here are trained and evaluated on standardized features;
# all other models receive the raw features.
scaled_model_names = {"LogisticRegression", "k-Nearest Neighbor", "SVM"}

for dataset_name, (X, y) in datasets.items():
    # Hold out a third of each dataset for testing (fixed seed for repeatability)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

    # Fit the scaler on the training split only, then apply it to both splits,
    # so no test-set statistics leak into training.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Note: the same estimator objects from `models` are refit for each dataset.
    for model_name, model in models.items():
        # Pick the inputs before starting the clock so the timed regions
        # contain nothing but the fit/predict calls themselves.
        if model_name in scaled_model_names:
            fit_features, predict_features = X_train_scaled, X_test_scaled
        else:
            fit_features, predict_features = X_train, X_test

        # perf_counter() is a monotonic, high-resolution clock intended for
        # measuring short intervals; time.time() can be too coarse for
        # sub-millisecond fits and may jump if the system clock is adjusted.
        start_fit = time.perf_counter()
        model.fit(fit_features, y_train)
        fit_time = (time.perf_counter() - start_fit) * 1000  # seconds -> milliseconds

        start_predict = time.perf_counter()
        predictions = model.predict(predict_features)
        predict_time = (time.perf_counter() - start_predict) * 1000  # seconds -> milliseconds

        # Fraction of test samples classified correctly
        accuracy = accuracy_score(y_test, predictions)

        # One record per (dataset, model) pair
        results.append({
            "Dataset": dataset_name,
            "Model": model_name,
            "Accuracy": accuracy,
            "Fit Time (ms)": fit_time,
            "Predict Time (ms)": predict_time
        })


## Show the Results as a DataFrame

In [None]:
# Build a table with one row per (dataset, model) record and display it
results_df = pd.DataFrame.from_records(results)
results_df

## Plot The Results 

In [None]:
# Accuracy chart: one line per dataset, models along the x-axis
fig_accuracy = px.line(results_df, x="Model", y="Accuracy", color="Dataset", markers=True,
                       title="Model Accuracy Across Datasets",
                       labels={"Accuracy": "Accuracy", "Model": "Model", "Dataset": "Dataset"})
fig_accuracy.update_layout(title_x=0.5, xaxis_title="Model", yaxis_title="Accuracy", font=dict(size=14))
fig_accuracy.show()

# Timing chart: reshape to long form so fit and predict times share one y-axis.
# After melt(), "variable" holds the metric name and "value" holds the time.
timing_long = results_df.melt(id_vars=["Dataset", "Model"],
                              value_vars=["Fit Time (ms)", "Predict Time (ms)"])

# Color encodes the metric and dash style encodes the dataset. Without the
# dash encoding, the three dataset lines for a given metric are drawn
# identically and cannot be told apart in the figure or its legend.
fig_time = px.line(timing_long,
                   x="Model", y="value", color="variable", line_dash="Dataset",
                   title="Model Fit and Predict Time Across Datasets (ms)",
                   labels={"value": "Time (ms)", "Model": "Model", "variable": "Metric", "Dataset": "Dataset"})
fig_time.update_layout(title_x=0.5, xaxis_title="Model", yaxis_title="Time (ms)", font=dict(size=14))
fig_time.show()