# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Map each dataset file name to its location under /content
# (replace with your Colab paths or upload paths).
datasets = {
    file_name: f"/content/{file_name}"
    for file_name in (
        "data.csv",
        "preprocessed_minmax.csv",
        "preprocessed_zscore.csv",
        "preprocessed_decimal.csv",
        "feature_engineered_data.csv",
        "fe_minmax.csv",
        "fe_zscore.csv",
        "fe_decimal.csv",
    )
}

# One row per dataset ends up in `results`: either the measured accuracy or
# the reason the run failed.
results = []
label_encoder = LabelEncoder()

# Fit a default Gaussian Naive Bayes model on every dataset and score it
for name, path in datasets.items():
    # Every row starts out as a failure; "Accuracy" is overwritten on success
    # so the key order (and thus the column order) is the same in both cases.
    record = {
        "Dataset": name,
        "Accuracy": "Error",
        "Model": "Naive Bayes",
        "Hyperparameters": "Default",
    }

    try:
        # data.csv is read with ';' as the separator, all other files with ','
        delimiter = ';' if name == "data.csv" else ','
        df = pd.read_csv(path, sep=delimiter)
        # Remove stray whitespace around header names before looking up "Target"
        df.columns = df.columns.str.strip()

        if "Target" not in df.columns:
            raise ValueError(f"'Target' column missing in {name}")

        y = df["Target"]
        X = df.drop(columns="Target")

        # Object-typed labels are mapped to integer codes; the encoder is
        # refitted from scratch for each dataset by fit_transform
        if y.dtype == object:
            y = label_encoder.fit_transform(y)

        # Fixed seed + stratification: reproducible 80/20 split that keeps
        # the class proportions in both parts
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, stratify=y, random_state=42
        )

        model = GaussianNB()
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)
        acc = accuracy_score(y_test, predictions)
    except Exception as exc:
        # Record the failure and carry on with the remaining datasets
        record["Error"] = str(exc)
    else:
        record["Accuracy"] = round(acc, 4)

    results.append(record)

# Show results in table
results_df = pd.DataFrame(results)
print("📋 Naive Bayes Accuracy Results:")
# `display` is injected as a builtin only by IPython/Jupyter/Colab; when this
# code runs as a plain Python script fall back to a text rendering instead of
# dying with a NameError after all models have already been trained.
try:
    display
except NameError:
    print(results_df.to_string(index=False))
else:
    display(results_df)

# Show bar chart
# Failed runs carry the string "Error" in the Accuracy column; drop them and
# convert what is left to numbers so it can be plotted.
plot_data = results_df[results_df["Accuracy"] != "Error"].copy()
plot_data["Accuracy"] = pd.to_numeric(plot_data["Accuracy"])

if plot_data.empty:
    # Every dataset failed: an empty set of axes would be misleading, so say
    # so explicitly and skip the figure.
    print("No dataset was evaluated successfully; skipping the accuracy chart.")
else:
    plt.figure(figsize=(10, 5))
    plt.bar(plot_data["Dataset"], plot_data["Accuracy"], color='orange')
    # Dataset names are long; slant them so they do not overlap
    plt.xticks(rotation=45, ha='right')
    plt.ylabel("Accuracy")
    plt.title("Naive Bayes Accuracy per Dataset")
    plt.tight_layout()
    plt.show()

