## Naive Bayes on Social_Network_Ads dataset

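This notebook fits a Gaussian Naive Bayes classifier using `sklearn` on the Social_Network_Ads dataset with minimal preprocessing: categorical columns are one-hot encoded, features are standardized, and the model is evaluated on a stratified 20% hold-out split.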


In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the Social_Network_Ads dataset
df = pd.read_csv("data/Social_Network_Ads.csv")

# Choose the target column: "Purchased" when present, otherwise fall back to
# the last column of the file.
target_col = "Purchased" if "Purchased" in df.columns else df.columns[-1]

# Build the feature matrix from everything except the target.
# Also drop the row identifier if the CSV carries one (the commonly
# distributed version of this file has a "User ID" column): an arbitrary key
# has no predictive meaning, yet it would otherwise be scaled and modelled as
# a Gaussian feature. errors="ignore" keeps this working for copies of the
# file that do not have that column.
X = df.drop(columns=[target_col]).drop(columns=["User ID"], errors="ignore")
y = df[target_col]

# One-hot encode any categorical columns (e.g., Gender); drop_first=True
# removes one level per category to avoid a redundant dummy column.
X = pd.get_dummies(X, drop_first=True)

# Hold out 20% of the rows for testing. stratify=y keeps the class balance the
# same in both splits, and the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Notebook display: (rows, columns) of the train and test feature matrices
X_train.shape, X_test.shape


In [None]:
# Two-stage pipeline: standardize the features, then fit Gaussian Naive Bayes.
# Keeping the scaler inside the pipeline means it is fitted on the training
# data only and merely applied to the test data at prediction time.
scaling_step = ("scaler", StandardScaler())
classifier_step = ("nb", GaussianNB())
model = Pipeline(steps=[scaling_step, classifier_step])

# Pipeline.fit returns the fitted pipeline, so prediction can be chained.
y_pred = model.fit(X_train, y_train).predict(X_test)

# Report each metric on the held-out test set, in a fixed order.
for heading, metric in (
    ("GaussianNB accuracy:", accuracy_score),
    ("\nConfusion matrix:\n", confusion_matrix),
    ("\nClassification report:\n", classification_report),
):
    print(heading, metric(y_test, y_pred))
