In [39]:
import pandas as pd
dataset=pd.read_csv("Social_Network_Ads.csv")
dataset

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0
...,...,...,...,...,...
395,15691863,Female,46,41000,1
396,15706071,Male,51,23000,1
397,15654296,Female,50,20000,1
398,15755018,Male,36,33000,0


In [40]:
dataset=dataset.drop("User ID",axis=1)
dataset

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0
...,...,...,...,...
395,Female,46,41000,1
396,Male,51,23000,1
397,Female,50,20000,1
398,Male,36,33000,0


In [41]:
independent=dataset.drop("Purchased",axis=1)
dependent=dataset["Purchased"]

In [42]:
from sklearn.model_selection import train_test_split
Xtrain,Xtest,Ytrain,Ytest=train_test_split(independent,dependent,test_size=0.3,random_state=0)

In [43]:
from sklearn.preprocessing import OneHotEncoder,StandardScaler,MinMaxScaler
from sklearn.compose import ColumnTransformer
# For GaussianNB (can handle negatives)
Preprocess_standard = ColumnTransformer([
    ('num', StandardScaler(), ['Age','EstimatedSalary']),
    ('cat', OneHotEncoder(drop='first'), ['Gender'])
])

# For others (require non-negative)
Preprocess_minmax = ColumnTransformer([
    ('num', MinMaxScaler(), ['Age','EstimatedSalary']),
    ('cat', OneHotEncoder(drop='first'), ['Gender'])
])

In [44]:
from sklearn.pipeline import Pipeline
import numpy as np
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB, CategoricalNB, ComplementNB
models_and_params = {
    "GaussianNB": (
        GaussianNB(),
        {"model__var_smoothing": np.logspace(0, -9, num=10)}
    ),
    "MultinomialNB": (
        MultinomialNB(),
        {"model__alpha": [0.1, 0.5, 1.0, 5.0, 10.0]}
    ),
    "BernoulliNB": (
        BernoulliNB(),
        {
            "model__alpha": [0.1, 0.5, 1.0, 5.0],
            "model__binarize": [0.0, 0.5, 1.0]
        }
    ),
    "CategoricalNB": (
        CategoricalNB(),
        {"model__alpha": [0.1, 0.5, 1.0, 5.0]}
    ),
    "ComplementNB": (
        ComplementNB(),
        {"model__alpha": [0.1, 0.5, 1.0, 5.0, 10.0]}
    )
}

In [45]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
results = {}

for name, (clf, params) in models_and_params.items():
    print(f"\n=== Running GridSearchCV for {name} ===")

    # Choose preprocessing
    if name == "GaussianNB":
        pre = Preprocess_standard
    else:
        pre = Preprocess_minmax

    # Pipeline
    pipe = Pipeline(steps=[
        ("pre", pre),
        ("model", clf)
    ])

    # GridSearchCV must be indented at the same level as the other code inside the loop
    grid = GridSearchCV(pipe, params, cv=3, n_jobs=-1, verbose=0)
    grid.fit(Xtrain, Ytrain)

    # Predictions
    Ypred = grid.predict(Xtest)

    # Metrics
    cm = confusion_matrix(Ytest, Ypred)
    report = classification_report(Ytest, Ypred)
    acc = accuracy_score(Ytest, Ypred)

    # Print results
    print("Best Params:", grid.best_params_)
    print("CV Best Score:", grid.best_score_)
    print("Confusion Matrix:\n", cm)
    print("Classification Report:\n", report)
    print("Test Accuracy:", acc)

    # Save results
    results[name] = {
        "best_params": grid.best_params_,
        "cv_score": grid.best_score_,
        "test_accuracy": acc,
        "report": report,
        "cm": cm
    }

# Final summary
print("\n=== Summary of Results ===")
for name, res in results.items():
    print(f"{name}: CV={res['cv_score']:.3f}, Test={res['test_accuracy']:.3f}, Params={res['best_params']}")

# Find best by Test Accuracy
best_model_acc = max(results.items(), key=lambda x: x[1]['test_accuracy'])
print("\n✅ Best by Test Accuracy:", best_model_acc[0], 
      f"with Accuracy={best_model_acc[1]['test_accuracy']:.3f}")

# Find best by CV Score
best_model_cv = max(results.items(), key=lambda x: x[1]['cv_score'])
print("✅ Best by CV Score:", best_model_cv[0], 
      f"with CV={best_model_cv[1]['cv_score']:.3f}")

#grid=GridSearchCV(pipe,param_grid,n_jobs=-1,verbose=3)
#grid.fit(Xtrain,Ytrain)


=== Running GridSearchCV for GaussianNB ===
Best Params: {'model__var_smoothing': 0.001}
CV Best Score: 0.882254251506139
Confusion Matrix:
 [[76  3]
 [ 9 32]]
Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.96      0.93        79
           1       0.91      0.78      0.84        41

    accuracy                           0.90       120
   macro avg       0.90      0.87      0.88       120
weighted avg       0.90      0.90      0.90       120

Test Accuracy: 0.9

=== Running GridSearchCV for MultinomialNB ===
Best Params: {'model__alpha': 0.1}
CV Best Score: 0.6357050255471669
Confusion Matrix:
 [[79  0]
 [41  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.66      1.00      0.79        79
           1       0.00      0.00      0.00        41

    accuracy                           0.66       120
   macro avg       0.33      0.50      0.40       120
weighted avg      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Best Params: {'model__alpha': 0.1, 'model__binarize': 0.5}
CV Best Score: 0.7321741782963471
Confusion Matrix:
 [[71  8]
 [20 21]]
Classification Report:
               precision    recall  f1-score   support

           0       0.78      0.90      0.84        79
           1       0.72      0.51      0.60        41

    accuracy                           0.77       120
   macro avg       0.75      0.71      0.72       120
weighted avg       0.76      0.77      0.75       120

Test Accuracy: 0.7666666666666667

=== Running GridSearchCV for CategoricalNB ===
Best Params: {'model__alpha': 0.1}
CV Best Score: 0.6392892549378479
Confusion Matrix:
 [[79  0]
 [40  1]]
Classification Report:
               precision    recall  f1-score   support

           0       0.66      1.00      0.80        79
           1       1.00      0.02      0.05        41

    accuracy                           0.67       120
   macro avg       0.83      0.51      0.42       120
weighted avg       0.78      0.67

In [47]:
print("\n=== Classification Reports for All Models ===")
for name, res in results.items():
    print(f"\n--- {name} ---")
    print(res["cm"])
    print(res["report"])


=== Classification Reports for All Models ===

--- GaussianNB ---
[[76  3]
 [ 9 32]]
              precision    recall  f1-score   support

           0       0.89      0.96      0.93        79
           1       0.91      0.78      0.84        41

    accuracy                           0.90       120
   macro avg       0.90      0.87      0.88       120
weighted avg       0.90      0.90      0.90       120


--- MultinomialNB ---
[[79  0]
 [41  0]]
              precision    recall  f1-score   support

           0       0.66      1.00      0.79        79
           1       0.00      0.00      0.00        41

    accuracy                           0.66       120
   macro avg       0.33      0.50      0.40       120
weighted avg       0.43      0.66      0.52       120


--- BernoulliNB ---
[[71  8]
 [20 21]]
              precision    recall  f1-score   support

           0       0.78      0.90      0.84        79
           1       0.72      0.51      0.60        41

    accuracy  