In [24]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.metrics import accuracy_score
import warnings
warnings.simplefilter(action='ignore', category=pd.errors.SettingWithCopyWarning)

# Load dataset

In [25]:
file_path = "advertising_ef.csv"
df = pd.read_csv(file_path)

# Drop rows with missing values

In [26]:
# Drop rows with missing values
df_cleaned = df.dropna()

### Ensure 'Clicked on Ad' has no missing values

In [27]:

# Ensure 'Clicked on Ad' has no missing values
df_cleaned = df_cleaned.dropna(subset=['Clicked on Ad'])

# Encode categorical features using OneHotEncoder

In [28]:
from sklearn.preprocessing import OrdinalEncoder
categorical_features = ['Country', 'City']
ordinal_enc = OrdinalEncoder()
df_cleaned[categorical_features] = ordinal_enc.fit_transform(df_cleaned[categorical_features])

## Convert categorical features to integer type for MultinomialNB

In [29]:
# Convert categorical features to integer type for MultinomialNB
df_cleaned[categorical_features] = df_cleaned[categorical_features].astype(int)


# Binary encoding for 'Gender'

In [31]:

df_cleaned['Gender'] = df_cleaned['Gender'].map({'Male': 1, 'Female': 0})


# Define features (X) and target (y)

In [32]:
# Define features (X) and target (y)
numerical_features = ['Daily Time Spent on Site', 'Age', 'Area Income', 'Daily Internet Usage']
X = df_cleaned[numerical_features + categorical_features + ['Gender']]
y = df_cleaned['Clicked on Ad']

# Ensure X and y have the same number of rows

In [33]:
# Ensure X and y have the same number of rows
X = X.loc[y.index]


# Standardize numerical features

In [34]:
# Standardize numerical features
scaler = StandardScaler()
X[numerical_features] = scaler.fit_transform(X[numerical_features])


# Split into train and test sets

In [35]:
# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)


## Gaussian Naïve Bayes (for numerical features)

In [37]:
# Gaussian Naïve Bayes (for numerical features)
gnb = GaussianNB()
X_train_gnb = X_train[numerical_features]
X_test_gnb = X_test[numerical_features]
gnb.fit(X_train_gnb, y_train)
probs_gnb = gnb.predict_proba(X_test_gnb)

# Multinomial Naïve Bayes (for categorical features)

In [39]:
# Multinomial Naïve Bayes (for categorical features)
if not X_train[categorical_features].empty:
    mnb = MultinomialNB()
    X_train_mnb = X_train[categorical_features]
    X_test_mnb = X_test[categorical_features]
    mnb.fit(X_train_mnb, y_train)
    probs_mnb = mnb.predict_proba(X_test_mnb)
else:
    probs_mnb = np.ones((len(y_test), 2)) * 0.5  # Default neutral probabilities if empty

# Bernoulli Naïve Bayes (for binary feature: Gender)

In [40]:
# Bernoulli Naïve Bayes (for binary feature: Gender)
bnb = BernoulliNB()
X_train_bnb = X_train[['Gender']]
X_test_bnb = X_test[['Gender']]
bnb.fit(X_train_bnb, y_train)
probs_bnb = bnb.predict_proba(X_test_bnb)


# Ensemble Using Probability Multiplication

In [41]:
# Ensemble Using Probability Multiplication
ensemble_probs = probs_gnb * probs_mnb * probs_bnb  # Multiply probabilities
ensemble_probs = ensemble_probs / np.sum(ensemble_probs, axis=1, keepdims=True)  # Normalize

# Final predictions

In [42]:
# Final predictions
final_predictions = ensemble_probs[:, 1] >= 0.5  # Convert probabilities to binary (0 or 1)

# Model accuracies

In [43]:
# Model accuracies
accuracy_gnb = accuracy_score(y_test, gnb.predict(X_test_gnb))
accuracy_mnb = accuracy_score(y_test, mnb.predict(X_test_mnb)) if not X_train[categorical_features].empty else None
accuracy_bnb = accuracy_score(y_test, bnb.predict(X_test_bnb))
ensemble_accuracy = accuracy_score(y_test, final_predictions)


## Print results

In [44]:
# Print results
print("Gaussian Naïve Bayes Accuracy:", accuracy_gnb)
if accuracy_mnb is not None:
    print("Multinomial Naïve Bayes Accuracy:", accuracy_mnb)
print("Bernoulli Naïve Bayes Accuracy:", accuracy_bnb)
print("Ensemble Model Accuracy:", ensemble_accuracy)

Gaussian Naïve Bayes Accuracy: 0.9581881533101045
Multinomial Naïve Bayes Accuracy: 0.5296167247386759
Bernoulli Naïve Bayes Accuracy: 0.5052264808362369
Ensemble Model Accuracy: 0.9547038327526133



# Convert probabilities to DataFrame

In [None]:

# Convert probabilities to DataFrame for better understanding in the form of table
prob_df = pd.DataFrame({
    'Actual Target': y_test.values,
    'GNB - P(No)': probs_gnb[:, 0], 'GNB - P(Yes)': probs_gnb[:, 1],
    'MNB - P(No)': probs_mnb[:, 0], 'MNB - P(Yes)': probs_mnb[:, 1],
    'BNB - P(No)': probs_bnb[:, 0], 'BNB - P(Yes)': probs_bnb[:, 1],
    'Ensemble - P(No)': ensemble_probs[:, 0], 'Ensemble - P(Yes)': ensemble_probs[:, 1],
    'Final Prediction': final_predictions
})


# Map 0 -> "No", 1 -> "Yes"

In [None]:
# Map 0 -> "No", 1 -> "Yes" for better readability
prob_df['Actual Target'] = prob_df['Actual Target'].map({0: "No", 1: "Yes"})
prob_df['Final Prediction'] = prob_df['Final Prediction'].map({0: "No", 1: "Yes"})

# Display DataFrame

In [50]:
# Display DataFrame in tabular format with rounded values
prob_df = prob_df.round(6)
prob_df.head()


Unnamed: 0,Actual Target,GNB - P(No),GNB - P(Yes),MNB - P(No),MNB - P(Yes),BNB - P(No),BNB - P(Yes),Ensemble - P(No),Ensemble - P(Yes),Final Prediction
0,Yes,0.0,1.0,0.515602,0.484398,0.520093,0.479907,0.0,1.0,
1,No,0.999353,0.000647,0.156657,0.843343,0.471398,0.528602,0.996107,0.003893,
2,No,0.99898,0.00102,0.61562,0.38438,0.520093,0.479907,0.999412,0.000588,
3,No,0.996987,0.003013,0.462489,0.537511,0.471398,0.528602,0.996076,0.003924,
4,Yes,2e-06,0.999998,0.287164,0.712836,0.471398,0.528602,1e-06,0.999999,
