In [1]:
import pandas as pd
import numpy as np

# Create a dataset with 10 rows
data = {
    'CustomerID': range(1, 11),
    'Gender': ['Male', 'Female', 'Female', 'Male', 'Male', 'Female', 'Male', 'Female', 'Male', 'Female'],
    'ProductCategory': ['Electronics', 'Clothing', 'Clothing', 'Electronics', 'Home', 'Electronics', 'Clothing', 'Home', 'Electronics', 'Clothing'],
    'Purchased': ['Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes', 'No']
}
df = pd.DataFrame(data)
print("Dataset:")
print(df)

Dataset:
   CustomerID  Gender ProductCategory Purchased
0           1    Male     Electronics       Yes
1           2  Female        Clothing       Yes
2           3  Female        Clothing        No
3           4    Male     Electronics       Yes
4           5    Male            Home        No
5           6  Female     Electronics       Yes
6           7    Male        Clothing        No
7           8  Female            Home       Yes
8           9    Male     Electronics       Yes
9          10  Female        Clothing        No


In [2]:
# Basic Probability: P(Purchased='Yes')
total_customers = len(df)
purchased_yes = len(df[df['Purchased'] == 'Yes'])
prob_purchase = purchased_yes / total_customers
print(f"P(Purchase='Yes'): {prob_purchase}")

# Joint Probability: P(Male AND Electronics)
male_electronics = len(df[(df['Gender'] == 'Male') & (df['ProductCategory'] == 'Electronics')])
prob_male_electronics = male_electronics / total_customers
print(f"P(Male AND Electronics): {prob_male_electronics}")

P(Purchase='Yes'): 0.6
P(Male AND Electronics): 0.3


In [3]:
# Conditional Probability: P(Purchase='Yes' | Gender='Female')
# P(A|B) = P(A and B) / P(B)
females = df[df['Gender'] == 'Female']
female_purchased = len(females[females['Purchased'] == 'Yes'])
prob_purchase_given_female = female_purchased / len(females)
print(f"P(Purchase='Yes' | Gender='Female'): {prob_purchase_given_female}")

P(Purchase='Yes' | Gender='Female'): 0.6


In [4]:
# Bayes Theorem Example
# P(A|B) = (P(B|A) * P(A)) / P(B)
# Let A = Purchased='Yes', B = Gender='Male'
# Calculate P(Male|Purchase='Yes') using Bayes

p_a = prob_purchase # P(Purchase='Yes')
p_b = len(df[df['Gender'] == 'Male']) / total_customers # P(Male)

# P(B|A) = P(Male | Purchase='Yes')
purchased_df = df[df['Purchased'] == 'Yes']
p_b_given_a = len(purchased_df[purchased_df['Gender'] == 'Male']) / len(purchased_df)

bayes_result = (p_b_given_a * p_a) / p_b
print(f"Calculated P(Male|Purchase='Yes') using Bayes: {bayes_result}")

# Verification
actual_val = len(df[(df['Gender'] == 'Male') & (df['Purchased'] == 'Yes')]) / len(df[df['Purchased'] == 'Yes'])
print(f"Actual P(Male|Purchase='Yes'): {actual_val}")

Calculated P(Male|Purchase='Yes') using Bayes: 0.6
Actual P(Male|Purchase='Yes'): 0.5
