In [13]:
import pandas as pd
from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.estimators import ParameterEstimator, BayesianEstimator
from pgmpy.inference import VariableElimination
from pgmpy.factors.discrete import TabularCPD
import matplotlib.pyplot as plt
import networkx as nx

In [14]:
# Load the campaign data and rename the two underscore-separated columns to
# the identifiers used as node names when the network is built.
df = pd.read_csv("Meera_Gems_Campaign_Dataset.csv").rename(
    columns={'Age_Group': 'AgeGroup', 'Marital_Status': 'MaritalStatus'}
)
df.head()

Unnamed: 0,Customer_ID,Gender,AgeGroup,Business,Employed,MaritalStatus,Success
0,1,Male,>35,Yes,No,Single,Yes
1,2,Female,>35,No,Yes,Married,No
2,3,Female,>35,Yes,No,Single,No
3,4,Female,<=35,No,Yes,Married,Yes
4,5,Female,<=35,Yes,No,Married,No


In [15]:
# Directed (parent, child) edges of the campaign network. Success has four
# direct parents; Business reaches Success only through Employed.
network_edges = [
    ('AgeGroup', 'MaritalStatus'),
    ('AgeGroup', 'Success'),
    ('Gender', 'Success'),
    ('Business', 'Employed'),
    ('Employed', 'Success'),
    ('MaritalStatus', 'Success'),
]
model = DiscreteBayesianNetwork(network_edges)


In [16]:
# Estimate every node's CPD from the data using a Bayesian estimator with a
# BDeu prior. The call is the cell's last expression, so its return value is
# displayed.
model.fit(
    df,
    estimator=BayesianEstimator,
    prior_type="BDeu",
)

INFO:pgmpy: Datatype (N=numerical, C=Categorical Unordered, O=Categorical Ordered) inferred from data: 
 {'Customer_ID': 'N', 'Gender': 'C', 'AgeGroup': 'C', 'Business': 'C', 'Employed': 'C', 'MaritalStatus': 'C', 'Success': 'C'}


<pgmpy.models.DiscreteBayesianNetwork.DiscreteBayesianNetwork at 0x75ed33d50350>

In [17]:
# Validate the fitted network (structure and CPDs); the cell displays the
# returned value, which is True in the recorded run.
model.check_model()

True

In [None]:
# Hand-specified CPDs for every node of the network.
#
# NOTE(review): add_cpds() replaces any CPD already attached to the model for
# the same variable, so the parameters learned by model.fit() are overwritten
# by the values below. Confirm that this is intended.
#
# None of these CPDs declare state names, so states are addressed by integer
# index (0 / 1). The per-line comments give the intended meaning of each index.

# CPDs for root nodes (no parents)
cpd_age = TabularCPD('AgeGroup', 2, [[0.5], [0.5]])  # 50% <=35, 50% >35
cpd_gender = TabularCPD('Gender', 2, [[0.5], [0.5]])  # 50% Male, 50% Female
cpd_business = TabularCPD('Business', 2, [[0.6], [0.4]])  # 60% No, 40% Yes

# Employed depends on Business (one column per Business state)
cpd_employed = TabularCPD(
    variable='Employed',
    variable_card=2,
    values=[[0.8, 0.3],   # Employed = No
            [0.2, 0.7]],  # Employed = Yes
    evidence=['Business'],
    evidence_card=[2]
)

# MaritalStatus depends on AgeGroup (one column per AgeGroup state)
cpd_marital = TabularCPD(
    variable='MaritalStatus',
    variable_card=2,
    values=[[0.7, 0.3],   # Single
            [0.3, 0.7]],  # Married
    evidence=['AgeGroup'],
    evidence_card=[2]
)

# Success depends on AgeGroup, Gender, Employed, MaritalStatus
# For 4 binary variables, 2^4 = 16 combinations.
# Columns enumerate the parent states with the LAST evidence variable
# (MaritalStatus) varying fastest and the first (AgeGroup) slowest, e.g.
# column 3 is AgeGroup=0, Gender=0, Employed=1, MaritalStatus=1.
cpd_success = TabularCPD(
    variable='Success',
    variable_card=2,
    values=[
        # Success = No
        [0.9, 0.85, 0.8, 0.75,
         0.7, 0.65, 0.6, 0.55,
         0.5, 0.45, 0.4, 0.35,
         0.3, 0.25, 0.2, 0.15],
        # Success = Yes
        [0.1, 0.15, 0.2, 0.25,
         0.3, 0.35, 0.4, 0.45,
         0.5, 0.55, 0.6, 0.65,
         0.7, 0.75, 0.8, 0.85]
    ],
    evidence=['AgeGroup', 'Gender', 'Employed', 'MaritalStatus'],
    evidence_card=[2, 2, 2, 2]
)

# Add all CPDs to the model
model.add_cpds(cpd_age, cpd_gender, cpd_business,
               cpd_employed, cpd_marital, cpd_success)

# Re-validate after swapping the CPDs: the check_model() call earlier in the
# notebook only covered the fitted parameters, not the tables defined here.
print("Model valid?", model.check_model())

# Print CPDs
print("CPD of AgeGroup:\n", cpd_age)
print("CPD of Gender:\n", cpd_gender)
print("CPD of Business:\n", cpd_business)
print("CPD of Employed:\n", cpd_employed)
print("CPD of MaritalStatus:\n", cpd_marital)
print("CPD of Success:\n", cpd_success)



Model valid? True
CPD of AgeGroup:
 +-------------+-----+
| AgeGroup(0) | 0.5 |
+-------------+-----+
| AgeGroup(1) | 0.5 |
+-------------+-----+
CPD of Gender:
 +-----------+-----+
| Gender(0) | 0.5 |
+-----------+-----+
| Gender(1) | 0.5 |
+-----------+-----+
CPD of Business:
 +-------------+-----+
| Business(0) | 0.6 |
+-------------+-----+
| Business(1) | 0.4 |
+-------------+-----+
CPD of Employed:
 +-------------+-------------+-------------+
| Business    | Business(0) | Business(1) |
+-------------+-------------+-------------+
| Employed(0) | 0.8         | 0.3         |
+-------------+-------------+-------------+
| Employed(1) | 0.2         | 0.7         |
+-------------+-------------+-------------+
CPD of MaritalStatus:
 +------------------+-------------+-------------+
| AgeGroup         | AgeGroup(0) | AgeGroup(1) |
+------------------+-------------+-------------+
| MaritalStatus(0) | 0.7         | 0.3         |
+------------------+-------------+-------------+
| MaritalStatus(

In [19]:
# Variable-elimination inference engine over the current model; the Q3, Q6
# and Q7 cells query it.
inference = VariableElimination(model)

In [23]:
# Q3: distribution of Success given Gender = 1 (1 = Female)
result = inference.query(
    variables=['Success'],
    evidence=dict(Gender=1),
)
print(result)

+------------+----------------+
| Success    |   phi(Success) |
| Success(0) |         0.4350 |
+------------+----------------+
| Success(1) |         0.5650 |
+------------+----------------+


In [24]:
# Q4: empirical share of rows with Success == 'Yes' among rows whose
# AgeGroup is '>35' (computed from the raw data, not from the network).
success_over_35 = df.loc[df['AgeGroup'] == '>35']
proportion = success_over_35['Success'].eq('Yes').mean()
print(f"Proportion of Success (Age > 35): {proportion:.2f}")


Proportion of Success (Age > 35): 0.53


In [25]:
# Q5: within each Employed group, the relative frequency of each Success
# value, pivoted so that Success values become columns.
success_within_employment = df.groupby('Employed')['Success']
success_shares = success_within_employment.value_counts(normalize=True)
success_by_employment = success_shares.unstack()
print("Success Rate by Employment Status:\n", success_by_employment)

Success Rate by Employment Status:
 Success         No       Yes
Employed                    
No        0.361702  0.638298
Yes       0.509434  0.490566


In [28]:
# Q6: distribution of Success given Business = 0 and MaritalStatus = 1
result = inference.query(
    variables=['Success'],
    evidence=dict(Business=0, MaritalStatus=1),
)
print(result)

+------------+----------------+
| Success    |   phi(Success) |
| Success(0) |         0.4500 |
+------------+----------------+
| Success(1) |         0.5500 |
+------------+----------------+


In [29]:
# Q7: distribution of Success with all four of its parents observed
# (Gender = 0, AgeGroup = 0, Employed = 1, MaritalStatus = 1).
result = inference.query(
    variables=['Success'],
    evidence=dict(Gender=0, AgeGroup=0, Employed=1, MaritalStatus=1),
)
print(result)

+------------+----------------+
| Success    |   phi(Success) |
| Success(0) |         0.7500 |
+------------+----------------+
| Success(1) |         0.2500 |
+------------+----------------+
