In [None]:
pip install pgmpy



In [None]:
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination
import numpy as np
import pandas as pd

In [None]:
# Path of the CSV file holding the dataset.
csv_file_path = "/content/spam.csv"

# Read the whole file into a DataFrame; later cells use its 'Message' and
# 'Category' columns.
df = pd.read_csv(filepath_or_buffer=csv_file_path)

In [None]:
# Names of the DataFrame columns holding the message text and its class label.
text_column_name = 'Message'
label_column_name = 'Category'

# Integer codes used for the two classes in the rest of the notebook.
label_encoding = {'ham': 0, 'spam': 1}

# Encode only while the column still contains raw string labels. Without this
# guard, re-running the cell would map the already-encoded 0/1 values to NaN,
# because they are not keys of `label_encoding`.
if df[label_column_name].isin(list(label_encoding)).any():
    df[label_column_name] = df[label_column_name].map(label_encoding)

In [None]:
# Distinct message texts, in the order pandas reports them; their positions
# become the integer codes of the 'Message_encoded' variable in a later cell.
messages = pd.unique(df[text_column_name])

# Two-node network with a single directed edge: Message_encoded -> Category.
model = BayesianModel([('Message_encoded', 'Category')])



In [None]:
# Map each distinct message text to its position in `messages`.
message_mapping = dict(zip(messages, range(len(messages))))

# Store the integer code of every row's message in a new column, which serves
# as the observed value of the 'Message_encoded' node.
df['Message_encoded'] = df[text_column_name].map(message_mapping)


In [None]:
# Estimate the network's CPDs from the data by maximum likelihood.
model.fit(data=df, estimator=MaximumLikelihoodEstimator)

In [None]:
num_messages = len(messages)

# Empirical P(Category | message): for every distinct message, the share of its
# rows labelled 0 and labelled 1. A single grouped pass replaces filtering the
# whole frame once per message (which was quadratic in the number of rows).
# Rows are then aligned to `messages` so that column i of the table belongs to
# the message encoded as i; messages or labels with no counts are filled with 0.
category_values = (
    df.groupby(text_column_name, sort=False)[label_column_name]
    .value_counts(normalize=True)
    .unstack(fill_value=0)
    .reindex(index=messages, columns=[0, 1], fill_value=0)
    .to_numpy(dtype=float)
    .T  # shape (2, num_messages): row 0 -> label 0, row 1 -> label 1
)

# Conditional table for 'Category' given 'Message_encoded'. The two rows are
# exposed under the state names 'ham' (label 0) and 'spam' (label 1).
category_cpd = TabularCPD(
    variable='Category',
    variable_card=2,
    values=category_values,
    evidence=['Message_encoded'],
    evidence_card=[num_messages],
    state_names={
        'Category': ['ham', 'spam'],
        'Message_encoded': list(range(num_messages))
    }
)



In [None]:
# Attach the hand-built 'Category' CPD to the network.
# NOTE(review): fit() was already called above, so this presumably replaces the
# 'Category' CPD learned there — confirm against the installed pgmpy version.
model.add_cpds(category_cpd)




In [None]:
# Sanity check: stop the notebook here unless check_model() returns a truthy result.
assert model.check_model()

In [None]:
# Variable-elimination inference engine over the network; the prediction cell
# below uses it for MAP queries.
inference = VariableElimination(model)

In [None]:
# Messages to classify. Only texts that occur verbatim in the training data can
# be scored, because the model's evidence variable is the message's index.
new_texts = [
    'U dun say so early hor... U c already then say...',
    'Ok lar... Joking wif u oni...'
]

# Values of the MAP result that denote the spam class. The 'Category' CPD
# declares its states as ['ham', 'spam'], so the query may return the state
# name rather than the integer code; comparing only against 1 would then
# label every message as ham. Both representations are accepted here.
spam_states = {1, 'spam'}

for text in new_texts:
    # Unknown message: there is no evidence state to condition on.
    if text not in message_mapping:
        print(f"Text: {text} => Prediction: Unable to classify (message not in training data)")
        continue

    evidence = {'Message_encoded': message_mapping[text]}

    # Most probable 'Category' state given the observed message.
    result = inference.map_query(variables=[label_column_name], evidence=evidence)

    prediction = 'spam' if result[label_column_name] in spam_states else 'ham'
    print(f"Text: {text} => Prediction: {prediction}")




0it [00:00, ?it/s]

0it [00:00, ?it/s]



Text: U dun say so early hor... U c already then say... => Prediction: ham


0it [00:00, ?it/s]

0it [00:00, ?it/s]

Text: Ok lar... Joking wif u oni... => Prediction: ham
