## Installing and Importing Packages ##

In [None]:
!pip install pybbn

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import random
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_validate, RepeatedStratifiedKFold

from pybbn.graph.dag import Bbn
from pybbn.graph.edge import Edge, EdgeType
from pybbn.graph.jointree import EvidenceBuilder
from pybbn.graph.node import BbnNode
from pybbn.graph.variable import Variable
from pybbn.pptc.inferencecontroller import InferenceController

In [None]:
# Mount Google Drive at /content/drive; the dataset is read from this mount
# point by the cells below. google.colab is only importable inside Colab.
from google.colab import drive
drive.mount('/content/drive')

## Final Dataset Info ##

In [None]:
# Load the final dataset from the mounted Drive folder.
# NOTE(review): a later cell reloads final_data.csv from '.../wwwBN/data/'
# while this one reads '.../fakeBN/data/' — confirm which location is current.
final_data = pd.read_csv('/content/drive/MyDrive/fakeBN/data/final_data.csv')
# Bare expression: shown as the cell output, i.e. (number of rows, number of columns).
final_data.shape

In [None]:
# Class balance of the dataset: majority_target == 1 is reported as true
# news, majority_target == 0 as fake news.
target_column = final_data['majority_target']

count_majority_1 = int((target_column == 1).sum())
count_majority_0 = int((target_column == 0).sum())

print("Total true news :", count_majority_1)
print("Total fake news :", count_majority_0)

In [None]:
# Time span covered by the dataset: smallest and largest value of the
# 'timestamp' column.
timestamp_column = final_data['timestamp']
earliest_timestamp, latest_timestamp = timestamp_column.min(), timestamp_column.max()

print("Earliest Timestamp :", earliest_timestamp)
print("Latest Timestamp :", latest_timestamp)

## Fake News Detection with BN ##


In [None]:
# Every column used for modelling: the target followed by the features of
# the three modules (order is kept exactly as it was).
data_columns = [
    'majority_target',
    # features also listed in who_columns
    'followers_count', 'friends_count', 'favourites_count', 'statuses_count',
    'following', 'mentions', 'quotes', 'replies', 'retweets', 'favourites',
    'hashtags', 'URLs', 'BotScoreBinary', 'cred', 'normalize_influence',
    # features also listed in what_columns
    'unique_count', 'total_count',
    'ORG_percentage', 'NORP_percentage', 'GPE_percentage',
    'PERSON_percentage', 'MONEY_percentage', 'DATE_percentage',
    'CARDINAL_percentage', 'PERCENT_percentage', 'ORDINAL_percentage',
    'FAC_percentage', 'LAW_percentage', 'PRODUCT_percentage',
    'EVENT_percentage', 'TIME_percentage', 'LOC_percentage',
    'WORK_OF_ART_percentage', 'QUANTITY_percentage', 'LANGUAGE_percentage',
    'Word count', 'Max word length', 'Min word length', 'Average word length',
    'present_verbs', 'past_verbs', 'adjectives', 'adverbs', 'adpositions',
    'pronouns', 'TOs', 'determiners', 'conjunctions',
    'dots', 'exclamation', 'questions', 'ampersand', 'capitals', 'digits',
    'long_word_freq', 'short_word_freq',
    # features also listed in when_columns
    'matches_significant_event', 'day', 'hour', 'minute', 'month', 'quarter',
    'is_weekend',
]

# Per-module column lists; each one starts with the target column so a
# module frame carries its own label.
who_columns = [
    'majority_target',
    'followers_count', 'friends_count', 'favourites_count', 'statuses_count',
    'following', 'mentions', 'quotes', 'replies', 'retweets', 'favourites',
    'hashtags', 'URLs', 'BotScoreBinary', 'cred', 'normalize_influence',
]

when_columns = [
    'majority_target',
    'matches_significant_event', 'day', 'hour', 'minute', 'month', 'quarter',
    'is_weekend',
]

what_columns = [
    'majority_target',
    'present_verbs', 'past_verbs', 'adjectives', 'adverbs', 'adpositions',
    'pronouns', 'TOs', 'determiners', 'conjunctions',
    'dots', 'exclamation', 'questions', 'ampersand', 'capitals', 'digits',
    'long_word_freq', 'short_word_freq',
    'unique_count', 'total_count',
    'ORG_percentage', 'NORP_percentage', 'GPE_percentage',
    'PERSON_percentage', 'MONEY_percentage', 'DATE_percentage',
    'CARDINAL_percentage', 'PERCENT_percentage', 'ORDINAL_percentage',
    'FAC_percentage', 'LAW_percentage', 'PRODUCT_percentage',
    'EVENT_percentage', 'TIME_percentage', 'LOC_percentage',
    'WORK_OF_ART_percentage', 'QUANTITY_percentage', 'LANGUAGE_percentage',
    'Word count', 'Max word length', 'Min word length', 'Average word length',
]

In [None]:
# Uniform-feature experiment only: uncomment the lines below to truncate every
# module to its first 8 columns (the target plus 7 features).

# who_columns = who_columns[:8]
# what_columns = what_columns[:8]
# when_columns = when_columns[:8]

In [None]:
# Reload the dataset and keep only the modelling columns listed in data_columns.
# NOTE(review): this path uses the 'wwwBN' folder while the first load of
# final_data.csv uses 'fakeBN' — confirm which location is current.
final_data = pd.read_csv('/content/drive/MyDrive/wwwBN/data/final_data.csv')
data = final_data[data_columns]

In [None]:
# who, what and when modules
#
# 10-fold stratified evaluation of a Bayesian network (BN) that fuses the
# Who, What and When module classifiers.  In every fold the training indices
# are shuffled and split in half:
#   * first half  -> trains one random forest per module (with an internal
#                    80/20 hold-out that produces the per-module metrics);
#   * second half -> each forest's predictions on it become that module's
#                    conditional probability table (CPT);
#   * test fold   -> module predictions are fed to the BN as evidence and the
#                    more probable 'Authenticity' state is the BN prediction.
#
# NOTE(review): np.random.shuffle and RandomForestClassifier() are unseeded
# here, so scores differ between runs unless a global NumPy seed is set
# elsewhere in the notebook.


def _module_features(frame, columns):
    """Return the feature columns of one module, i.e. ``columns`` minus the target."""
    return frame[columns].drop('majority_target', axis=1)


def _record_scores(metrics, model_name, y_true, y_pred):
    """Append accuracy/precision/recall/F1 to ``metrics`` and print the new values."""
    metrics['accuracy'].append(accuracy_score(y_true, y_pred))
    metrics['precision'].append(precision_score(y_true, y_pred))
    metrics['recall'].append(recall_score(y_true, y_pred))
    metrics['f1'].append(f1_score(y_true, y_pred))
    print(f'{model_name} model - Accuracy: {metrics["accuracy"][-1]}, Precision: {metrics["precision"][-1]}, Recall: {metrics["recall"][-1]}, F1: {metrics["f1"][-1]}')


def _train_module(frame, columns, metrics, model_name):
    """Fit a random forest on 80% of ``frame`` and score it on the remaining 20%.

    Returns the fitted model; the hold-out scores are appended to ``metrics``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        _module_features(frame, columns), frame['majority_target'],
        test_size=0.2, random_state=42)
    model = RandomForestClassifier()
    model.fit(X_train, y_train)
    _record_scores(metrics, model_name, y_test, model.predict(X_test))
    return model


def _module_cpt(predictions, labels):
    """Build the CPT of a module node given the true label.

    Returns ``[P(pred=1|label=1), P(pred=0|label=1), P(pred=1|label=0),
    P(pred=0|label=0)]``, the order expected by a two-state child of a
    two-state parent whose states are listed as ['1', '0'].

    Raises:
        ValueError: if one of the two classes is absent, because the
            conditional probabilities would then be undefined (0/0).
    """
    predictions = np.asarray(predictions)
    labels = np.asarray(labels)
    cpt = []
    for label_value in (1, 0):
        class_predictions = predictions[labels == label_value]
        if class_predictions.size == 0:
            raise ValueError(
                f'CPT split contains no rows with majority_target == {label_value}')
        cpt.append(np.count_nonzero(class_predictions == 1) / class_predictions.size)
        cpt.append(np.count_nonzero(class_predictions == 0) / class_predictions.size)
    return cpt


def _bn_predict(bbn, evidence):
    """Classify every row from the module predictions given as evidence.

    ``evidence`` maps a BN node name to the sequence of 0/1 predictions of
    that module (all sequences have the same length).  The posterior depends
    only on the combination of evidence values, so inference runs once per
    distinct combination instead of once per row.
    """
    node_names = list(evidence)
    verdict_by_combo = {}
    verdicts = []
    for row in zip(*(evidence[name] for name in node_names)):
        combo = tuple(int(value) for value in row)
        if combo not in verdict_by_combo:
            # A fresh join tree guarantees no evidence is left over.
            join_tree = InferenceController.apply(bbn)
            for name, value in zip(node_names, combo):
                observation = EvidenceBuilder() \
                    .with_node(join_tree.get_bbn_node_by_name(name)) \
                    .with_evidence(str(value), 1.0) \
                    .build()
                join_tree.set_observation(observation)
            posteriors = join_tree.get_posteriors()['Authenticity']
            verdict_by_combo[combo] = 1 if posteriors['1'] > posteriors['0'] else 0
        verdicts.append(verdict_by_combo[combo])
    return verdicts


# Initialize lists to store metrics
who_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
what_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
when_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
bn_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}

kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
for train_index, test_index in kf.split(data, data['majority_target']):
    np.random.shuffle(train_index)
    midpoint = len(train_index) // 2
    module_training = train_index[:midpoint]
    cpt_formation = train_index[midpoint:]
    # reset_index so positional and label-based access agree downstream
    module_training_data = data.iloc[module_training].copy().reset_index(drop=True)
    cpt_data = data.iloc[cpt_formation].copy().reset_index(drop=True)
    test_data = data.iloc[test_index].copy().reset_index(drop=True)

    # Module training
    who_model = _train_module(module_training_data, who_columns, who_metrics, 'Who')
    what_model = _train_module(module_training_data, what_columns, what_metrics, 'What')
    when_model = _train_module(module_training_data, when_columns, when_metrics, 'When')

    # CPT formation: how each module behaves on data it was not trained on
    cpt_label = cpt_data['majority_target']
    who_cpt = _module_cpt(who_model.predict(_module_features(cpt_data, who_columns)), cpt_label)
    what_cpt = _module_cpt(what_model.predict(_module_features(cpt_data, what_columns)), cpt_label)
    when_cpt = _module_cpt(when_model.predict(_module_features(cpt_data, when_columns)), cpt_label)

    # Prior of the root node (labels are assumed to be 0/1, so the sum counts the 1s)
    total_true_cp = np.sum(cpt_label)
    total_fake_cp = len(cpt_data) - total_true_cp
    prob_t = total_true_cp / len(cpt_data)
    prob_f = total_fake_cp / len(cpt_data)

    # Bayesian network: Authenticity is the parent of every module node
    a = BbnNode(Variable(0, 'Authenticity', ['1', '0']), [prob_t, prob_f])
    who = BbnNode(Variable(1, 'Who', ['1', '0']), who_cpt)
    what = BbnNode(Variable(2, 'What', ['1', '0']), what_cpt)
    when = BbnNode(Variable(3, 'When', ['1', '0']), when_cpt)

    bbn = Bbn() \
        .add_node(a) \
        .add_node(who) \
        .add_node(what) \
        .add_node(when) \
        .add_edge(Edge(a, who, EdgeType.DIRECTED)) \
        .add_edge(Edge(a, what, EdgeType.DIRECTED)) \
        .add_edge(Edge(a, when, EdgeType.DIRECTED))

    # Testing
    test_label = test_data['majority_target']
    bn_predictions = _bn_predict(bbn, {
        'Who': who_model.predict(_module_features(test_data, who_columns)),
        'What': what_model.predict(_module_features(test_data, what_columns)),
        'When': when_model.predict(_module_features(test_data, when_columns)),
    })

    _record_scores(bn_metrics, 'BN', test_label, bn_predictions)
    print("\n ----------------------------------------------------------------------------- \n")

# Calculate and print average metrics for each model
print("\nAverage Metrics:")
for model_name, metrics in [('Who', who_metrics), ('What', what_metrics), ('When', when_metrics), ('BN', bn_metrics)]:
    print(f'\n{model_name} model:')
    for metric_name, metric_values in metrics.items():
        avg_metric = np.mean(metric_values)
        print(f'  Average {metric_name}: {avg_metric:.4f}')


In [None]:
# who and what module
#
# 10-fold stratified evaluation of a Bayesian network (BN) that fuses the
# Who and What module classifiers.  In every fold the training indices are
# shuffled and split in half:
#   * first half  -> trains one random forest per module (with an internal
#                    80/20 hold-out that produces the per-module metrics);
#   * second half -> each forest's predictions on it become that module's
#                    conditional probability table (CPT);
#   * test fold   -> module predictions are fed to the BN as evidence and the
#                    more probable 'Authenticity' state is the BN prediction.
#
# NOTE(review): np.random.shuffle and RandomForestClassifier() are unseeded
# here, so scores differ between runs unless a global NumPy seed is set
# elsewhere in the notebook.


def _module_features(frame, columns):
    """Return the feature columns of one module, i.e. ``columns`` minus the target."""
    return frame[columns].drop('majority_target', axis=1)


def _record_scores(metrics, model_name, y_true, y_pred):
    """Append accuracy/precision/recall/F1 to ``metrics`` and print the new values."""
    metrics['accuracy'].append(accuracy_score(y_true, y_pred))
    metrics['precision'].append(precision_score(y_true, y_pred))
    metrics['recall'].append(recall_score(y_true, y_pred))
    metrics['f1'].append(f1_score(y_true, y_pred))
    print(f'{model_name} model - Accuracy: {metrics["accuracy"][-1]}, Precision: {metrics["precision"][-1]}, Recall: {metrics["recall"][-1]}, F1: {metrics["f1"][-1]}')


def _train_module(frame, columns, metrics, model_name):
    """Fit a random forest on 80% of ``frame`` and score it on the remaining 20%.

    Returns the fitted model; the hold-out scores are appended to ``metrics``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        _module_features(frame, columns), frame['majority_target'],
        test_size=0.2, random_state=42)
    model = RandomForestClassifier()
    model.fit(X_train, y_train)
    _record_scores(metrics, model_name, y_test, model.predict(X_test))
    return model


def _module_cpt(predictions, labels):
    """Build the CPT of a module node given the true label.

    Returns ``[P(pred=1|label=1), P(pred=0|label=1), P(pred=1|label=0),
    P(pred=0|label=0)]``, the order expected by a two-state child of a
    two-state parent whose states are listed as ['1', '0'].

    Raises:
        ValueError: if one of the two classes is absent, because the
            conditional probabilities would then be undefined (0/0).
    """
    predictions = np.asarray(predictions)
    labels = np.asarray(labels)
    cpt = []
    for label_value in (1, 0):
        class_predictions = predictions[labels == label_value]
        if class_predictions.size == 0:
            raise ValueError(
                f'CPT split contains no rows with majority_target == {label_value}')
        cpt.append(np.count_nonzero(class_predictions == 1) / class_predictions.size)
        cpt.append(np.count_nonzero(class_predictions == 0) / class_predictions.size)
    return cpt


def _bn_predict(bbn, evidence):
    """Classify every row from the module predictions given as evidence.

    ``evidence`` maps a BN node name to the sequence of 0/1 predictions of
    that module (all sequences have the same length).  The posterior depends
    only on the combination of evidence values, so inference runs once per
    distinct combination instead of once per row.
    """
    node_names = list(evidence)
    verdict_by_combo = {}
    verdicts = []
    for row in zip(*(evidence[name] for name in node_names)):
        combo = tuple(int(value) for value in row)
        if combo not in verdict_by_combo:
            # A fresh join tree guarantees no evidence is left over.
            join_tree = InferenceController.apply(bbn)
            for name, value in zip(node_names, combo):
                observation = EvidenceBuilder() \
                    .with_node(join_tree.get_bbn_node_by_name(name)) \
                    .with_evidence(str(value), 1.0) \
                    .build()
                join_tree.set_observation(observation)
            posteriors = join_tree.get_posteriors()['Authenticity']
            verdict_by_combo[combo] = 1 if posteriors['1'] > posteriors['0'] else 0
        verdicts.append(verdict_by_combo[combo])
    return verdicts


# Initialize lists to store metrics
who_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
what_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
bn_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}

kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
for train_index, test_index in kf.split(data, data['majority_target']):
    np.random.shuffle(train_index)
    midpoint = len(train_index) // 2
    module_training = train_index[:midpoint]
    cpt_formation = train_index[midpoint:]
    # reset_index so positional and label-based access agree downstream
    module_training_data = data.iloc[module_training].copy().reset_index(drop=True)
    cpt_data = data.iloc[cpt_formation].copy().reset_index(drop=True)
    test_data = data.iloc[test_index].copy().reset_index(drop=True)

    # Module training
    who_model = _train_module(module_training_data, who_columns, who_metrics, 'Who')
    what_model = _train_module(module_training_data, what_columns, what_metrics, 'What')

    # CPT formation: how each module behaves on data it was not trained on
    cpt_label = cpt_data['majority_target']
    who_cpt = _module_cpt(who_model.predict(_module_features(cpt_data, who_columns)), cpt_label)
    what_cpt = _module_cpt(what_model.predict(_module_features(cpt_data, what_columns)), cpt_label)

    # Prior of the root node (labels are assumed to be 0/1, so the sum counts the 1s)
    total_true_cp = np.sum(cpt_label)
    total_fake_cp = len(cpt_data) - total_true_cp
    prob_t = total_true_cp / len(cpt_data)
    prob_f = total_fake_cp / len(cpt_data)

    # Bayesian network: Authenticity is the parent of every module node
    a = BbnNode(Variable(0, 'Authenticity', ['1', '0']), [prob_t, prob_f])
    who = BbnNode(Variable(1, 'Who', ['1', '0']), who_cpt)
    what = BbnNode(Variable(2, 'What', ['1', '0']), what_cpt)

    bbn = Bbn() \
        .add_node(a) \
        .add_node(who) \
        .add_node(what) \
        .add_edge(Edge(a, who, EdgeType.DIRECTED)) \
        .add_edge(Edge(a, what, EdgeType.DIRECTED))

    # Testing
    test_label = test_data['majority_target']
    bn_predictions = _bn_predict(bbn, {
        'Who': who_model.predict(_module_features(test_data, who_columns)),
        'What': what_model.predict(_module_features(test_data, what_columns)),
    })

    _record_scores(bn_metrics, 'BN', test_label, bn_predictions)
    print("\n ----------------------------------------------------------------------------- \n")

# Calculate and print average metrics for each model
print("\nAverage Metrics:")
for model_name, metrics in [('Who', who_metrics), ('What', what_metrics), ('BN', bn_metrics)]:
    print(f'\n{model_name} model:')
    for metric_name, metric_values in metrics.items():
        avg_metric = np.mean(metric_values)
        print(f'  Average {metric_name}: {avg_metric:.4f}')


In [None]:
# who and when module
#
# 10-fold stratified evaluation of a Bayesian network (BN) that fuses the
# Who and When module classifiers.  In every fold the training indices are
# shuffled and split in half:
#   * first half  -> trains one random forest per module (with an internal
#                    80/20 hold-out that produces the per-module metrics);
#   * second half -> each forest's predictions on it become that module's
#                    conditional probability table (CPT);
#   * test fold   -> module predictions are fed to the BN as evidence and the
#                    more probable 'Authenticity' state is the BN prediction.
#
# NOTE(review): np.random.shuffle and RandomForestClassifier() are unseeded
# here, so scores differ between runs unless a global NumPy seed is set
# elsewhere in the notebook.


def _module_features(frame, columns):
    """Return the feature columns of one module, i.e. ``columns`` minus the target."""
    return frame[columns].drop('majority_target', axis=1)


def _record_scores(metrics, model_name, y_true, y_pred):
    """Append accuracy/precision/recall/F1 to ``metrics`` and print the new values."""
    metrics['accuracy'].append(accuracy_score(y_true, y_pred))
    metrics['precision'].append(precision_score(y_true, y_pred))
    metrics['recall'].append(recall_score(y_true, y_pred))
    metrics['f1'].append(f1_score(y_true, y_pred))
    print(f'{model_name} model - Accuracy: {metrics["accuracy"][-1]}, Precision: {metrics["precision"][-1]}, Recall: {metrics["recall"][-1]}, F1: {metrics["f1"][-1]}')


def _train_module(frame, columns, metrics, model_name):
    """Fit a random forest on 80% of ``frame`` and score it on the remaining 20%.

    Returns the fitted model; the hold-out scores are appended to ``metrics``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        _module_features(frame, columns), frame['majority_target'],
        test_size=0.2, random_state=42)
    model = RandomForestClassifier()
    model.fit(X_train, y_train)
    _record_scores(metrics, model_name, y_test, model.predict(X_test))
    return model


def _module_cpt(predictions, labels):
    """Build the CPT of a module node given the true label.

    Returns ``[P(pred=1|label=1), P(pred=0|label=1), P(pred=1|label=0),
    P(pred=0|label=0)]``, the order expected by a two-state child of a
    two-state parent whose states are listed as ['1', '0'].

    Raises:
        ValueError: if one of the two classes is absent, because the
            conditional probabilities would then be undefined (0/0).
    """
    predictions = np.asarray(predictions)
    labels = np.asarray(labels)
    cpt = []
    for label_value in (1, 0):
        class_predictions = predictions[labels == label_value]
        if class_predictions.size == 0:
            raise ValueError(
                f'CPT split contains no rows with majority_target == {label_value}')
        cpt.append(np.count_nonzero(class_predictions == 1) / class_predictions.size)
        cpt.append(np.count_nonzero(class_predictions == 0) / class_predictions.size)
    return cpt


def _bn_predict(bbn, evidence):
    """Classify every row from the module predictions given as evidence.

    ``evidence`` maps a BN node name to the sequence of 0/1 predictions of
    that module (all sequences have the same length).  The posterior depends
    only on the combination of evidence values, so inference runs once per
    distinct combination instead of once per row.
    """
    node_names = list(evidence)
    verdict_by_combo = {}
    verdicts = []
    for row in zip(*(evidence[name] for name in node_names)):
        combo = tuple(int(value) for value in row)
        if combo not in verdict_by_combo:
            # A fresh join tree guarantees no evidence is left over.
            join_tree = InferenceController.apply(bbn)
            for name, value in zip(node_names, combo):
                observation = EvidenceBuilder() \
                    .with_node(join_tree.get_bbn_node_by_name(name)) \
                    .with_evidence(str(value), 1.0) \
                    .build()
                join_tree.set_observation(observation)
            posteriors = join_tree.get_posteriors()['Authenticity']
            verdict_by_combo[combo] = 1 if posteriors['1'] > posteriors['0'] else 0
        verdicts.append(verdict_by_combo[combo])
    return verdicts


# Initialize lists to store metrics
who_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
when_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
bn_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}

kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
for train_index, test_index in kf.split(data, data['majority_target']):
    np.random.shuffle(train_index)
    midpoint = len(train_index) // 2
    module_training = train_index[:midpoint]
    cpt_formation = train_index[midpoint:]
    # reset_index so positional and label-based access agree downstream
    module_training_data = data.iloc[module_training].copy().reset_index(drop=True)
    cpt_data = data.iloc[cpt_formation].copy().reset_index(drop=True)
    test_data = data.iloc[test_index].copy().reset_index(drop=True)

    # Module training
    who_model = _train_module(module_training_data, who_columns, who_metrics, 'Who')
    when_model = _train_module(module_training_data, when_columns, when_metrics, 'When')

    # CPT formation: how each module behaves on data it was not trained on
    cpt_label = cpt_data['majority_target']
    who_cpt = _module_cpt(who_model.predict(_module_features(cpt_data, who_columns)), cpt_label)
    when_cpt = _module_cpt(when_model.predict(_module_features(cpt_data, when_columns)), cpt_label)

    # Prior of the root node (labels are assumed to be 0/1, so the sum counts the 1s)
    total_true_cp = np.sum(cpt_label)
    total_fake_cp = len(cpt_data) - total_true_cp
    prob_t = total_true_cp / len(cpt_data)
    prob_f = total_fake_cp / len(cpt_data)

    # Bayesian network: Authenticity is the parent of every module node.
    # The second module node is named 'When' (it used to be created and
    # looked up as 'What', a copy-paste leftover that mislabelled the graph).
    a = BbnNode(Variable(0, 'Authenticity', ['1', '0']), [prob_t, prob_f])
    who = BbnNode(Variable(1, 'Who', ['1', '0']), who_cpt)
    when = BbnNode(Variable(2, 'When', ['1', '0']), when_cpt)

    bbn = Bbn() \
        .add_node(a) \
        .add_node(who) \
        .add_node(when) \
        .add_edge(Edge(a, who, EdgeType.DIRECTED)) \
        .add_edge(Edge(a, when, EdgeType.DIRECTED))

    # Testing
    test_label = test_data['majority_target']
    bn_predictions = _bn_predict(bbn, {
        'Who': who_model.predict(_module_features(test_data, who_columns)),
        'When': when_model.predict(_module_features(test_data, when_columns)),
    })

    _record_scores(bn_metrics, 'BN', test_label, bn_predictions)
    print("\n ----------------------------------------------------------------------------- \n")

# Calculate and print average metrics for each model
print("\nAverage Metrics:")
for model_name, metrics in [('Who', who_metrics), ('When', when_metrics), ('BN', bn_metrics)]:
    print(f'\n{model_name} model:')
    for metric_name, metric_values in metrics.items():
        avg_metric = np.mean(metric_values)
        print(f'  Average {metric_name}: {avg_metric:.4f}')


In [None]:
# when and what module

# Initialize lists to store metrics
when_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
what_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
bn_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}

kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
for train_index, test_index in kf.split(data, data['majority_target']):
    np.random.shuffle(train_index)
    midpoint = len(train_index) // 2
    module_training = train_index[:midpoint]
    cpt_formation = train_index[midpoint:]
    module_training_data = data.iloc[module_training].copy().reset_index(drop=True)
    cpt_data = data.iloc[cpt_formation].copy().reset_index(drop=True)
    test_data = data.iloc[test_index].copy().reset_index(drop=True)

    # Module training
    when_mt = module_training_data[when_columns].copy()
    what_mt = module_training_data[what_columns].copy()

    when_mt_attr = when_mt.drop('majority_target', axis=1)
    what_mt_attr = what_mt.drop('majority_target', axis=1)

    when_mt_label = when_mt['majority_target']
    what_mt_label = what_mt['majority_target']

    # Training and evaluating the when model
    X_train, X_test, y_train, y_test = train_test_split(when_mt_attr, when_mt_label, test_size=0.2, random_state=42)
    when_model = RandomForestClassifier()
    when_model.fit(X_train, y_train)
    predictions = when_model.predict(X_test)
    when_metrics['accuracy'].append(accuracy_score(y_test, predictions))
    when_metrics['precision'].append(precision_score(y_test, predictions))
    when_metrics['recall'].append(recall_score(y_test, predictions))
    when_metrics['f1'].append(f1_score(y_test, predictions))
    print(f'When model - Accuracy: {when_metrics["accuracy"][-1]}, Precision: {when_metrics["precision"][-1]}, Recall: {when_metrics["recall"][-1]}, F1: {when_metrics["f1"][-1]}')

    # Training and evaluating the what model
    X_train, X_test, y_train, y_test = train_test_split(what_mt_attr, what_mt_label, test_size=0.2, random_state=42)
    what_model = RandomForestClassifier()
    what_model.fit(X_train, y_train)
    predictions = what_model.predict(X_test)
    what_metrics['accuracy'].append(accuracy_score(y_test, predictions))
    what_metrics['precision'].append(precision_score(y_test, predictions))
    what_metrics['recall'].append(recall_score(y_test, predictions))
    what_metrics['f1'].append(f1_score(y_test, predictions))
    print(f'What model - Accuracy: {what_metrics["accuracy"][-1]}, Precision: {what_metrics["precision"][-1]}, Recall: {what_metrics["recall"][-1]}, F1: {what_metrics["f1"][-1]}')

    # CPT formation
    when_cp = cpt_data[when_columns].copy()
    what_cp = cpt_data[what_columns].copy()

    when_cp_attr = when_cp.drop('majority_target', axis=1)
    what_cp_attr = what_cp.drop('majority_target', axis=1)

    when_cp_label = when_cp['majority_target']
    what_cp_label = what_cp['majority_target']

    when_cp_pred = when_model.predict(when_cp_attr)
    what_cp_pred = what_model.predict(what_cp_attr)

    total_true_cp = np.sum(cpt_data['majority_target'])
    total_fake_cp = len(cpt_data) - np.sum(cpt_data['majority_target'])

    when_t_t = when_t_f = when_f_t = when_f_f = 0
    what_t_t = what_t_f = what_f_t = what_f_f = 0

    for i in range(len(cpt_data)):
        if when_cp_pred[i] == 1 and when_cp_label[i] == 1:
            when_t_t += 1
        if when_cp_pred[i] == 1 and when_cp_label[i] == 0:
            when_t_f += 1
        if when_cp_pred[i] == 0 and when_cp_label[i] == 1:
            when_f_t += 1
        if when_cp_pred[i] == 0 and when_cp_label[i] == 0:
            when_f_f += 1

        if what_cp_pred[i] == 1 and what_cp_label[i] == 1:
            what_t_t += 1
        if what_cp_pred[i] == 1 and what_cp_label[i] == 0:
            what_t_f += 1
        if what_cp_pred[i] == 0 and what_cp_label[i] == 1:
            what_f_t += 1
        if what_cp_pred[i] == 0 and what_cp_label[i] == 0:
            what_f_f += 1

    prob_when_t_t = when_t_t / total_true_cp
    prob_when_t_f = when_t_f / total_fake_cp
    prob_when_f_t = when_f_t / total_true_cp
    prob_when_f_f = when_f_f / total_fake_cp

    prob_what_t_t = what_t_t / total_true_cp
    prob_what_t_f = what_t_f / total_fake_cp
    prob_what_f_t = what_f_t / total_true_cp
    prob_what_f_f = what_f_f / total_fake_cp

    prob_t = total_true_cp / (total_true_cp + total_fake_cp)
    prob_f = total_fake_cp / (total_true_cp + total_fake_cp)

    # Bayesian network
    a = BbnNode(Variable(0, 'Authenticity', ['1', '0']), [prob_t, prob_f])
    when = BbnNode(Variable(1, 'Who', ['1', '0']), [prob_when_t_t, prob_when_f_t, prob_when_t_f, prob_when_f_f])
    what = BbnNode(Variable(2, 'What', ['1', '0']), [prob_what_t_t, prob_what_f_t, prob_what_t_f, prob_what_f_f])

    bbn = Bbn() \
        .add_node(a) \
        .add_node(when) \
        .add_node(what) \
        .add_edge(Edge(a, when, EdgeType.DIRECTED)) \
        .add_edge(Edge(a, what, EdgeType.DIRECTED))

    join_tree = InferenceController.apply(bbn)

    # Testing
    when_test = test_data[when_columns].copy()
    what_test = test_data[what_columns].copy()

    when_test_attr = when_test.drop('majority_target', axis=1)
    what_test_attr = what_test.drop('majority_target', axis=1)

    test_label = test_data['majority_target']

    when_test_pred = when_model.predict(when_test_attr)
    what_test_pred = what_model.predict(what_test_attr)

    when_test_pred = when_test_pred.astype(int)
    what_test_pred = what_test_pred.astype(int)

    bn_predictions = []
    for i in range(len(test_data)):
        join_tree = InferenceController.apply(bbn)
        when_ev = str(when_test_pred[i])
        what_ev = str(what_test_pred[i])

        ev1 = EvidenceBuilder() \
            .with_node(join_tree.get_bbn_node_by_name('Who')) \
            .with_evidence(when_ev, 1.0) \
            .build()
        join_tree.set_observation(ev1)

        ev2 = EvidenceBuilder() \
            .with_node(join_tree.get_bbn_node_by_name('What')) \
            .with_evidence(what_ev, 1.0) \
            .build()
        join_tree.set_observation(ev2)

        query_node = join_tree.get_bbn_node_by_name('Authenticity')
        potential = join_tree.get_bbn_potential(query_node)

        for node, posteriors in join_tree.get_posteriors().items():
            if node == 'Authenticity':
                if posteriors['1'] > posteriors['0']:
                    predicted_label = 1
                else:
                    predicted_label = 0
                bn_predictions.append(predicted_label)
                break

    bn_metrics['accuracy'].append(accuracy_score(test_label, bn_predictions))
    bn_metrics['precision'].append(precision_score(test_label, bn_predictions))
    bn_metrics['recall'].append(recall_score(test_label, bn_predictions))
    bn_metrics['f1'].append(f1_score(test_label, bn_predictions))

    print(f'BN model - Accuracy: {bn_metrics["accuracy"][-1]}, Precision: {bn_metrics["precision"][-1]}, Recall: {bn_metrics["recall"][-1]}, F1: {bn_metrics["f1"][-1]}')
    print("\n ----------------------------------------------------------------------------- \n")

# Summarise the run: mean of every per-fold metric, grouped by model
print("\nAverage Metrics:")
fold_scores_by_model = {'When': when_metrics, 'What': what_metrics, 'BN': bn_metrics}
for model_name in fold_scores_by_model:
    print(f'\n{model_name} model:')
    metrics = fold_scores_by_model[model_name]
    for metric_name in metrics:
        avg_metric = np.mean(metrics[metric_name])
        print(f'  Average {metric_name}: {avg_metric:.4f}')


In [None]:
# who module

# Per-fold scores for the stand-alone 'who' classifier and for the Bayesian
# network (BN) that takes the classifier's prediction as evidence.
who_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
bn_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}

kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
for train_index, test_index in kf.split(data, data['majority_target']):
    # Halve the training fold: the first half fits the module classifier, the
    # second half is used only to estimate the network's probability tables.
    # NOTE(review): this shuffle and the random forest below are unseeded, so
    # the reported scores change from run to run.
    np.random.shuffle(train_index)
    midpoint = len(train_index) // 2
    module_training = train_index[:midpoint]
    cpt_formation = train_index[midpoint:]
    module_training_data = data.iloc[module_training].copy().reset_index(drop=True)
    cpt_data = data.iloc[cpt_formation].copy().reset_index(drop=True)
    test_data = data.iloc[test_index].copy().reset_index(drop=True)

    # Module training
    who_mt = module_training_data[who_columns].copy()
    who_mt_attr = who_mt.drop('majority_target', axis=1)
    who_mt_label = who_mt['majority_target']

    # Training and evaluating the who model on an internal 80/20 hold-out
    X_train, X_test, y_train, y_test = train_test_split(who_mt_attr, who_mt_label, test_size=0.2, random_state=42)
    who_model = RandomForestClassifier()
    who_model.fit(X_train, y_train)
    predictions = who_model.predict(X_test)
    who_metrics['accuracy'].append(accuracy_score(y_test, predictions))
    who_metrics['precision'].append(precision_score(y_test, predictions))
    who_metrics['recall'].append(recall_score(y_test, predictions))
    who_metrics['f1'].append(f1_score(y_test, predictions))
    print(f'Who model - Accuracy: {who_metrics["accuracy"][-1]}, Precision: {who_metrics["precision"][-1]}, Recall: {who_metrics["recall"][-1]}, F1: {who_metrics["f1"][-1]}')

    # CPT formation: score the classifier on data it was not fitted on
    who_cp = cpt_data[who_columns].copy()
    who_cp_attr = who_cp.drop('majority_target', axis=1)
    who_cp_label = who_cp['majority_target']
    who_cp_pred = who_model.predict(who_cp_attr)

    total_true_cp = np.sum(cpt_data['majority_target'])
    total_fake_cp = len(cpt_data) - total_true_cp

    # Confusion counts, named <module prediction>_<actual label> (t = 1, f = 0)
    pred_is_true = np.asarray(who_cp_pred) == 1
    pred_is_fake = np.asarray(who_cp_pred) == 0
    label_is_true = who_cp_label.to_numpy() == 1
    label_is_fake = who_cp_label.to_numpy() == 0
    who_t_t = int(np.sum(pred_is_true & label_is_true))
    who_t_f = int(np.sum(pred_is_true & label_is_fake))
    who_f_t = int(np.sum(pred_is_fake & label_is_true))
    who_f_f = int(np.sum(pred_is_fake & label_is_fake))

    # P(module prediction | actual label)
    prob_who_t_t = who_t_t / total_true_cp
    prob_who_t_f = who_t_f / total_fake_cp
    prob_who_f_t = who_f_t / total_true_cp
    prob_who_f_f = who_f_f / total_fake_cp

    # Prior over the actual label
    prob_t = total_true_cp / (total_true_cp + total_fake_cp)
    prob_f = total_fake_cp / (total_true_cp + total_fake_cp)

    # Bayesian network: Authenticity -> Who.
    # The child's table lists both prediction outcomes for actual label 1,
    # then both outcomes for actual label 0.
    a = BbnNode(Variable(0, 'Authenticity', ['1', '0']), [prob_t, prob_f])
    who = BbnNode(Variable(1, 'Who', ['1', '0']), [prob_who_t_t, prob_who_f_t, prob_who_t_f, prob_who_f_f])

    bbn = Bbn() \
        .add_node(a) \
        .add_node(who) \
        .add_edge(Edge(a, who, EdgeType.DIRECTED))

    # Testing
    who_test = test_data[who_columns].copy()
    who_test_attr = who_test.drop('majority_target', axis=1)
    test_label = test_data['majority_target']

    who_test_pred = who_model.predict(who_test_attr)
    who_test_pred = who_test_pred.astype(int)

    # The network's answer depends only on the evidence value, so inference is
    # run once per distinct module prediction rather than once per test row.
    # Each value gets a freshly built join tree so no observation carries over.
    bn_label_by_evidence = {}
    for who_ev in map(str, np.unique(who_test_pred)):
        join_tree = InferenceController.apply(bbn)
        ev1 = EvidenceBuilder() \
            .with_node(join_tree.get_bbn_node_by_name('Who')) \
            .with_evidence(who_ev, 1.0) \
            .build()
        join_tree.set_observation(ev1)

        posteriors = join_tree.get_posteriors()['Authenticity']
        # Ties go to label 0
        bn_label_by_evidence[who_ev] = 1 if posteriors['1'] > posteriors['0'] else 0

    bn_predictions = [bn_label_by_evidence[str(pred)] for pred in who_test_pred]

    bn_metrics['accuracy'].append(accuracy_score(test_label, bn_predictions))
    bn_metrics['precision'].append(precision_score(test_label, bn_predictions))
    bn_metrics['recall'].append(recall_score(test_label, bn_predictions))
    bn_metrics['f1'].append(f1_score(test_label, bn_predictions))

    print(f'BN model - Accuracy: {bn_metrics["accuracy"][-1]}, Precision: {bn_metrics["precision"][-1]}, Recall: {bn_metrics["recall"][-1]}, F1: {bn_metrics["f1"][-1]}')
    print("\n ----------------------------------------------------------------------------- \n")

# Calculate and print average metrics for each model
print("\nAverage Metrics:")
for model_name, metrics in [('Who', who_metrics), ('BN', bn_metrics)]:
    print(f'\n{model_name} model:')
    for metric_name, metric_values in metrics.items():
        avg_metric = np.mean(metric_values)
        print(f'  Average {metric_name}: {avg_metric:.4f}')

In [None]:
# what module

# Per-fold scores for the stand-alone 'what' classifier and for the Bayesian
# network (BN) that takes the classifier's prediction as evidence.
what_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
bn_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}

kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
for train_index, test_index in kf.split(data, data['majority_target']):
    # Halve the training fold: the first half fits the module classifier, the
    # second half is used only to estimate the network's probability tables.
    # NOTE(review): this shuffle and the random forest below are unseeded, so
    # the reported scores change from run to run.
    np.random.shuffle(train_index)
    midpoint = len(train_index) // 2
    module_training = train_index[:midpoint]
    cpt_formation = train_index[midpoint:]
    module_training_data = data.iloc[module_training].copy().reset_index(drop=True)
    cpt_data = data.iloc[cpt_formation].copy().reset_index(drop=True)
    test_data = data.iloc[test_index].copy().reset_index(drop=True)

    # Module training
    what_mt = module_training_data[what_columns].copy()
    what_mt_attr = what_mt.drop('majority_target', axis=1)
    what_mt_label = what_mt['majority_target']

    # Training and evaluating the what model on an internal 80/20 hold-out
    X_train, X_test, y_train, y_test = train_test_split(what_mt_attr, what_mt_label, test_size=0.2, random_state=42)
    what_model = RandomForestClassifier()
    what_model.fit(X_train, y_train)
    predictions = what_model.predict(X_test)
    what_metrics['accuracy'].append(accuracy_score(y_test, predictions))
    what_metrics['precision'].append(precision_score(y_test, predictions))
    what_metrics['recall'].append(recall_score(y_test, predictions))
    what_metrics['f1'].append(f1_score(y_test, predictions))
    print(f'What model - Accuracy: {what_metrics["accuracy"][-1]}, Precision: {what_metrics["precision"][-1]}, Recall: {what_metrics["recall"][-1]}, F1: {what_metrics["f1"][-1]}')

    # CPT formation: score the classifier on data it was not fitted on
    what_cp = cpt_data[what_columns].copy()
    what_cp_attr = what_cp.drop('majority_target', axis=1)
    what_cp_label = what_cp['majority_target']
    what_cp_pred = what_model.predict(what_cp_attr)

    total_true_cp = np.sum(cpt_data['majority_target'])
    total_fake_cp = len(cpt_data) - total_true_cp

    # Confusion counts, named <module prediction>_<actual label> (t = 1, f = 0)
    pred_is_true = np.asarray(what_cp_pred) == 1
    pred_is_fake = np.asarray(what_cp_pred) == 0
    label_is_true = what_cp_label.to_numpy() == 1
    label_is_fake = what_cp_label.to_numpy() == 0
    what_t_t = int(np.sum(pred_is_true & label_is_true))
    what_t_f = int(np.sum(pred_is_true & label_is_fake))
    what_f_t = int(np.sum(pred_is_fake & label_is_true))
    what_f_f = int(np.sum(pred_is_fake & label_is_fake))

    # P(module prediction | actual label)
    prob_what_t_t = what_t_t / total_true_cp
    prob_what_t_f = what_t_f / total_fake_cp
    prob_what_f_t = what_f_t / total_true_cp
    prob_what_f_f = what_f_f / total_fake_cp

    # Prior over the actual label
    prob_t = total_true_cp / (total_true_cp + total_fake_cp)
    prob_f = total_fake_cp / (total_true_cp + total_fake_cp)

    # Bayesian network: Authenticity -> What.
    # The evidence node is named 'What' to match the module it models (it was
    # previously labelled 'Who'); the name is only used for lookups below.
    # The child's table lists both prediction outcomes for actual label 1,
    # then both outcomes for actual label 0.
    a = BbnNode(Variable(0, 'Authenticity', ['1', '0']), [prob_t, prob_f])
    what = BbnNode(Variable(1, 'What', ['1', '0']), [prob_what_t_t, prob_what_f_t, prob_what_t_f, prob_what_f_f])

    bbn = Bbn() \
        .add_node(a) \
        .add_node(what) \
        .add_edge(Edge(a, what, EdgeType.DIRECTED))

    # Testing
    what_test = test_data[what_columns].copy()
    what_test_attr = what_test.drop('majority_target', axis=1)
    test_label = test_data['majority_target']

    what_test_pred = what_model.predict(what_test_attr)
    what_test_pred = what_test_pred.astype(int)

    # The network's answer depends only on the evidence value, so inference is
    # run once per distinct module prediction rather than once per test row.
    # Each value gets a freshly built join tree so no observation carries over.
    bn_label_by_evidence = {}
    for what_ev in map(str, np.unique(what_test_pred)):
        join_tree = InferenceController.apply(bbn)
        ev1 = EvidenceBuilder() \
            .with_node(join_tree.get_bbn_node_by_name('What')) \
            .with_evidence(what_ev, 1.0) \
            .build()
        join_tree.set_observation(ev1)

        posteriors = join_tree.get_posteriors()['Authenticity']
        # Ties go to label 0
        bn_label_by_evidence[what_ev] = 1 if posteriors['1'] > posteriors['0'] else 0

    bn_predictions = [bn_label_by_evidence[str(pred)] for pred in what_test_pred]

    bn_metrics['accuracy'].append(accuracy_score(test_label, bn_predictions))
    bn_metrics['precision'].append(precision_score(test_label, bn_predictions))
    bn_metrics['recall'].append(recall_score(test_label, bn_predictions))
    bn_metrics['f1'].append(f1_score(test_label, bn_predictions))

    print(f'BN model - Accuracy: {bn_metrics["accuracy"][-1]}, Precision: {bn_metrics["precision"][-1]}, Recall: {bn_metrics["recall"][-1]}, F1: {bn_metrics["f1"][-1]}')
    print("\n ----------------------------------------------------------------------------- \n")

# Calculate and print average metrics for each model
print("\nAverage Metrics:")
for model_name, metrics in [('What', what_metrics), ('BN', bn_metrics)]:
    print(f'\n{model_name} model:')
    for metric_name, metric_values in metrics.items():
        avg_metric = np.mean(metric_values)
        print(f'  Average {metric_name}: {avg_metric:.4f}')

In [None]:
# when module

# Per-fold scores for the stand-alone 'when' classifier and for the Bayesian
# network (BN) that takes the classifier's prediction as evidence.
when_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}
bn_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}

kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
for train_index, test_index in kf.split(data, data['majority_target']):
    # Halve the training fold: the first half fits the module classifier, the
    # second half is used only to estimate the network's probability tables.
    # NOTE(review): this shuffle and the random forest below are unseeded, so
    # the reported scores change from run to run.
    np.random.shuffle(train_index)
    midpoint = len(train_index) // 2
    module_training = train_index[:midpoint]
    cpt_formation = train_index[midpoint:]
    module_training_data = data.iloc[module_training].copy().reset_index(drop=True)
    cpt_data = data.iloc[cpt_formation].copy().reset_index(drop=True)
    test_data = data.iloc[test_index].copy().reset_index(drop=True)

    # Module training
    when_mt = module_training_data[when_columns].copy()
    when_mt_attr = when_mt.drop('majority_target', axis=1)
    when_mt_label = when_mt['majority_target']

    # Training and evaluating the when model on an internal 80/20 hold-out
    X_train, X_test, y_train, y_test = train_test_split(when_mt_attr, when_mt_label, test_size=0.2, random_state=42)
    when_model = RandomForestClassifier()
    when_model.fit(X_train, y_train)
    predictions = when_model.predict(X_test)
    when_metrics['accuracy'].append(accuracy_score(y_test, predictions))
    when_metrics['precision'].append(precision_score(y_test, predictions))
    when_metrics['recall'].append(recall_score(y_test, predictions))
    when_metrics['f1'].append(f1_score(y_test, predictions))
    print(f'When model - Accuracy: {when_metrics["accuracy"][-1]}, Precision: {when_metrics["precision"][-1]}, Recall: {when_metrics["recall"][-1]}, F1: {when_metrics["f1"][-1]}')

    # CPT formation: score the classifier on data it was not fitted on
    when_cp = cpt_data[when_columns].copy()
    when_cp_attr = when_cp.drop('majority_target', axis=1)
    when_cp_label = when_cp['majority_target']
    when_cp_pred = when_model.predict(when_cp_attr)

    total_true_cp = np.sum(cpt_data['majority_target'])
    total_fake_cp = len(cpt_data) - total_true_cp

    # Confusion counts, named <module prediction>_<actual label> (t = 1, f = 0)
    pred_is_true = np.asarray(when_cp_pred) == 1
    pred_is_fake = np.asarray(when_cp_pred) == 0
    label_is_true = when_cp_label.to_numpy() == 1
    label_is_fake = when_cp_label.to_numpy() == 0
    when_t_t = int(np.sum(pred_is_true & label_is_true))
    when_t_f = int(np.sum(pred_is_true & label_is_fake))
    when_f_t = int(np.sum(pred_is_fake & label_is_true))
    when_f_f = int(np.sum(pred_is_fake & label_is_fake))

    # P(module prediction | actual label)
    prob_when_t_t = when_t_t / total_true_cp
    prob_when_t_f = when_t_f / total_fake_cp
    prob_when_f_t = when_f_t / total_true_cp
    prob_when_f_f = when_f_f / total_fake_cp

    # Prior over the actual label
    prob_t = total_true_cp / (total_true_cp + total_fake_cp)
    prob_f = total_fake_cp / (total_true_cp + total_fake_cp)

    # Bayesian network: Authenticity -> When.
    # The evidence node is named 'When' to match the module it models (it was
    # previously labelled 'Who'); the name is only used for lookups below.
    # The child's table lists both prediction outcomes for actual label 1,
    # then both outcomes for actual label 0.
    a = BbnNode(Variable(0, 'Authenticity', ['1', '0']), [prob_t, prob_f])
    when = BbnNode(Variable(1, 'When', ['1', '0']), [prob_when_t_t, prob_when_f_t, prob_when_t_f, prob_when_f_f])

    bbn = Bbn() \
        .add_node(a) \
        .add_node(when) \
        .add_edge(Edge(a, when, EdgeType.DIRECTED))

    # Testing
    when_test = test_data[when_columns].copy()
    when_test_attr = when_test.drop('majority_target', axis=1)
    test_label = test_data['majority_target']

    when_test_pred = when_model.predict(when_test_attr)
    when_test_pred = when_test_pred.astype(int)

    # The network's answer depends only on the evidence value, so inference is
    # run once per distinct module prediction rather than once per test row.
    # Each value gets a freshly built join tree so no observation carries over.
    bn_label_by_evidence = {}
    for when_ev in map(str, np.unique(when_test_pred)):
        join_tree = InferenceController.apply(bbn)
        ev1 = EvidenceBuilder() \
            .with_node(join_tree.get_bbn_node_by_name('When')) \
            .with_evidence(when_ev, 1.0) \
            .build()
        join_tree.set_observation(ev1)

        posteriors = join_tree.get_posteriors()['Authenticity']
        # Ties go to label 0
        bn_label_by_evidence[when_ev] = 1 if posteriors['1'] > posteriors['0'] else 0

    bn_predictions = [bn_label_by_evidence[str(pred)] for pred in when_test_pred]

    bn_metrics['accuracy'].append(accuracy_score(test_label, bn_predictions))
    bn_metrics['precision'].append(precision_score(test_label, bn_predictions))
    bn_metrics['recall'].append(recall_score(test_label, bn_predictions))
    bn_metrics['f1'].append(f1_score(test_label, bn_predictions))

    print(f'BN model - Accuracy: {bn_metrics["accuracy"][-1]}, Precision: {bn_metrics["precision"][-1]}, Recall: {bn_metrics["recall"][-1]}, F1: {bn_metrics["f1"][-1]}')
    print("\n ----------------------------------------------------------------------------- \n")

# Calculate and print average metrics for each model
print("\nAverage Metrics:")
for model_name, metrics in [('When', when_metrics), ('BN', bn_metrics)]:
    print(f'\n{model_name} model:')
    for metric_name, metric_values in metrics.items():
        avg_metric = np.mean(metric_values)
        print(f'  Average {metric_name}: {avg_metric:.4f}')

In [None]:
# who (RF)

# Baseline: a random forest on the 'who' features alone, scored with
# stratified 10-fold cross-validation. One list entry per fold.
who_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}

kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
for train_index, test_index in kf.split(data, data['majority_target']):
    train_data = data.iloc[train_index].copy().reset_index(drop=True)
    test_data = data.iloc[test_index].copy().reset_index(drop=True)

    # Fit on the full training fold; the label column is split off the feature set
    who_mt = train_data[who_columns].copy()
    who_mt_label = who_mt['majority_target']
    who_mt_attr = who_mt.drop(columns='majority_target')
    who_model = RandomForestClassifier()
    who_model.fit(who_mt_attr, who_mt_label)

    # Predict the held-out fold
    who_test = test_data[who_columns].copy()
    who_test_attr = who_test.drop(columns='majority_target')
    test_label = test_data['majority_target']
    predictions = who_model.predict(who_test_attr)

    # Score in a fixed order so every metric list stays aligned by fold
    fold_scorers = (
        ('accuracy', accuracy_score),
        ('precision', precision_score),
        ('recall', recall_score),
        ('f1', f1_score),
    )
    for score_key, score_fn in fold_scorers:
        who_metrics[score_key].append(score_fn(test_label, predictions))

    print(f'Who model - Accuracy: {who_metrics["accuracy"][-1]}, Precision: {who_metrics["precision"][-1]}, Recall: {who_metrics["recall"][-1]}, F1: {who_metrics["f1"][-1]}')
    print("\n ----------------------------------------------------------------------------- \n")

# Report the mean of each metric across folds
print("\nAverage Metrics:")
for model_name, metrics in (('Who', who_metrics),):
    print(f'\n{model_name} model:')
    for metric_name in metrics:
        avg_metric = np.mean(metrics[metric_name])
        print(f'  Average {metric_name}: {avg_metric:.4f}')


In [None]:
# when (RF)

# Baseline: a random forest on the 'when' features alone, scored with
# stratified 10-fold cross-validation. One list entry per fold.
when_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}

kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
for train_index, test_index in kf.split(data, data['majority_target']):
    train_data = data.iloc[train_index].copy().reset_index(drop=True)
    test_data = data.iloc[test_index].copy().reset_index(drop=True)

    # Fit on the full training fold; the label column is split off the feature set
    when_mt = train_data[when_columns].copy()
    when_mt_label = when_mt['majority_target']
    when_mt_attr = when_mt.drop(columns='majority_target')
    when_model = RandomForestClassifier()
    when_model.fit(when_mt_attr, when_mt_label)

    # Predict the held-out fold
    when_test = test_data[when_columns].copy()
    when_test_attr = when_test.drop(columns='majority_target')
    test_label = test_data['majority_target']
    predictions = when_model.predict(when_test_attr)

    # Score in a fixed order so every metric list stays aligned by fold
    fold_scorers = (
        ('accuracy', accuracy_score),
        ('precision', precision_score),
        ('recall', recall_score),
        ('f1', f1_score),
    )
    for score_key, score_fn in fold_scorers:
        when_metrics[score_key].append(score_fn(test_label, predictions))

    print(f'When model - Accuracy: {when_metrics["accuracy"][-1]}, Precision: {when_metrics["precision"][-1]}, Recall: {when_metrics["recall"][-1]}, F1: {when_metrics["f1"][-1]}')
    print("\n ----------------------------------------------------------------------------- \n")

# Report the mean of each metric across folds
print("\nAverage Metrics:")
for model_name, metrics in (('When', when_metrics),):
    print(f'\n{model_name} model:')
    for metric_name in metrics:
        avg_metric = np.mean(metrics[metric_name])
        print(f'  Average {metric_name}: {avg_metric:.4f}')


In [None]:
# what (RF)

# Baseline: a random forest on the 'what' features alone, scored with
# stratified 10-fold cross-validation. One list entry per fold.
what_metrics = {'accuracy': [], 'precision': [], 'recall': [], 'f1': []}

kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
for train_index, test_index in kf.split(data, data['majority_target']):
    train_data = data.iloc[train_index].copy().reset_index(drop=True)
    test_data = data.iloc[test_index].copy().reset_index(drop=True)

    # Fit on the full training fold; the label column is split off the feature set
    what_mt = train_data[what_columns].copy()
    what_mt_label = what_mt['majority_target']
    what_mt_attr = what_mt.drop(columns='majority_target')
    what_model = RandomForestClassifier()
    what_model.fit(what_mt_attr, what_mt_label)

    # Predict the held-out fold
    what_test = test_data[what_columns].copy()
    what_test_attr = what_test.drop(columns='majority_target')
    test_label = test_data['majority_target']
    predictions = what_model.predict(what_test_attr)

    # Score in a fixed order so every metric list stays aligned by fold
    fold_scorers = (
        ('accuracy', accuracy_score),
        ('precision', precision_score),
        ('recall', recall_score),
        ('f1', f1_score),
    )
    for score_key, score_fn in fold_scorers:
        what_metrics[score_key].append(score_fn(test_label, predictions))

    print(f'What model - Accuracy: {what_metrics["accuracy"][-1]}, Precision: {what_metrics["precision"][-1]}, Recall: {what_metrics["recall"][-1]}, F1: {what_metrics["f1"][-1]}')
    print("\n ----------------------------------------------------------------------------- \n")

# Report the mean of each metric across folds
print("\nAverage Metrics:")
for model_name, metrics in (('What', what_metrics),):
    print(f'\n{model_name} model:')
    for metric_name in metrics:
        avg_metric = np.mean(metrics[metric_name])
        print(f'  Average {metric_name}: {avg_metric:.4f}')
