In [14]:
import numpy as np
import pandas as pd
import itertools
import ast
import warnings

warnings.filterwarnings("ignore")

In [15]:
def calculate_probs_of_vertices(dirname):
    with open(dirname + "Dependency.txt") as f:
        data = f.read()

    dependencies = ast.literal_eval(data)
    vertices_probs = {}
    for vertex in dependencies:
        dependent_vertices = dependencies[vertex]
        vertices_probs[vertex] = {}
        if (len(dependent_vertices) == 0):
            Vertex_Probs = pd.read_csv(dirname + vertex + "_Probs.csv").set_index(vertex)
            for i in list(Vertex_Probs.index):
                vertices_probs[vertex][i] = Vertex_Probs["Probability"][i]
        elif (len(dependent_vertices) == 1):
            Dependent_Vertex = dependent_vertices[0]
            Current_Probs = pd.read_csv(dirname + vertex + "_" + Dependent_Vertex + "_Probs.csv")
            if (Current_Probs[Dependent_Vertex].values.dtype == bool):
                Current_Probs[Dependent_Vertex] = list(map(str, Current_Probs[Dependent_Vertex]))
            Current_Probs.set_index(Dependent_Vertex, inplace=True)
            Target_States = list(Current_Probs.columns)
            Dependent_States = list(vertices_probs[Dependent_Vertex].keys())
            for Target_State in Target_States:
                vertices_probs[vertex][Target_State] = 0
                for Dependent_State in Dependent_States:
                    vertices_probs[vertex][Target_State] += Current_Probs[Target_State][Dependent_State] * vertices_probs[Dependent_Vertex][Dependent_State]
        elif (len(dependent_vertices) > 1):
            Current_Probs = pd.read_csv(dirname + vertex + "_" + "_".join(dependent_vertices) + "_Probs.csv")
            for col_name in dependent_vertices:
                if (Current_Probs[col_name].values.dtype == bool):
                    Current_Probs[col_name] = list(map(str, Current_Probs[col_name]))
            Current_Probs.set_index(dependent_vertices, inplace=True)
            Indices = list(Current_Probs.index)
            Target_States = list(Current_Probs.columns)
            for Target_State in Target_States:
                vertices_probs[vertex][Target_State] = 0
                for Index in Indices:
                    Marg_Probs = []
                    for (dep_ver,dep_ver_state) in list(zip(dependent_vertices,Index)):
                        Marg_Probs.append(vertices_probs[dep_ver][dep_ver_state])
                    vertices_probs[vertex][Target_State] += np.prod(Marg_Probs) * Current_Probs[Target_State][Index]
    return vertices_probs

In [16]:
def calculate_joint_probability_of_BN(dirname):
    with open(dirname + "Dependency.txt") as f:
        data = f.read()

    dependencies = ast.literal_eval(data)
    vertices = list(dependencies.keys())
    vertices_all_states = []
    for vertex in vertices:
        vertex_states = pd.read_csv(dirname + vertex + "_States.csv")
        vertex_states = list(vertex_states[vertex].values)
        vertex_states = list(map(str, vertex_states))
        vertices_all_states.append(vertex_states)
    vertices_all_states = list(itertools.product(*vertices_all_states))
    Table = pd.DataFrame(data=vertices_all_states, columns=vertices)
    Table["Probability"] = 1
    for i in range(len(Table)):
        Vertices_States_Set = list(zip(vertices,list(Table[vertices].iloc[i].values)))
        for (Vertex,Vertex_state) in Vertices_States_Set:
            Dependent_Vertices = dependencies[Vertex]
            if (len(Dependent_Vertices) == 0):
                Probs = pd.read_csv(dirname + Vertex + "_Probs.csv").set_index(Vertex)
                Table["Probability"].iloc[i] *= Probs["Probability"][Vertex_state]
            elif (len(Dependent_Vertices) == 1):
                Dependent_Vertex = Dependent_Vertices[0]
                Dependent_Vertex_State = Table[Dependent_Vertex].iloc[i]
                Probs = pd.read_csv(dirname + Vertex + "_" + Dependent_Vertex + "_Probs.csv")
                if (Probs[Dependent_Vertex].values.dtype == bool):
                    Probs[Dependent_Vertex] = list(map(str, Probs[Dependent_Vertex]))
                Probs.set_index(Dependent_Vertex, inplace=True)
                Table["Probability"].iloc[i] *= Probs[Vertex_state][Dependent_Vertex_State]
            elif (len(Dependent_Vertices) > 1):
                Dependent_Vertices_State = Table[Dependent_Vertices].iloc[i]
                Probs = pd.read_csv(dirname + Vertex + "_" + "_".join(Dependent_Vertices) + "_Probs.csv")
                for col_name in Dependent_Vertices:
                    if (Probs[col_name].values.dtype == bool):
                        Probs[col_name] = list(map(str, Probs[col_name]))
                Probs.set_index(Dependent_Vertices, inplace=True)
                Table["Probability"].iloc[i] *= Probs.loc[tuple(Dependent_Vertices_State), Vertex_state]
    Table.index = range(1,len(Table) + 1)
    Table.index.name = "Observation"
    Table.to_csv(dirname[:-1] +"_Results.csv")


In [17]:
calculate_probs_of_vertices("Animal/")

{'Animal': {'Monkey': 0.2,
  'Penguin': 0.2,
  'Platypus': 0.2,
  'Robin': 0.2,
  'Turtle': 0.2},
 'Environment': {'Air': 0.1, 'Land': 0.5, 'Water': 0.4},
 'HasShell': {'True': 0.2, 'False': 0.8},
 'BearsYoungAs': {'Live': 0.2, 'Eggs': 0.8},
 'Class': {'Bird': 0.4, 'Mammal': 0.4, 'Reptile': 0.2},
 'WarmBlooded': {'True': 0.8, 'False': 0.2},
 'BodyCovering': {'Fur': 0.4, 'Feathers': 0.4, 'Scales': 0.2}}

In [18]:
calculate_joint_probability_of_BN("Animal/")

In [12]:
calculate_probs_of_vertices("Asia/")

{'VisitAsia': {'Visit': 0.01, 'NoVisit': 0.99},
 'Smoking': {'Smoking': 0.5, 'NoSmoking': 0.5},
 'Tuberculosis': {'Present': 0.010400000000000001,
  'Absent': 0.9895999999999999},
 'LungCancer': {'Present': 0.055, 'Absent': 0.9450000000000001},
 'Tb_or_Ca': {'True': 0.064828, 'False': 0.935172},
 'XRay': {'Abnormal': 0.11029004, 'Normal': 0.8897099599999999},
 'Bronchitis': {'Present': 0.44999999999999996, 'Absent': 0.55},
 'Dyspnea': {'True': 0.4393105, 'False': 0.5606895000000001}}

In [13]:
calculate_joint_probability_of_BN("Asia/")

Before:  [['Visit', 'NoVisit'], ['Smoking', 'NoSmoking'], ['Present', 'Absent'], ['Present', 'Absent'], ['True', 'False'], ['Abnormal', 'Normal'], ['Present', 'Absent'], ['True', 'False']]
