In [13]:
import pandas as pd

from BNReasoner import BNReasoner
from BayesNet import BayesNet

In [6]:
# Relative path to the BIFXML network file that is handed to BNReasoner below.
NETWORK = "testing/lecture_example2.BIFXML"
reasoner = BNReasoner(NETWORK)
# Fetch every CPT from the reasoner's `bn` object; the result is indexed by
# variable name further down (e.g. cpts['X']).
cpts = reasoner.bn.get_all_cpts()

In [9]:
# Display the CPT stored under key 'X' (saved output has columns J, I, X, p).
cpts['X']

Unnamed: 0,J,I,X,p
0,False,False,False,0.95
1,False,False,True,0.05
2,False,True,False,0.95
3,False,True,True,0.05
4,True,False,False,0.95
5,True,False,True,0.05
6,True,True,False,0.05
7,True,True,True,0.95


In [60]:
# Factor f3(A, B, C) from the lecture example: one row per truth assignment,
# with the factor value in column 'p'.
f3 = pd.DataFrame(
    {
        'A': [True, True, True, True, False, False, False, False],
        'B': [True, True, False, False, True, True, False, False],
        'C': [True, False, True, False, True, False, True, False],
        'p': [0.03, 0.07, 0.54, 0.36, 0.06, 0.14, 0.48, 0.32],
    }
)

### Summing out

$$
f_{3}=\begin{array}{|lll|r|}
A & B & C & \text { val } \\
- & - & - & -- \\
t & t & t & 0.03 \\
t & t & f & 0.07 \\
t & f & t & 0.54 \\
t & f & f & 0.36 \\
f & t & t & 0.06 \\
f & t & f & 0.14 \\
f & f & t & 0.48 \\
f & f & f & 0.32 \\
\end{array}
\quad\quad\quad\quad
\sum_{B} f_{3}=\begin{array}{|ll|r|}
A & C & \text { val } \\
- & - & --\\
t & t & 0.57 \\
t & f & 0.43 \\
f & t & 0.54 \\
f & f & 0.46
\end{array}
$$
Example of summing out variable $B$ from a factor $f_3\left(A,B,C\right)$; the result, $\sum_{B} f_{3}$, is a factor over $A$ and $C$ only. For instance:
$$
\begin{aligned}
\left(\sum_{B} f_{3}\right)(A=t, C=f) &=f_{3}(A=t, B=t, C=f)+f_{3}(A=t, B=f, C=f) \\
&=0.07+0.36 \\
&=0.43
\end{aligned}
$$

In [57]:
def sum_out_var(cpt: pd.DataFrame, variable: str) -> pd.DataFrame:
    """Sum out (marginalize) a variable from a factor / conditional probability table.

    All rows that agree on every remaining variable are collapsed into one row
    whose 'p' is the sum of the collapsed rows' 'p' values. The input frame is
    not modified.

    Args:
        cpt (pd.DataFrame): Factor with one column per variable plus a 'p'
            column holding the factor values.
        variable (str): Name of the variable column to sum out.

    Returns:
        pd.DataFrame: Factor over the remaining variables (their columns in the
            original order, followed by 'p'), with rows sorted by those
            variables and a fresh 0..n-1 index. If `variable` was the only
            variable in `cpt`, a one-row frame with the single column 'p'
            holding the total.

    Raises:
        KeyError: If `variable` is not a column of `cpt`.
    """
    if variable not in cpt.columns:
        raise KeyError(variable)

    remaining_vars = [col for col in cpt.columns if col not in (variable, 'p')]

    # Summing out the last variable leaves a factor over no variables at all.
    # groupby() rejects an empty key list, so build the scalar factor directly.
    if not remaining_vars:
        return pd.DataFrame({'p': [cpt['p'].sum()]})

    # Grouping by the remaining variables adds up the rows for every value of
    # `variable` at once; no need to split the table by value first.
    return cpt.groupby(remaining_vars, as_index=False)['p'].sum()

In [118]:
def multiply_factors(cpt1: pd.DataFrame, cpt2: pd.DataFrame) -> pd.DataFrame:
    """Multiply two factors.

    Rows of the two tables are paired when they agree on every variable the
    tables share; the product row carries the union of the variables and a 'p'
    equal to the product of the two input 'p' values. Neither input is modified.

    Args:
        cpt1 (pd.DataFrame): First factor; one column per variable plus 'p'.
        cpt2 (pd.DataFrame): Second factor; one column per variable plus 'p'.

    Returns:
        pd.DataFrame: Product factor. Columns are cpt1's variables, then the
            variables only in cpt2, then 'p'.
    """
    # Derive the shared variables from the arguments themselves (not from any
    # notebook-level frames), keeping cpt1's column order so the merge keys are
    # deterministic.
    cpt2_vars = set(cpt2.columns) - {'p'}
    common_vars = [col for col in cpt1.columns if col != 'p' and col in cpt2_vars]

    if common_vars:
        merged_df = pd.merge(cpt1, cpt2, on=common_vars)
    else:
        # No shared variable: the product ranges over every combination of rows.
        merged_df = pd.merge(cpt1, cpt2, how='cross')

    # Both inputs have a 'p' column, so merge suffixes them as p_x / p_y.
    merged_df['p'] = merged_df['p_x'] * merged_df['p_y']
    return merged_df.drop(columns=['p_x', 'p_y'])


In [66]:
# Factor f1(B, C, D): eight rows, one per truth assignment, value in 'p'.
f1 = pd.DataFrame(
    {
        'B': [True, True, True, True, False, False, False, False],
        'C': [True, True, False, False, True, True, False, False],
        'D': [True, False, True, False, True, False, True, False],
        'p': [0.95, 0.05, 0.9, 0.1, 0.8, 0.2, 0, 1],
    }
)

# Factor f2(D, E): four rows; it shares only the variable D with f1.
f2 = pd.DataFrame(
    {
        'D': [True, True, False, False],
        'E': [True, False, True, False],
        'p': [0.448, 0.192, 0.112, 0.248],
    }
)

In [121]:
# Join f1 and f2 on their shared variable D; the two 'p' columns come back
# suffixed as p_x (from f1) and p_y (from f2).
merged_df = f1.merge(f2, on='D')
merged_df

Unnamed: 0,B,C,D,p_x,E,p_y
0,True,True,True,0.95,True,0.448
1,True,True,True,0.95,False,0.192
2,True,False,True,0.9,True,0.448
3,True,False,True,0.9,False,0.192
4,False,True,True,0.8,True,0.448
5,False,True,True,0.8,False,0.192
6,False,False,True,0.0,True,0.448
7,False,False,True,0.0,False,0.192
8,True,True,False,0.05,True,0.112
9,True,True,False,0.05,False,0.248


In [122]:
# Product of the two example factors f1(B, C, D) and f2(D, E).
res = multiply_factors(cpt1=f1, cpt2=f2)
res

Unnamed: 0,B,C,D,E,p
0,True,True,True,True,0.4256
1,True,True,True,False,0.1824
2,True,False,True,True,0.4032
3,True,False,True,False,0.1728
4,False,True,True,True,0.3584
5,False,True,True,False,0.1536
6,False,False,True,True,0.0
7,False,False,True,False,0.0
8,True,True,False,True,0.0056
9,True,True,False,False,0.0124


In [104]:
# Variables that appear in both f1 and f2 (the value column 'p' is excluded).
common_vars = list(
    {col for col in f1.columns if col != 'p'}
    & {col for col in f2.columns if col != 'p'}
)
common_vars

{'D'}

In [103]:
# Step-by-step version of the factor product: join on D, multiply the two
# suffixed value columns into 'p', then discard the suffixed columns.
merged_df = f1.merge(f2, on='D')
merged_df = merged_df.assign(p=merged_df['p_x'] * merged_df['p_y']).drop(columns=['p_x', 'p_y'])


ValueError: No axis named 1 for object type Series

In [102]:
# Display the current merged_df (saved output has columns B, C, D, E, p).
merged_df

Unnamed: 0,B,C,D,E,p
0,True,True,True,True,0.4256
1,True,True,True,False,0.1824
2,True,False,True,True,0.4032
3,True,False,True,False,0.1728
4,False,True,True,True,0.3584
5,False,True,True,False,0.1536
6,False,False,True,True,0.0
7,False,False,True,False,0.0
8,True,True,False,True,0.0056
9,True,True,False,False,0.0124


In [38]:
# NOTE(review): in the visible cells `var_false_df` is only assigned as a local
# inside sum_out_var, and the saved output uses a 'val' column rather than 'p'.
# Presumably left over from an earlier kernel session - confirm this cell still
# runs after Restart & Run All.
var_false_df

Unnamed: 0,A,C,val
2,True,True,0.54
3,True,False,0.36
6,False,True,0.48
7,False,False,0.32


In [45]:
# Inner-join the two frames on A and C so their value columns sit side by side.
# NOTE(review): `var_true_df` / `var_false_df` are not assigned at notebook
# level in any visible cell - confirm where they come from.
pd.merge(var_true_df, var_false_df, on=['A', 'C'], how='inner')

Unnamed: 0,A,C,val_x,val_y
0,True,True,0.03,0.54
1,True,False,0.07,0.36
2,False,True,0.06,0.48
3,False,False,0.14,0.32


In [49]:
# Stack the two frames and add up 'val' per (A, C) pair; the saved output
# matches the summed-out table shown in the "Summing out" section.
# NOTE(review): `var_true_df` / `var_false_df` are not assigned at notebook
# level in any visible cell - confirm where they come from.
pd.concat([var_true_df, var_false_df]).groupby(["A", "C"], as_index=False)["val"].sum()

Unnamed: 0,A,C,val
0,False,False,0.46
1,False,True,0.54
2,True,False,0.43
3,True,True,0.57


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=97e6598c-0899-4008-b807-f77cd0756453' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>