In [9]:
import pandas as pd 
import numpy as np

#Matrix operations
from itertools import product

# Factors / CPDs

### Synthetic data

In [10]:
# Base of the synthetic CPD probabilities; passed to cpd_factor when the
# factors are generated further down.
alpha = 0.5

In [11]:
# Load the scope matrix from CSV and display it. factor_scope (below) looks up
# its columns by node name and reports the rows that hold a 1.
scope_mat = pd.read_csv('dataset/scope.csv')
scope_mat

def factor_scope(Xi):
    """Return the 1-based row positions at which column ``Xi`` of the global
    ``scope_mat`` holds the value 1.

    Parameters
    ----------
    Xi : column label of ``scope_mat`` (e.g. ``"X6"``).

    Returns
    -------
    numpy.ndarray
        Positions (row offset + 1) of the entries equal to 1.
    """
    is_member = scope_mat[Xi] == 1
    # np.where on a 1-D condition yields a one-element tuple of positions.
    (row_positions,) = np.where(is_member)
    return row_positions + 1
    
# Example: 1-based row positions where column "X6" of scope_mat equals 1.
factor_scope("X6")

array([ 6,  7, 10])

Elimination order

In [12]:
# The scope consists of the parents and the node itself.
# variables_to_eliminate: order in which sum_product_ve sums variables out
# (X10 and X11 are not listed, so they are never eliminated).
# elimination_scope: one (node, parent_list) pair per node; an empty list marks
# a node without parents.
variables_to_eliminate = ['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9']
elimination_scope = [("X1", ['X7', 'X8']),("X2", ['X4', 'X8']),("X3", ['X9', 'X8']),
                     ("X5", ['X9', 'X8']),("X4", ['X11', 'X8', 'X9']),("X6", ['X7', 'X10']),
                     ("X7", []),("X8", []),("X9", ['X10', 'X6']), ("X10", ['X7']),("X11", ['X8', 'X9'])]


In [13]:
def create_factor(variable, parent_list):
    """Enumerate every binary assignment over a node and its parents.

    Parameters
    ----------
    variable : str
        Name of the child node; it becomes the last column.
    parent_list : sequence of str
        Names of the parent nodes; they become the leading columns, in the
        order given. May be empty for a node without parents.

    Returns
    -------
    pandas.DataFrame
        One row per 0/1 assignment, i.e. ``2 ** (len(parent_list) + 1)`` rows,
        in ``itertools.product`` order (the last column varies fastest).
    """
    # list(...) lets callers pass a tuple (or any sequence) of parents.
    columns = list(parent_list) + [variable]
    return pd.DataFrame(product([0, 1], repeat=len(columns)), columns=columns)

def cpd_factor(variable, parent_list, alpha):
    """Build the synthetic CPD table for ``variable`` given its parents.

    With ``k`` the number of parents set to 1 in a row, the table holds

        P(variable = 0 | parents) = 1 - alpha ** (1 + k)
        P(variable = 1 | parents) = alpha ** (1 + k)

    A node without parents is the ``k = 0`` case of the same rule, so its two
    rows are ``1 - alpha`` and ``alpha``. For ``alpha = 0.5`` this equals the
    constant ``1 - alpha`` previously written to both rows; for any other
    ``alpha`` the table now sums to 1.

    Parameters
    ----------
    variable : str
        Name of the child node (last column of the table).
    parent_list : sequence of str
        Names of the parent nodes.
    alpha : float
        Base of the probability rule above.

    Returns
    -------
    pandas.DataFrame
        The assignment table from ``create_factor`` plus a ``"prob"`` column.
    """
    cpd = create_factor(variable, parent_list)

    # Number of parents equal to 1 per row; all zeros when there are no parents.
    active_parents = cpd.iloc[:, :-1].sum(axis=1)
    p_child_zero = 1 - alpha ** (1 + active_parents)

    # Keep p where the child is 0, use the complement where it is 1. The result
    # is assigned back explicitly: an in-place ``where`` on ``cpd["prob"]``
    # would be a chained update that is not written back to the frame under
    # pandas Copy-on-Write.
    child_is_zero = cpd.iloc[:, -1] == 0
    cpd["prob"] = p_child_zero.where(child_is_zero, 1 - p_child_zero)
    return cpd
    
# Smoke test: build the CPD for the third entry of elimination_scope
# (X3 with parents X9 and X8).
factor_test = cpd_factor(elimination_scope[2][0], elimination_scope[2][1], alpha)


In [14]:
# Generate factors for all nodes
# list form: [(factor, name), ... ]
# Each factor is the CPD table from cpd_factor; its name is 'phi_' + node name.
factor_list = []
for elm in elimination_scope:
    # elm is a (node, parent_list) pair
    name = 'phi_' + str(elm[0])
    cpd = cpd_factor(elm[0], elm[1], alpha)
    factor_list.append((cpd, name))

# Show the first (factor, name) pair as a sanity check.
print(factor_list[0])

(   X7  X8  X1   prob
0   0   0   0  0.500
1   0   0   1  0.500
2   0   1   0  0.750
3   0   1   1  0.250
4   1   0   0  0.750
5   1   0   1  0.250
6   1   1   0  0.875
7   1   1   1  0.125, 'phi_X1')


# Sum product

In [15]:
# Multiply factors together
def factor_product(factor_1, factor_2):
    """Return the product of two factors stored as DataFrames.

    Each factor has one column per variable plus a ``"prob"`` column. Rows
    that agree on every shared variable are paired and their probabilities
    multiplied. When the factors share no variable, every row of one is paired
    with every row of the other (cross join), which is the factor product for
    disjoint scopes.

    Parameters
    ----------
    factor_1, factor_2 : pandas.DataFrame
        Factors to multiply; both must contain a ``"prob"`` column.

    Returns
    -------
    pandas.DataFrame
        Variables of ``factor_1`` followed by the new variables of
        ``factor_2``, then the multiplied ``"prob"`` column.
    """
    # Variables present in both factors ("prob" is a value, not a variable).
    shared_nodes = [
        col for col in factor_1.columns.intersection(factor_2.columns)
        if col != "prob"
    ]

    if shared_nodes:
        # Inner join the factors on the common nodes.
        joined = factor_1.merge(factor_2, how="inner", on=shared_nodes)
    else:
        # Disjoint scopes: an empty `on` list cannot be merged on, so pair
        # every row with every row instead.
        joined = factor_1.merge(factor_2, how="cross")

    # merge suffixes the clashing "prob" columns as prob_x / prob_y.
    joined["prob"] = joined["prob_x"] * joined["prob_y"]  # multiply the probabilities
    return joined.drop(columns=["prob_x", "prob_y"])

# Example: multiply the first two factors (phi_X1 and phi_X2), which share X8.
factor_product(factor_list[0][0], factor_list[1][0])

Unnamed: 0,X7,X8,X1,X4,X2,prob
0,0,0,0,0,0,0.25
1,0,0,0,0,1,0.25
2,0,0,0,1,0,0.375
3,0,0,0,1,1,0.125
4,0,0,1,0,0,0.25
5,0,0,1,0,1,0.25
6,0,0,1,1,0,0.375
7,0,0,1,1,1,0.125
8,1,0,0,0,0,0.375
9,1,0,0,0,1,0.375


In [16]:
# From algorithm 9.1 in the book (p. 298)
def sum_product_eliminate_var(factors: list, var: str, print_info=False):
    """Eliminate one variable from a set of factors by sum-product.

    All factors whose columns include ``var`` are multiplied together, ``var``
    is summed out of the product, and the resulting factor ``tau_<var>`` is
    appended to the factors that did not mention ``var``.

    Parameters
    ----------
    factors : list of (pandas.DataFrame, str)
        ``(factor, name)`` pairs; each factor has one column per variable plus
        a ``"prob"`` column.
    var : str
        Column label of the variable to eliminate.
    print_info : bool, default False
        If true, print the columns and names of the factors that were
        multiplied (debugging aid).

    Returns
    -------
    list of (pandas.DataFrame, str)
        The factors not mentioning ``var`` in their original order, followed
        by ``(tau, 'tau_' + str(var))``. If no factor mentions ``var`` the
        input pairs are returned unchanged (nothing to eliminate). If ``var``
        is the only variable of the product, ``tau`` is a one-row frame
        holding just the summed ``"prob"``.
    """
    in_scope, in_scope_names = [], []        # factors mentioning var, and their names
    out_of_scope, out_of_scope_names = [], []  # all other factors, and their names

    # Split factors into scope and not scope
    for entry in factors:
        factor, name = entry[0], entry[1]
        if var in factor.columns:
            in_scope.append(factor)
            in_scope_names.append(name)
        else:
            out_of_scope.append(factor)
            out_of_scope_names.append(name)

    # Nothing mentions var: return the input as-is instead of failing on an
    # empty list below.
    if not in_scope:
        return list(zip(out_of_scope, out_of_scope_names))

    # Multiply all factors in scope. (A plain loop keeps the single-factor
    # case free of any call to factor_product.)
    psi = in_scope[0]
    for other in in_scope[1:]:
        psi = factor_product(psi, other)

    # All variables that should not be marginalized
    keep = [col for col in psi.columns if col != var and col != "prob"]

    # Marginalize the variable
    if keep:
        tau = psi.groupby(keep, as_index=False)["prob"].sum()
    else:
        # groupby refuses an empty key list; the marginal is a single number.
        tau = pd.DataFrame({"prob": [psi["prob"].sum()]})
    tau_name = 'tau_' + str(var)

    # Debugging prints
    if print_info:
        for elm in in_scope:
            print(elm.columns)
        print(in_scope_names)

    # Add the new factor to the list of factors not in scope
    return list(zip(out_of_scope + [tau], out_of_scope_names + [tau_name]))

# Example: eliminate X1 from the full factor list and print the remaining
# (factor, name) pairs.
print(sum_product_eliminate_var(factor_list, 'X1', False))

[(   X4  X8  X2   prob
0   0   0   0  0.500
1   0   0   1  0.500
2   0   1   0  0.750
3   0   1   1  0.250
4   1   0   0  0.750
5   1   0   1  0.250
6   1   1   0  0.875
7   1   1   1  0.125, 'phi_X2'), (   X9  X8  X3   prob
0   0   0   0  0.500
1   0   0   1  0.500
2   0   1   0  0.750
3   0   1   1  0.250
4   1   0   0  0.750
5   1   0   1  0.250
6   1   1   0  0.875
7   1   1   1  0.125, 'phi_X3'), (   X9  X8  X5   prob
0   0   0   0  0.500
1   0   0   1  0.500
2   0   1   0  0.750
3   0   1   1  0.250
4   1   0   0  0.750
5   1   0   1  0.250
6   1   1   0  0.875
7   1   1   1  0.125, 'phi_X5'), (    X11  X8  X9  X4    prob
0     0   0   0   0  0.5000
1     0   0   0   1  0.5000
2     0   0   1   0  0.7500
3     0   0   1   1  0.2500
4     0   1   0   0  0.7500
5     0   1   0   1  0.2500
6     0   1   1   0  0.8750
7     0   1   1   1  0.1250
8     1   0   0   0  0.7500
9     1   0   0   1  0.2500
10    1   0   1   0  0.8750
11    1   0   1   1  0.1250
12    1   1   0   0  0.8750


In [17]:
def sum_product_ve(factors, order):
    """Run sum-product variable elimination over ``order``.

    Parameters
    ----------
    factors : list of (factor, name) pairs
        Starting set of factors.
    order : iterable
        Variables to eliminate, in sequence.

    Returns
    -------
    list of (factor, name) pairs
        Whatever remains after every variable in ``order`` has been
        eliminated. After each step the variable and the names of the
        surviving factors are printed.
    """
    remaining = factors
    for var in order:
        remaining = sum_product_eliminate_var(remaining, var)
        surviving_names = [pair[1] for pair in remaining]
        print(var, surviving_names)
    return remaining

# Eliminate every variable in variables_to_eliminate and print what is left:
# the factor(s) over the variables that were not eliminated.
print(sum_product_ve(factor_list, variables_to_eliminate))

X1 ['phi_X2', 'phi_X3', 'phi_X5', 'phi_X4', 'phi_X6', 'phi_X7', 'phi_X8', 'phi_X9', 'phi_X10', 'phi_X11', 'tau_X1']
X2 ['phi_X3', 'phi_X5', 'phi_X4', 'phi_X6', 'phi_X7', 'phi_X8', 'phi_X9', 'phi_X10', 'phi_X11', 'tau_X1', 'tau_X2']
X3 ['phi_X5', 'phi_X4', 'phi_X6', 'phi_X7', 'phi_X8', 'phi_X9', 'phi_X10', 'phi_X11', 'tau_X1', 'tau_X2', 'tau_X3']
X4 ['phi_X5', 'phi_X6', 'phi_X7', 'phi_X8', 'phi_X9', 'phi_X10', 'phi_X11', 'tau_X1', 'tau_X3', 'tau_X4']
X5 ['phi_X6', 'phi_X7', 'phi_X8', 'phi_X9', 'phi_X10', 'phi_X11', 'tau_X1', 'tau_X3', 'tau_X4', 'tau_X5']
X6 ['phi_X7', 'phi_X8', 'phi_X10', 'phi_X11', 'tau_X1', 'tau_X3', 'tau_X4', 'tau_X5', 'tau_X6']
X7 ['phi_X8', 'phi_X11', 'tau_X3', 'tau_X4', 'tau_X5', 'tau_X7']
X8 ['tau_X8']
X9 ['tau_X9']
[(   X11  X10      prob
0    0    0  0.438965
1    0    1  0.250122
2    1    0  0.186035
3    1    1  0.124878, 'tau_X9')]
