In [32]:
import numpy as np
import pandas as pd

def readFactorTable(varnames, probs, outcomesList):
    factorTable = pd.DataFrame({'probs': probs})

    totalfactorTableLength = len(probs)
    numVars = len(varnames)

    k = 1
    for i in range(numVars - 1, -1, -1):
        levs = outcomesList[i]
        numLevs = len(levs)
        col = []
        for j in range(0, numLevs):
            col = col + [levs[j]] * k
        factorTable[varnames[i]] = col * int(totalfactorTableLength / (k * numLevs)) 
        k = k * numLevs
    return factorTable

BatteryState = readFactorTable(['battery'], [0.9, 0.1], [[1, 0]])
FuelState = readFactorTable(['fuel'], [0.9, 0.1], [[1, 0]])
GaugeBF = readFactorTable(['gauge', 'battery', 'fuel'], [0.8, 0.2, 0.2, 0.1, 0.2, 0.8, 0.8, 0.9], [[1, 0], [1, 0], [1, 0]])

carNet = [BatteryState, FuelState, GaugeBF]  # carNet is a list of factors


print(BatteryState)
print(FuelState)
print(GaugeBF)

   probs  battery
0    0.9        1
1    0.1        0
   probs  fuel
0    0.9     1
1    0.1     0
   probs  fuel  battery  gauge
0    0.8     1        1      1
1    0.2     0        1      1
2    0.2     1        0      1
3    0.1     0        0      1
4    0.2     1        1      0
5    0.8     0        1      0
6    0.8     1        0      0
7    0.9     0        0      0


In [14]:
def joinFactors(Factor1, Factor2):
    # your code

    shared_col = [col for col in Factor1.columns if col in Factor2.columns and col != "probs"]

    # If merge two df with a samely named column "example", if will become two: "example_x" and "example_y"

    if shared_col == []:

        joined = pd.merge(Factor1, Factor2, how="cross")
    
    else:
        
        joined = pd.merge(Factor1, Factor2, on=shared_col)

    joined["probs"] = joined["probs_x"] * joined["probs_y"]

    joined = joined.drop(["probs_x", "probs_y"], axis=1)

    return joined

joinFactors(BatteryState, FuelState)

Unnamed: 0,battery,fuel,probs
0,1,1,0.81
1,1,0,0.09
2,0,1,0.09
3,0,0,0.01


In [15]:
joinFactors(joinFactors(BatteryState, FuelState), GaugeBF)


Unnamed: 0,battery,fuel,gauge,probs
0,1,1,1,0.648
1,1,1,0,0.162
2,1,0,1,0.018
3,1,0,0,0.072
4,0,1,1,0.018
5,0,1,0,0.072
6,0,0,1,0.001
7,0,0,0,0.009


In [16]:
joinFactors(joinFactors(GaugeBF, FuelState), BatteryState)


Unnamed: 0,fuel,battery,gauge,probs
0,1,1,1,0.648
1,1,1,0,0.162
2,0,1,1,0.018
3,0,1,0,0.072
4,1,0,1,0.018
5,1,0,0,0.072
6,0,0,1,0.001
7,0,0,0,0.009


In [30]:
joinFactors(GaugeBF, BatteryState)

Unnamed: 0,fuel,battery,gauge,probs
0,1,1,1,0.72
1,0,1,1,0.18
2,1,1,0,0.18
3,0,1,0,0.72
4,1,0,1,0.02
5,0,0,1,0.01
6,1,0,0,0.08
7,0,0,0,0.09


In [None]:
def marginalizeFactor(factorTable, hiddenVar):
    # your code

    mar_col = [col for col in factorTable.columns if col != hiddenVar and col != "probs" ]

    marginalized = factorTable.groupby(mar_col, as_index=False)['probs'].sum()

    return marginalized

marginalizeFactor(joinFactors(GaugeBF, BatteryState), 'gauge')

Unnamed: 0,fuel,battery,probs
0,0,0,0.1
1,1,0,0.1
2,0,1,0.9
3,1,1,0.9


In [25]:
joinFactors(marginalizeFactor(GaugeBF, 'gauge'), BatteryState)

Unnamed: 0,fuel,battery,probs
0,0,0,0.1
1,1,0,0.1
2,0,1,0.9
3,1,1,0.9


In [26]:
joinFactors(marginalizeFactor(joinFactors(GaugeBF, BatteryState), 'battery'), FuelState)

Unnamed: 0,fuel,gauge,probs
0,0,0,0.081
1,0,1,0.019
2,1,0,0.234
3,1,1,0.666


In [27]:
marginalizeFactor(joinFactors(joinFactors(GaugeBF, FuelState), BatteryState), 'battery')

Unnamed: 0,fuel,gauge,probs
0,0,0,0.081
1,0,1,0.019
2,1,0,0.234
3,1,1,0.666


In [28]:
marginalizeFactor(joinFactors(marginalizeFactor(joinFactors(GaugeBF, BatteryState), 'battery'), FuelState), 'gauge')

Unnamed: 0,fuel,probs
0,0,0.1
1,1,0.9


In [29]:
marginalizeFactor(joinFactors(marginalizeFactor(joinFactors(GaugeBF, BatteryState), 'battery'), FuelState), 'fuel')

Unnamed: 0,gauge,probs
0,0,0.315
1,1,0.685


In [35]:
def evidenceUpdateNet(bayesnet, evidenceVars, evidenceVals):
    # your code

    evidence_net = []

    for factor in bayesnet:

        evident_factor = factor.copy()

        index = 0

        for var in evidenceVars:
            if var in evident_factor.columns:
                evident_factor = evident_factor.loc[evident_factor[var] == evidenceVals[index]]

            index += 1

        evidence_net.append(evident_factor)

    return evidence_net

BatteryState = readFactorTable(['battery'], [0.9, 0.1], [[1, 0]])
FuelState = readFactorTable(['fuel'], [0.9, 0.1], [[1, 0]])
GaugeBF = readFactorTable(['gauge', 'battery', 'fuel'], [0.8, 0.2, 0.2, 0.1, 0.2, 0.8, 0.8, 0.9], [[1, 0], [1, 0], [1, 0]])

carNet = [BatteryState, FuelState, GaugeBF]  # carNet is a list of factors

evidenceUpdateNet(carNet, ['fuel', 'battery'], [1, 0])


[   probs  battery
 1    0.1        0,
    probs  fuel
 0    0.9     1,
    probs  fuel  battery  gauge
 2    0.2     1        0      1
 6    0.8     1        0      0]

In [40]:
def inference(bayesnet, hiddenVars, evidenceVars, evidenceVals):
    # your code

    evi_net = evidenceUpdateNet(bayesnet, evidenceVars, evidenceVals)

    inf_tab = evi_net[0]

    index = 0

    for df in evi_net:
        if index == 0:
            index = 1
            continue

        inf_tab = joinFactors(inf_tab, df)
    
    inf_tab = marginalizeFactor(inf_tab, hiddenVars)

    inf_tab["probs"] = inf_tab["probs"] / inf_tab["probs"].sum()

    return inf_tab


print("inference starts")
print(inference(carNet, ['battery', 'fuel'], [], []))  ## chapter 8 equation (8.30)
print(inference(carNet, ['battery'], ['fuel'], [0]))  ## chapter 8 equation (8.31)
print(inference(carNet, ['battery'], ['gauge'], [0]))  ##chapter 8 equation  (8.32)
print(inference(carNet, [], ['gauge', 'battery'], [0, 0]))  ## chapter 8 equation (8.33)
print("inference ends")

inference starts
   battery  fuel  gauge  probs
0        0     0      0  0.009
1        0     0      1  0.001
2        0     1      0  0.072
3        0     1      1  0.018
4        1     0      0  0.072
5        1     0      1  0.018
6        1     1      0  0.162
7        1     1      1  0.648
   battery  fuel  gauge  probs
0        0     0      0   0.09
1        0     0      1   0.01
2        1     0      0   0.72
3        1     0      1   0.18
   battery  fuel  gauge     probs
0        0     0      0  0.028571
1        0     1      0  0.228571
2        1     0      0  0.228571
3        1     1      0  0.514286
   battery  fuel  gauge     probs
0        0     0      0  0.111111
1        0     1      0  0.888889
inference ends


In [None]:
from functools import reduce


def readFactorTablefromData(data, varnames):
    numVars = len(varnames)
    outcomesList = []

    for i in range(0, numVars):
        name = varnames[i]
        outcomesList = outcomesList + [list(set(data[name]))]

    lengths = list(map(lambda x: len(x), outcomesList))
    m = reduce(lambda x, y: x * y, lengths)

    factorTable = pd.DataFrame({'probs': np.zeros(m)})

    k = 1
    for i in range(numVars - 1, -1, -1):
        levs = outcomesList[i]
        numLevs = len(levs)
        col = []
        for j in range(0, numLevs):
            col = col + [levs[j]] * k
        factorTable[varnames[i]] = col * int(m / (k * numLevs))
        k = k * numLevs

    numLevels = len(outcomesList[0])

    # creates the vector called fact to index probabilities
    # using matrix multiplication with the data frame
    fact = np.zeros(data.shape[1])
    lastfact = 1
    for i in range(len(varnames) - 1, -1, -1):
        fact = np.where(np.isin(list(data), varnames[i]), lastfact, fact)
        lastfact = lastfact * len(outcomesList[i])

    # Compute unnormalized counts of subjects that satisfy all conditions
    a = (data - 1).dot(fact) + 1
    for i in range(0, m):
        factorTable.at[i, 'probs'] = sum(a == (i + 1))

    # normalize the conditional probabilities
    skip = int(m / numLevels)
    for i in range(0, skip):
        normalizeZ = 0
        for j in range(i, m, skip):
            normalizeZ = normalizeZ + factorTable['probs'][j]
        for j in range(i, m, skip):
            if normalizeZ != 0:
                factorTable.at[j, 'probs'] = factorTable['probs'][j] / normalizeZ

    return factorTable

riskFactorNet = pd.read_csv('RiskFactorsData.csv')

income = readFactorTablefromData(riskFactorNet, ['income'])

print(income)