In [21]:
import copy

import numpy as np
from pgmpy.readwrite import BIFReader
import random
import numpy

In [22]:
#Parameters: run configuration read by the driver code further down in this notebook.
# Group identifier; not referenced by any code visible in this notebook.
GROUP_ID = '04'
# Inference algorithm selector: 'gibbs' runs gibbsSampling, 've' is a placeholder branch,
# anything else is reported as unrecognized.
ALGORITHM = 'gibbs'
# Path of the BIF network file handed to BIFReader.
NETWORK_NAME = 'networks/win95pts.bif'
# Variables to report, written as a bracketed, comma-separated list inside a string.
REPORT = '[Problem1,Problem2,Problem3,Problem4,Problem5,Problem6]'
# Only the literal value 'None' is checked by the driver; it disables evidence parsing.
EVIDENCE_LEVEL = 'Moderate'
# Observed values as ';'-separated 'name=value' entries; a value may be wrapped in
# double quotes when it contains '=' itself.
EVIDENCE = 'Problem5 =Yes'

In [23]:
def normalizeDistribution(distribution):
    """Scale ``distribution`` so that its entries sum to 1.

    The list is modified in place and the same list object is returned, so
    both ``normalizeDistribution(d)`` and ``d = normalizeDistribution(d)``
    leave ``d`` normalized.

    Parameters
    ----------
    distribution : list of numbers
        Unnormalized, non-negative weights (counts or probabilities).

    Returns
    -------
    list
        The input list. When the weights sum to zero every entry is replaced
        by ``1 / len(distribution)`` (uniform fallback); an empty list is
        returned unchanged.
    """
    total = sum(distribution)
    if total != 0:
        for i in range(len(distribution)):
            distribution[i] = distribution[i] / total
    else:
        # Edge case where all probabilities are infinitesimally small (or all
        # counts are zero): fall back to a uniform distribution instead of
        # dividing by zero.
        for i in range(len(distribution)):
            distribution[i] = 1 / len(distribution)
    return distribution

In [24]:
def gibbsSampling(Network, reportedVars, evidenceVars, numSamples, burnInLength):
    """Estimate marginals of ``reportedVars`` by Gibbs sampling and print them.

    Every iteration picks one random non-evidence variable, computes its
    distribution given the current values of its parents and children
    (P(x | parents(x)) times the product of P(child | parents(child))), and
    resamples it. After the burn-in period the current value of every
    reported variable is tallied once per iteration.

    Parameters
    ----------
    Network : object
        Must provide ``states``, ``nodes``, ``get_cpds``, ``get_parents`` and
        ``get_markov_blanket``. Presumably the pgmpy model returned by
        ``BIFReader.get_model()`` -- verify against the caller.
    reportedVars : list of str
        Names of the variables whose distributions are reported.
    evidenceVars : dict
        Maps observed variable names to their observed state; these are
        never resampled.
    numSamples : int
        Total number of iterations, burn-in included.
    burnInLength : int
        Iterations with index below this value are not tallied.

    Returns
    -------
    list of list
        One table per reported variable, in ``reportedVars`` order, holding
        the normalized frequencies in ``Network.states`` order. A reported
        variable that is also evidence gets a table filled with ``'x'``.
        The same list is printed before it is returned.
    """
    possVals = Network.states
    randomVals = {}

    #Initializing random state values (evidence stays clamped to its observation)
    for key in possVals.keys():
        if key in evidenceVars:
            randomVals[key] = evidenceVars[key]
        else:
            randomVals[key] = random.choice(possVals[key])

    #initializing counting tables
    countingTables = []
    for var in reportedVars:
        #case where one of the reported vars is given as evidence
        # (uses the evidenceVars argument, not a notebook-level global)
        placeholder = 'x' if var in evidenceVars else 0
        countingTables.append([placeholder] * len(possVals[var]))

    # Only non-evidence nodes may be resampled. Building the list once avoids
    # re-listing the nodes every iteration and an endless retry loop when
    # every node is observed (nothing to sample in that case).
    freeVars = [node for node in Network.nodes if node not in evidenceVars]
    totalIterations = numSamples if freeVars else 0

    # The graph structure is fixed during sampling, so the parents/children
    # of each variable are looked up once and reused.
    relativesCache = {}

    for i in range(totalIterations):
        randVar = random.choice(freeVars)
        if randVar not in relativesCache:
            relativesCache[randVar] = getParentsAndChildren(Network, randVar)
        varParents, varChildren = relativesCache[randVar]

        cpds = Network.get_cpds(randVar)
        valueDistribution = []
        for value in cpds.state_names[randVar]:
            #Calculates P(x_i|parents(X_i))
            # assumes the CPD array is indexed [own state][parent states in
            # Network.get_parents order] -- TODO confirm against pgmpy
            valueArr = cpds.values[cpds.name_to_no[randVar][value]]
            for parent in varParents:
                parentArrIndex = cpds.name_to_no[parent][randomVals[parent]]
                valueArr = valueArr[parentArrIndex]
            varVal = valueArr

            #Multiplies all child probabilities P(y_j | parents(y_j))
            childrenProb = 1
            for child in varChildren:
                childCpds = Network.get_cpds(child)
                childrenArr = childCpds.values[childCpds.name_to_no[child][randomVals[child]]]
                for parent in Network.get_parents(child):
                    # The variable being resampled takes the candidate value;
                    # every other parent keeps its current value.
                    parentVal = value if parent == randVar else randomVals[parent]
                    childrenArr = childrenArr[childCpds.name_to_no[parent][parentVal]]
                #Fix for 0 probabilities causing the sampling to get stuck
                # NOTE(review): substituting 0.05 for an exact zero keeps the
                # chain moving but biases the estimates.
                if childrenArr == 0:
                    childrenArr = 0.05
                childrenProb *= childrenArr
            valueDistribution.append(varVal * childrenProb)

        #normalized value distribution for randomly selected variable has been found
        valueDistribution = normalizeDistribution(valueDistribution)
        chosenVal = numpy.random.choice(possVals[randVar], p=valueDistribution)
        #setting the variable's randomly (probability-distribution) based value
        randomVals[randVar] = chosenVal

        #counting value for reported variable if not in burn in period
        if i >= burnInLength:
            for t in range(len(countingTables)):
                #case where reported variable is also a part of evidence
                if countingTables[t][0] != 'x':
                    countingIndex = possVals[reportedVars[t]].index(randomVals[reportedVars[t]])
                    countingTables[t][countingIndex] += 1

    # Turn the raw counts into relative frequencies; evidence tables keep 'x'.
    for t in range(len(countingTables)):
        if countingTables[t][0] != 'x':
            countingTables[t] = normalizeDistribution(countingTables[t])
    print(countingTables)
    return countingTables


def getParentsAndChildren(Network, variable):
    """Return ``[parents, children]`` of ``variable`` in ``Network``.

    Parents come straight from ``Network.get_parents(variable)``. Children
    are the members of the variable's Markov blanket that list ``variable``
    among their own parents, kept in blanket order.
    """
    blanket = Network.get_markov_blanket(variable)
    ownParents = Network.get_parents(variable)
    offspring = [
        member for member in blanket
        if variable in Network.get_parents(member)
    ]
    return [ownParents, offspring]

In [25]:
# Load the Bayesian network described by the BIF file.
reader = BIFReader(NETWORK_NAME)
model = reader.get_model()

# REPORT looks like '[A,B,C]': drop the brackets, then trim every name.
# (Rebinding a loop variable does not change the list, so the trimmed names
# are collected into a new list; empty entries are ignored.)
reportList = REPORT.lstrip('[').rstrip(']')
reportVariables = [name.strip() for name in reportList.split(',') if name.strip()]

# EVIDENCE is a ';'-separated list of name=value entries.
evidenceVariables = {}
if EVIDENCE_LEVEL != "None":
    for entry in EVIDENCE.split(";"):
        if not entry.strip():
            # Tolerate an empty EVIDENCE string or a trailing ';'.
            continue
        if '"' in entry:
            #Edge case for variables with '=' characters in their values:
            # name = "value" -- the quoted text is taken verbatim.
            pieces = entry.split('"')
            name = pieces[0].strip().rstrip('=').strip()
            evidenceVariables[name] = pieces[1]
        else:
            name, separator, value = entry.partition("=")
            if not separator:
                raise ValueError("Malformed evidence entry (expected name=value): " + repr(entry))
            # Trim both sides so 'Problem5 = Yes' matches the state 'Yes'.
            evidenceVariables[name.strip()] = value.strip()

# Sampler budget: total iterations (burn-in included) and discarded burn-in.
NUM_SAMPLES = 2100000
BURN_IN_LENGTH = 100000

if ALGORITHM == "gibbs":
    gibbsSampling(model, reportVariables, evidenceVariables, NUM_SAMPLES, BURN_IN_LENGTH)
elif ALGORITHM == "ve":
    print()
    #variable elim (not implemented)
else:
    print("Unrecognized algorithm:", ALGORITHM)



[[0.4053895, 0.5946105], [1.0, 0.0], [0.004033, 0.995967], [0.0344925, 0.9655075], ['x', 'x'], [0.8327905, 0.1672095]]
