In [696]:

from numpy.ma.extras import row_stack
# import numpy as np
from pgmpy.readwrite import BIFReader
import random
import numpy
import copy
from collections import deque

In [697]:
#Parameters
# Run configuration read by the driver script and the output writers in this file.
GROUP_ID = '04'  # first component of the generated output file name
ALGORITHM = 've'  # 've' runs variable elimination, 'gibbs' runs Gibbs sampling
NETWORK_NAME = 'networks/insurance.bif'  # BIF file handed to BIFReader
# Bracketed, comma-separated list of variables whose distributions are reported.
# NOTE(review): confirm every variable listed here exists in NETWORK_NAME.
REPORT = '[Disease]'
# Label used in the output file name; the value "None" skips evidence parsing.
EVIDENCE_LEVEL = 'Moderate Evidence'
# Semicolon-separated "Variable=State" observations.
EVIDENCE = "Age=Adolescent; GoodStudent=False; SeniorTrain=False; DrivQuality=Poor; MakeModel=Luxury; CarValue=FiftyThou; DrivHist=Zero"

In [698]:
class Factor:
    """Mutable factor over discrete variables.

    Attributes:
        variables: list of variable names, one per position of a value key.
        values: the table of the factor, stored exactly as given (not copied).
        cardinality: list with one entry per variable in ``variables``.
        shared: indices into ``variables`` of the variables that
            ``addVariables`` found already present; reset by
            ``cancelOutVariable``.
        no_to_name: mapping ``variable -> {state number: state name}``.
    """

    def __init__(self, variable, variableValues, combos, no_to_name):
        # addVariables/cancelOutVariable change these containers in place, so
        # keep private copies instead of aliasing the caller's objects.
        self.variables = list(variable)
        self.values = variableValues
        self.cardinality = list(combos)
        self.shared = []
        self.no_to_name = dict(no_to_name)

    def addVariables(self, vars):
        """Extend this factor's scope with the variables of ``vars``.

        Variables that are new are appended together with their cardinality;
        variables already present are returned and their index is recorded in
        ``self.shared``. The state-name mapping of every variable of ``vars``
        is copied into ``self.no_to_name``.

        Args:
            vars: another factor exposing ``variables``, ``cardinality`` and
                ``no_to_name``.

        Returns:
            List of variable names present in both factors.
        """
        common_var = []
        for position, name in enumerate(vars.variables):
            self.no_to_name[name] = vars.no_to_name[name]
            if name in self.variables:
                common_var.append(name)
                index = self.variables.index(name)
                if index not in self.shared:
                    self.shared.append(index)
            else:
                self.variables.append(name)
                self.cardinality.append(vars.cardinality[position])

        return common_var

    def cancelOutVariable(self, vars):
        """Remove each variable in ``vars`` (and its cardinality) from the scope.

        ``self.values`` is not touched; the caller is expected to have set a
        matching table already. A missing variable is reported on stdout.
        """
        for variable in vars:
            if variable in self.variables:
                self.cardinality.pop(self.variables.index(variable))
                self.variables.remove(variable)
                # Stored indices no longer line up after a removal.
                self.shared = []
            else:
                print('Variable not found, issue with sum out function')

    def setValues(self, values):
        """Replace the factor's table with ``values``."""
        self.values = values


In [699]:
def normalizeDistribution(distribution):
    """Scale ``distribution`` in place so that its entries sum to 1.

    The same sequence object is returned. When the entries sum to zero the
    sequence is returned unchanged.
    """
    total = 0
    for entry in distribution:
        total += entry
    if total == 0:
        return distribution
    for position, entry in enumerate(distribution):
        distribution[position] = entry / total
    return distribution

def normalizeVEDistribution(distribution):
    """Normalize the table produced by variable elimination.

    ``distribution`` is a mapping whose values are rescaled in place so they
    sum to 1. The normalized values are returned as a list in the mapping's
    iteration order. When the values sum to zero the mapping is left alone
    and an empty list is returned.
    """
    total = 0
    for key in distribution:
        total += distribution[key]
    if total == 0:
        return []
    normalized = []
    for key in distribution:
        distribution[key] = distribution[key] / total
        normalized.append(distribution[key])
    return normalized

In [700]:
# Operation counters. They are only ever incremented (by sumOut and pointWise),
# so the values accumulate over every query run in the same session.
numSum = 0
numPointwise = 0

def variableElim(query, observedValues, bayesianNetwork):
    """Compute the normalized distribution of ``query`` by variable elimination.

    Args:
        query: a single variable name, or an iterable of variable names, that
            must be kept (never summed out).
        observedValues: mapping ``variable -> observed state``; observed
            variables are never summed out either.
        bayesianNetwork: the network passed on to ``makeFactors`` and
            ``reverseOrder``.

    Returns:
        ``(distribution, numSum, numPointwise)`` where ``distribution`` is the
        list returned by ``normalizeVEDistribution`` and the two counters are
        the current (cumulative) module-level totals.
    """
    # A bare string must be compared as one name: ``name in "OtherCarCost"``
    # is a substring test and would wrongly protect e.g. "OtherCar".
    queryVariables = {query} if isinstance(query, str) else set(query)

    variableElimination = makeFactors(observedValues, bayesianNetwork)
    for variable in reverseOrder(bayesianNetwork):
        if variable not in observedValues and variable not in queryVariables:
            variableElimination = sumOut(variable, variableElimination)
    final = pointWise(variableElimination)
    return normalizeVEDistribution(final.values), numSum, numPointwise

def reverseOrder(network):
    """Return the network's nodes in reversed breadth-first order.

    The traversal starts from ``network.get_roots()`` and follows
    ``network.get_children``; each node is visited once. The visit order is
    then reversed, so the roots come last.
    """
    roots = list(network.get_roots())
    pending = deque(roots)
    seen = set(roots)
    breadth_first = []
    while pending:
        node = pending.popleft()
        breadth_first.append(node)
        for child in network.get_children(node):
            if child in seen:
                continue
            seen.add(child)
            pending.append(child)
    return breadth_first[::-1]

#works by iterating through all cardinalities and counting from 0 to the value
def getAllCombinations(factor):
    """Enumerate every assignment of state numbers for ``factor``.

    Args:
        factor: object exposing ``cardinality`` (one count per variable) plus
            whatever ``translateTuple`` needs to map numbers to names.

    Returns:
        ``(translated, combo)``: two parallel lists. ``combo`` holds tuples of
        state numbers, ordered so that the first variable's number changes
        fastest; ``translated`` holds the same tuples mapped through
        ``translateTuple``. An empty cardinality yields one empty tuple.
    """
    combo = [()]
    for num in factor.cardinality:
        # Extend every existing prefix with each state number of this variable.
        combo = [prefix + (state,) for state in range(num) for prefix in combo]

    # Translate only the finished tuples; prefixes are never returned.
    translated = [translateTuple(numbers, factor) for numbers in combo]
    return translated, combo

#iterates through all cardinalities counting from 0 to the value except for evidence variable, which is only the set value
def getEvidenceCombinations(factor, fixed_positions):
    """Enumerate assignments of state numbers with some positions pinned.

    ``fixed_positions`` maps a position in ``factor.cardinality`` to the only
    state number allowed there; every other position ranges over
    ``0 .. cardinality - 1``. Returns ``(translated, result)``: ``result`` is
    the list of number tuples (last position changing fastest) and
    ``translated`` the same tuples passed through ``translateTuple``.
    """
    # Allowed state numbers for each position.
    choices = []
    for position, card in enumerate(factor.cardinality):
        if position in fixed_positions:
            choices.append([fixed_positions[position]])
        else:
            choices.append(list(range(card)))

    # Grow the tuples one position at a time.
    result = [()]
    for options in choices:
        result = [partial + (value,) for partial in result for value in options]

    translated = [translateTuple(combo, factor) for combo in result]
    return translated, result


def translateTuple(tuple, factor):
    """Map a tuple of state numbers to the matching tuple of state names.

    The number at position ``p`` is looked up in
    ``factor.no_to_name[factor.variables[p]]``.
    """
    names = ()
    for position, state_no in enumerate(tuple):
        names += (factor.no_to_name[factor.variables[position]][state_no],)
    return names

def variablesOppose(opposite, value, shared):
    """Tell whether two keys differ only at position ``shared``.

    Returns ``(True, value without position shared)`` when ``opposite`` and
    ``value`` disagree at ``shared`` and agree everywhere else. In every other
    case returns ``(False, value)``.
    """
    differs_at_shared = False
    for position in range(len(opposite)):
        mismatch = opposite[position] != value[position]
        if position == shared:
            differs_at_shared = bool(mismatch)
        elif mismatch:
            # A disagreement anywhere else rules the pair out.
            return False, value
    if differs_at_shared:
        return True, value[:shared] + value[shared + 1:]
    return False, value

def getValue(possibilities, cpd):
    """Return the entry of ``cpd.values`` addressed by ``possibilities``.

    ``possibilities`` is a sequence of indices applied one after another,
    outermost axis first.
    """
    entry = cpd.values
    for axis_index in possibilities:
        entry = entry[axis_index]
    return entry

def shareValues(factor, combined):
    """Return True when every item of ``factor`` is also in ``combined``."""
    return all(item in combined for item in factor)

def makeFactors(evidence, network):
    """Build one ``Factor`` per CPD of ``network``, restricted to ``evidence``.

    Every observed variable that appears in a CPD (as the CPD's own variable
    or as one of its parents) is pinned to its observed state: its cardinality
    becomes 1 and its number-to-name mapping keeps only that state.

    Args:
        evidence: mapping ``variable -> observed state name``. A value that
            starts with a curly opening quote has its first and last
            character dropped. The mapping itself is not modified.
        network: object exposing ``cpds``; each CPD must provide
            ``variables``, ``cardinality``, ``values``, ``state_names`` and
            ``no_to_name``.

    Returns:
        List of ``Factor`` objects whose ``values`` map tuples of state names
        to the corresponding CPD entry.

    Raises:
        ValueError: if an observed state is not one of the variable's states.
    """
    # Normalize quoted evidence values on a private copy.
    observed = {}
    for name, state in evidence.items():
        if state.startswith('“'):  # handles quotations in evidence
            state = state[1:-1]
        observed[name] = state

    factors = []
    for cpd in network.cpds:
        fixed_positions = {}   # position in the CPD -> pinned state number
        no_to_name = {}        # fresh mapping so the CPD's own is never edited
        cardinality = list(cpd.cardinality)
        for position, variable in enumerate(cpd.variables):
            if variable in observed:
                state_no = cpd.state_names[variable].index(observed[variable])
                fixed_positions[position] = state_no
                cardinality[position] = 1
                no_to_name[variable] = {state_no: observed[variable]}
            else:
                no_to_name[variable] = dict(cpd.no_to_name[variable])

        # Scratch factor that only carries what the enumeration helpers read.
        template = Factor(list(cpd.variables), {}, cardinality, no_to_name)
        if fixed_positions:
            combos, nums = getEvidenceCombinations(template, fixed_positions)
        else:
            combos, nums = getAllCombinations(template)

        values = {}
        for combo, numbers in zip(combos, nums):
            values[combo] = getValue(numbers, cpd)   # probability for this assignment
        factors.append(Factor(list(cpd.variables), values, cardinality, no_to_name))
    return factors

#sum out function eliminates variable(first parameter) we are focusing on in the list of our created factors(second parameter)
def sumOut(variable, variableElimination):
    """Eliminate ``variable`` from a list of factors.

    All factors mentioning ``variable`` are multiplied together with
    ``pointWise``; the entries of the product that agree on every other
    variable are then added up, and ``variable`` is removed from the product's
    scope.

    Args:
        variable: name of the variable to sum out.
        variableElimination: list of factors.

    Returns:
        New list holding the untouched factors followed by the summed-out
        product. The product is left out when it has no entries or when every
        entry equals exactly 1.0 (multiplying by it would change nothing).
        If no factor mentions ``variable`` the untouched factors are returned.
    """
    global numSum
    numSum += 1

    factors_with = []       # combine and sum out these factors
    factors_without = []    # don't manipulate these factors
    for factor in variableElimination:
        if variable in factor.variables:
            factors_with.append(factor)
        else:
            factors_without.append(factor)
    if not factors_with:
        return factors_without

    combine = pointWise(factors_with)
    index = combine.variables.index(variable)

    # Group entries by their key with the eliminated position removed; the
    # additions happen in the table's iteration order.
    new_values = {}
    for key, probability in combine.values.items():
        reduced = key[:index] + key[index + 1:]
        if reduced in new_values:
            new_values[reduced] += probability
        else:
            new_values[reduced] = probability

    combine.setValues(new_values)
    combine.cancelOutVariable([variable])
    if any(value != 1.0 for value in new_values.values()):
        factors_without.append(combine)
    return factors_without


def pointWise(variableElimination):
    """Multiply a list of factors into a single factor.

    The first factor is deep-copied and the remaining ones are merged into the
    copy one at a time, so none of the input factors is modified. For each
    merge, every pair of entries that agrees on all shared variables
    contributes one entry whose value is the product of the two.

    Args:
        variableElimination: non-empty list of factors.

    Returns:
        The combined factor; its scope is the first factor's variables
        followed by the new variables of each later factor, in order.

    Raises:
        IndexError: if ``variableElimination`` is empty.
    """
    global numPointwise
    numPointwise += 1

    product = copy.deepcopy(variableElimination[0])  # never overwrite an input factor

    for other in variableElimination[1:]:
        # Snapshot the scope before addVariables appends to it.
        old_variables = list(product.variables)
        old_values = product.values
        product.addVariables(other)

        # (position in old key, position in other's key) for each shared variable.
        shared_pairs = [
            (old_variables.index(var), other.variables.index(var))
            for var in other.variables
            if var in old_variables
        ]
        # Where each position of the merged key comes from.
        sources = [
            (True, old_variables.index(var)) if var in old_variables
            else (False, other.variables.index(var))
            for var in product.variables
        ]

        merged = {}
        for old_key, prob1 in old_values.items():
            for other_key, prob2 in other.values.items():
                if any(old_key[i] != other_key[j] for i, j in shared_pairs):
                    continue  # the two entries disagree on a shared variable
                new_key = tuple(
                    old_key[pos] if from_old else other_key[pos]
                    for from_old, pos in sources
                )
                merged[new_key] = prob1 * prob2

        product.setValues(merged)

    return product


In [701]:
def gibbsSampling(Network, reportedVars, evidenceVars, numSamples, burnInLength):
    """Estimate distributions of ``reportedVars`` by Gibbs sampling.

    Each iteration picks one non-evidence variable at random, resamples it
    from a distribution proportional to its own CPD entry times the CPD
    entries of its children (all read at the current state), and, once
    ``burnInLength`` iterations have passed, counts the current state of every
    reported variable.

    Args:
        Network: network exposing ``states``, ``nodes``, ``get_cpds``,
            ``get_parents`` and ``get_markov_blanket``.
        reportedVars: list of variable names to report.
        evidenceVars: mapping ``variable -> observed state``; these variables
            keep their observed state and are never resampled.
        numSamples: total number of iterations, burn-in included.
        burnInLength: number of initial iterations that are not counted.

    Returns:
        One list per reported variable, in the order of ``reportedVars``:
        normalized state frequencies, or a list of ``'x'`` markers when the
        variable is itself evidence.
    """
    possVals = Network.states

    #Initializing random state values
    randomVals = {}
    for key in possVals:
        if key in evidenceVars:
            randomVals[key] = evidenceVars[key]
        else:
            randomVals[key] = random.choice(possVals[key])

    #initializing counting tables ('x' marks a reported var given as evidence)
    countingTables = []
    for var in reportedVars:
        filler = 'x' if var in evidenceVars else 0
        countingTables.append([filler] * len(possVals[var]))

    # Only non-evidence nodes can be resampled; with none there is nothing to do.
    freeVars = [node for node in Network.nodes if node not in evidenceVars]
    if not freeVars:
        return countingTables

    for i in range(numSamples):
        randVar = random.choice(freeVars)
        parents, children = getParentsAndChildren(Network, randVar)
        cpds = Network.get_cpds(randVar)
        valueDistribution = []
        for value in cpds.state_names[randVar]:
            #Calculates P(x_i|parents(X_i))
            # NOTE(review): indexes the CPD axes in get_parents order; this
            # presumably matches the CPD's own parent order - confirm.
            varVal = cpds.values[cpds.name_to_no[randVar][value]]
            for parent in parents:
                varVal = varVal[cpds.name_to_no[parent][randomVals[parent]]]

            #Multiplies all child probabilities P(y_j | parents(y_j))
            childrenProb = 1
            for child in children:
                childCpds = Network.get_cpds(child)
                childEntry = childCpds.values[childCpds.name_to_no[child][randomVals[child]]]
                for parent in Network.get_parents(child):
                    parentVal = value if parent == randVar else randomVals[parent]
                    childEntry = childEntry[childCpds.name_to_no[parent][parentVal]]
                #Fix for 0 probabilities causing the sampling to get stuck in a state
                if childEntry == 0:
                    childEntry = 0.05
                childrenProb *= childEntry
            valueDistribution.append(varVal * childrenProb)

        #normalized value distribution for randomly selected variable has been found
        valueDistribution = normalizeDistribution(valueDistribution)
        randomVals[randVar] = numpy.random.choice(possVals[randVar], p=valueDistribution)

        #counting value for reported variable if not in burn in period
        if i >= burnInLength:
            for table, var in zip(countingTables, reportedVars):
                if table[0] != 'x':
                    table[possVals[var].index(randomVals[var])] += 1

    for table in countingTables:
        if table[0] != 'x':
            normalizeDistribution(table)   # normalizes the counts in place
    return countingTables


def getParentsAndChildren(Network, variable):
    """Return ``[parents, children]`` of ``variable``.

    ``parents`` is whatever ``Network.get_parents(variable)`` returns.
    ``children`` lists the members of the variable's Markov blanket that have
    ``variable`` among their own parents, in blanket order.
    """
    blanket = Network.get_markov_blanket(variable)
    parents = Network.get_parents(variable)
    children = [node for node in blanket if variable in Network.get_parents(node)]
    return [parents, children]

In [None]:
def createOutput(reportVariables, network, probabilityDistribution):
    """Write the reported distributions to a CSV file (overwriting it).

    The file name is built from GROUP_ID, ALGORITHM, the network's base name
    and EVIDENCE_LEVEL. For each reported variable a header line
    ``variable,state1,state2,...`` is written, followed by its distribution:
    comma-separated values when ALGORITHM is "gibbs", otherwise the ``str()``
    of the entry.

    Args:
        reportVariables: variable names, in output order.
        network: object exposing ``states`` (variable -> list of state names).
        probabilityDistribution: one entry per reported variable, same order.
    """
    # removeprefix/removesuffix drop the exact text; lstrip/rstrip would strip
    # any of the listed characters (e.g. "networks/sachs.bif" -> "achs").
    networkLabel = NETWORK_NAME.removeprefix('networks/').removesuffix('.bif')
    fileName = GROUP_ID + '_' + ALGORITHM + '_' + networkLabel + '_' + EVIDENCE_LEVEL + '.csv'
    states = network.states
    with open(fileName, 'w') as file:
        for counter, var in enumerate(reportVariables):
            file.write(",".join([var, *states[var]]) + "\n")
            distribution = probabilityDistribution[counter]
            if ALGORITHM == "gibbs":
                if len(distribution) > 0:
                    file.write(",".join(str(entry) for entry in distribution) + "\n")
            else:
                file.write(str(distribution) + "\n")

def createVEOutput(network, probabilityDistribution,var):
    """Append one variable's variable-elimination result to the CSV file.

    The file name is built from GROUP_ID, ALGORITHM, the network's base name
    and EVIDENCE_LEVEL. A header line ``variable,state1,state2,...`` is always
    written; when ALGORITHM is "ve" it is followed by the probabilities
    rounded to two decimals, each of which is also printed.

    Args:
        network: object exposing ``states`` (variable -> list of state names).
        probabilityDistribution: sequence of probabilities for ``var``.
        var: name of the reported variable.
    """
    # removeprefix/removesuffix drop the exact text; lstrip/rstrip would strip
    # any of the listed characters (e.g. "networks/sachs.bif" -> "achs").
    networkLabel = NETWORK_NAME.removeprefix('networks/').removesuffix('.bif')
    fileName = GROUP_ID + '_' + ALGORITHM + '_' + networkLabel + '_' + EVIDENCE_LEVEL + '.csv'
    states = network.states
    # Append mode: the driver calls this once per reported variable.
    with open(fileName, 'a') as file:
        file.write(",".join([var, *states[var]]) + "\n")
        if ALGORITHM == "ve":
            rounded = [str(round(n, 2)) for n in probabilityDistribution]
            for text in rounded:
                print(text)
            if rounded:
                file.write(",".join(rounded) + "\n")


#function for sanitizing input
def removeSpace(string):
    """Return ``string`` with every space character (' ') removed."""
    return ''.join(c for c in string if c != ' ')



reader = BIFReader(NETWORK_NAME)
model = reader.get_model()

# REPORT looks like "[A, B]": drop the brackets, split on commas and remove
# spaces; empty names (e.g. from "[]") are skipped.
reportList = REPORT.lstrip('[').rstrip(']')
reportVariables1 = reportList.split(',')
reportVariables = []
for var in reportVariables1:
    cleaned = removeSpace(var)
    if cleaned:
        reportVariables.append(cleaned)

# EVIDENCE is a ';'-separated list of Variable=State entries.
evidenceVariables = {}
if EVIDENCE_LEVEL != "None":
    splitter = EVIDENCE.split(";")
    for var in splitter:
        if not var.strip():
            continue  # tolerate an empty EVIDENCE or a trailing ';'
        if '"' in var:
            #Edge case for variables with '=' characters in their values:
            #the state is the text between the first pair of double quotes
            splitter2 = var.split('"')
            evidenceVariables[splitter2[0].strip().rstrip('=').strip()] = splitter2[1].strip()
        else:
            # Split on the first '=' only so a state containing '=' (for
            # instance one wrapped in curly quotes) is kept whole.
            name, separator, state = var.partition("=")
            if not separator:
                raise ValueError("Malformed evidence entry (expected Variable=State): " + repr(var))
            evidenceVariables[name.strip()] = state.strip()


#runs the algorithm corresponding to the input
if ALGORITHM == "gibbs":
    # 210000 iterations in total, the first 10000 of which are burn-in.
    probDist = gibbsSampling(model,reportVariables,evidenceVariables, 210000, 10000)
    createOutput(reportVariables, model, probDist)
elif ALGORITHM == "ve":
    for variable in reportVariables:
        # The counters get their own names: unpacking into ``sumOut`` and
        # ``pointWise`` would replace those functions with integers and break
        # the next query.
        probDist, sumOutCount, pointWiseCount = variableElim(variable, evidenceVariables, model)
        createVEOutput(model,probDist,variable)

else:
    print("Unrecognized algorithm:", ALGORITHM)












