In [41]:
import numpy as np
import matplotlib.pyplot as plt
from pgmpy.factors.discrete import State
from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination
from pgmpy.sampling import BayesianModelSampling

### 1. Build the Network<br>
Now we build our Bayesian network, starting from the top-level nodes and working down to the bottom.

In [97]:
# Directed edges of the network, written as (parent, child) pairs and listed
# from the root nodes down to the final "Sell" node.
list_of_edges = [
    # "age" and "last_year_performance" both influence "player_overall"
    ("age", "player_overall"),
    ("last_year_performance", "player_overall"),
    # "player_overall" influences whether an alternative exists in the
    # youth academy and on the market
    ("player_overall", "youth_accademy_alternative"),
    ("player_overall", "market_alternative"),
    # both alternatives feed "player_alternative"
    ("youth_accademy_alternative", "player_alternative"),
    ("market_alternative", "player_alternative"),
    # "Sell" depends on the club's economic situation and on the alternative
    ("club_economic_situation", "Sell"),
    ("player_alternative", "Sell"),
]

# Build the network structure; the CPDs are attached in later cells.
network = BayesianNetwork(list_of_edges)

### 2. Create a print function

In [217]:
# !pip install prettytable
from prettytable import PrettyTable
import itertools as it

def get_values(values, index):
    """Extract one column of a probability matrix as formatted strings.

    Parameters
    ----------
    values : iterable of indexable rows
        The probability matrix, one row per state of the node.
    index : int
        The column to extract.

    Returns
    -------
    list of str, or -1
        ``row[index]`` of every row, formatted with two decimals (``".2f"``),
        in row order. Returns the sentinel ``-1`` when the column cannot be
        read or formatted (missing column, non-iterable ``values``,
        non-numeric cell).
    """
    try:
        return [format(row[index], ".2f") for row in values]
    # Only lookup/formatting failures map to the sentinel; a bare ``except``
    # would also swallow KeyboardInterrupt and SystemExit.
    except (IndexError, KeyError, TypeError, ValueError):
        return -1

def probability_shape(list_name):
    """Return ``[rows, columns]`` of a rectangular list of lists.

    Returns ``-1`` as soon as a row's length differs from the previous row's
    length. An empty input yields ``[0, 0]``.
    """
    rows = 0
    width = 0
    for rows, row in enumerate(list_name, start=1):
        current = len(row)
        # The first row only sets the reference width; later rows must match it.
        if rows > 1 and current != width:
            return -1
        width = current
    return [rows, width]

def evidence_shape(evidence_list, node_list):
    """Return the ``[rows, columns]`` shape expected for a probability matrix.

    Each entry of ``evidence_list`` and ``node_list`` itself is a list whose
    first element is a name and whose remaining elements are state names.
    Rows = number of node states; columns = product of the number of states
    of every evidence entry (``1`` when there is no evidence).
    """
    columns = 1
    for entry in evidence_list:
        # Subtract one to skip the leading name element.
        columns *= len(entry) - 1
    return [len(node_list) - 1, columns]
    

        
# print the probabilities given their variables
def print_cpd(node, probability, evidence=None):
    """Build a PrettyTable showing a (conditional) probability table.

    Parameters
    ----------
    node : list
        ``[node_name, state_1, state_2, ...]`` for the node being described.
    probability : list of lists
        The probability matrix: one row per state of ``node`` and one column
        per combination of evidence states (a single column without evidence).
    evidence : list of lists, optional
        One ``[evidence_name, state_1, state_2, ...]`` list per evidence
        variable. Combinations are enumerated with ``itertools.product``, so
        the last evidence variable varies fastest across the columns.

    Returns
    -------
    PrettyTable or Exception
        The table on success. Any error (including a shape mismatch between
        ``probability`` and ``evidence``) is caught and the exception object
        is *returned*, not raised, so ``print(print_cpd(...))`` shows either
        the table or the error message.
    """
    try:
        if evidence is None:
            # Unconditional distribution: one row per state of the node.
            table = PrettyTable([node[0].upper(), "Probability".upper()])
            for i in range(1, len(node)):
                table.add_row([node[i], str(probability[i-1]).replace("[", "").replace("]", "")])
            return table

        shape_probability = probability_shape(probability)
        shape_evidence = evidence_shape(evidence, node)
        if shape_evidence != shape_probability:
            raise Exception("The probability table and the evidence table don't have the same shape.")

        # evidence name -> list of its state names (insertion order is kept)
        combination = {e[0]: list(e[1:]) for e in evidence}

        # Header: one column per evidence variable, then "NODE (STATE, STATE, ...)".
        title = [key.upper() for key in combination]
        node_name = str(node[0]) + " (" + ", ".join(str(state) for state in node[1:]) + ")"
        title.append(node_name.upper())

        table = PrettyTable(title)
        # The n-th combination of evidence states corresponds to column n of
        # the probability matrix.
        for count, states in enumerate(it.product(*combination.values())):
            _probability_tmp = get_values(probability, count)
            if _probability_tmp == -1:
                raise Exception("There's something wrong with the probability table.")
            table.add_row(list(states) + [" - ".join(_probability_tmp)])
        return table
    except Exception as ex:
        # Deliberate: callers print the return value, so hand the error back.
        return ex

### 3. Insert the CPDs

In [218]:
# CPDs of the root nodes (age, last_year_performance, club_economic_situation)
# and of player_overall.
#
# Every probability table is named once and passed to both TabularCPD and
# print_cpd, so the printed table cannot drift from the values in the model.
#
# print_cpd(node, probability, evidence=None) takes, in this order:
#   1. node        - a list whose first element is the node name and whose
#                    other elements are the names of its states;
#   2. probability - the probability matrix (one row per state of the node);
#   3. evidence    - optional; a list with one list per evidence variable,
#                    each made of the evidence name followed by its state names.
cpd_value_age = [[.16], [.58], [.26]]
cpd_age = TabularCPD("age", 3, cpd_value_age)
print(print_cpd(["age", "young", "mature", "old"], cpd_value_age))

cpd_value_last_year_performance = [[.32], [.68]]
cpd_last_year_performance = TabularCPD("last_year_performance", 2, cpd_value_last_year_performance)
print(print_cpd(["last_year_performance", "above_expectations", "under_expectations"],
                cpd_value_last_year_performance))

cpd_value_club_economic_situation = [[.4], [.6]]
cpd_club_economic_situation = TabularCPD("club_economic_situation", 2, cpd_value_club_economic_situation)
print(print_cpd(["club_economic_situation", "stable", "debts"], cpd_value_club_economic_situation))

# Probabilities of player_overall given last_year_performance and age.
# Rows: bad_player, normal_player, top_player. Columns follow the evidence
# order given below, with age (the last evidence variable) varying fastest.
cpd_value_player_overall = [[.27, .07, .26, .75, .43, .79],
                            [.58, .40, .32, .21, .37, .19],
                            [.15, .53, .42, .04, .20, .02]]
cpd_player_overall = TabularCPD("player_overall", 3, cpd_value_player_overall,
                           evidence = ["last_year_performance", "age"], evidence_card=[2, 3])

print(print_cpd(["player_overall", "bad_player", "normal_player", "top_player"], cpd_value_player_overall,
                [["last_year_performance", "above_expectations", "under_expectations"],
                 ["age", "young", "mature", "old"]]))

+--------+-------------+
|  AGE   | PROBABILITY |
+--------+-------------+
| young  |     0.16    |
| mature |     0.58    |
|  old   |     0.26    |
+--------+-------------+
+-----------------------+-------------+
| LAST_YEAR_PERFORMANCE | PROBABILITY |
+-----------------------+-------------+
|   above_expectations  |     0.32    |
|   under_expectations  |     0.68    |
+-----------------------+-------------+
+-------------------------+-------------+
| CLUB_ECONOMIC_SITUATION | PROBABILITY |
+-------------------------+-------------+
|          stable         |     0.4     |
|          debts          |     0.6     |
+-------------------------+-------------+
+-----------------------+--------+--------------------------------------------------------+
| LAST_YEAR_PERFORMANCE |  AGE   | PLAYER_OVERALL (BAD_PLAYER, NORMAL_PLAYER, TOP_PLAYER) |
+-----------------------+--------+--------------------------------------------------------+
|   above_expectations  | young  |                   0.27

In [219]:
# CPDs of market_alternative and youth_accademy_alternative, each conditioned
# on player_overall (columns: bad_player, normal_player, top_player; rows: yes, no).
# Each table is named once and shared by TabularCPD and print_cpd so the
# printed table always matches the values stored in the model.
cpd_value_market_alternative = [[.97, .83, .22],
                                [.03, .17, .78]]
cpd_market_alternative = TabularCPD("market_alternative", 2, cpd_value_market_alternative,
                                    evidence = ["player_overall"], evidence_card=[3])
print(print_cpd(["market_alternative", "yes", "no"], cpd_value_market_alternative,
                [["player_overall", "bad_player", "normal_player", "top_player"]]))

cpd_value_youth_accademy_alternative = [[.98, .43, .02],
                                        [.02, .57, .98]]
cpd_youth_accademy_alternative = TabularCPD("youth_accademy_alternative", 2,
                                            cpd_value_youth_accademy_alternative,
                                            evidence = ["player_overall"], evidence_card=[3])
print(print_cpd(["youth_accademy_alternative", "yes", "no"], cpd_value_youth_accademy_alternative,
                [["player_overall", "bad_player", "normal_player", "top_player"]]))

+----------------+------------------------------+
| PLAYER_OVERALL | MARKET_ALTERNATIVE (YES, NO) |
+----------------+------------------------------+
|   bad_player   |         0.97 - 0.03          |
| normal_player  |         0.83 - 0.17          |
|   top_player   |         0.22 - 0.78          |
+----------------+------------------------------+
+----------------+--------------------------------------+
| PLAYER_OVERALL | YOUTH_ACCADEMY_ALTERNATIVE (YES, NO) |
+----------------+--------------------------------------+
|   bad_player   |             0.98 - 0.02              |
| normal_player  |             0.43 - 0.57              |
|   top_player   |             0.02 - 0.98              |
+----------------+--------------------------------------+


In [220]:
# CPD of player_alternative given its two parents in the network:
# youth_accademy_alternative and market_alternative.
# Columns follow the evidence order below, last variable varying fastest:
# (yes, yes), (yes, no), (no, yes), (no, no).
cpd_value_player_alternative = [
    [.50, .35, .97, .00],  # market
    [.50, .65, .03, .00],  # youth_accademy
    [.00, .00, .00, 1],    # not_exist
]
cpd_player_alternative = TabularCPD(
    variable="player_alternative",
    variable_card=3,
    values=cpd_value_player_alternative,
    evidence=["youth_accademy_alternative", "market_alternative"],
    evidence_card=[2, 2],
)
print(
    print_cpd(
        ["player_alternative", "market", "youth_accademy", "not_exist"],
        cpd_value_player_alternative,
        [
            ["youth_accademy_alternative", "yes", "no"],
            ["market_alternative", "yes", "no"],
        ],
    )
)

+----------------------------+--------------------+--------------------------------------------------------+
| YOUTH_ACCADEMY_ALTERNATIVE | MARKET_ALTERNATIVE | PLAYER_ALTERNATIVE (MARKET, YOUTH_ACCADEMY, NOT_EXIST) |
+----------------------------+--------------------+--------------------------------------------------------+
|            yes             |        yes         |                   0.50 - 0.50 - 0.00                   |
|            yes             |         no         |                   0.35 - 0.65 - 0.00                   |
|             no             |        yes         |                   0.97 - 0.03 - 0.00                   |
|             no             |         no         |                   0.00 - 0.00 - 1.00                   |
+----------------------------+--------------------+--------------------------------------------------------+


In [221]:
# Final node: Sell, conditioned on club_economic_situation and
# player_alternative. Columns follow the evidence order below, with
# player_alternative (the last evidence variable) varying fastest.
cpd_value_Sell = [
    [.15, .08, .05, .78, .99, .65],  # yes
    [.85, .92, .95, .22, .01, .35],  # no
]
cpd_Sell = TabularCPD(
    variable="Sell",
    variable_card=2,
    values=cpd_value_Sell,
    evidence=["club_economic_situation", "player_alternative"],
    evidence_card=[2, 3],
)
print(
    print_cpd(
        ["Sell", "yes", "no"],
        cpd_value_Sell,
        [
            ["club_economic_situation", "stable", "debts"],
            ["player_alternative", "market", "youth_accademy", "not_exist"],
        ],
    )
)

+-------------------------+--------------------+----------------+
| CLUB_ECONOMIC_SITUATION | PLAYER_ALTERNATIVE | SELL (YES, NO) |
+-------------------------+--------------------+----------------+
|          stable         |       market       |  0.15 - 0.85   |
|          stable         |   youth_accademy   |  0.08 - 0.92   |
|          stable         |     not_exist      |  0.05 - 0.95   |
|          debts          |       market       |  0.78 - 0.22   |
|          debts          |   youth_accademy   |  0.99 - 0.01   |
|          debts          |     not_exist      |  0.65 - 0.35   |
+-------------------------+--------------------+----------------+


In [222]:
# Attach the eight CPDs defined above to the network, one per node.
network.add_cpds(
    cpd_age,
    cpd_last_year_performance,
    cpd_player_overall,
    cpd_market_alternative,
    cpd_youth_accademy_alternative,
    cpd_player_alternative,
    cpd_club_economic_situation,
    cpd_Sell,
)

In [223]:
# Ask pgmpy to validate the assembled network and show the result
# (the recorded run printed True).
model_is_valid = network.check_model()
print(model_is_valid)

True


In [309]:
# Menu of the network nodes: menu number -> {node name: [state names..., "not wanted"]}.
# The state names are listed in the same order as the rows of each node's CPD,
# so the position of a state in its list is also the integer state index that
# is stored in `evidence_for_sampling`. The trailing "not wanted" entry means
# "do not use this node as evidence".
dictionary_of_nodes = {1:{"age":["young", "mature","old","not wanted"]},
                       2:{"last_year_performance":["above_expectations","under_expectations","not wanted"]},
                       3:{"club_economic_situation":["stable", "debts","not wanted"]},
                       4:{"player_overall":["bad_player", "normal_player", "top_player", "not wanted"]},
                       5:{"market_alternative":["yes", "no", "not wanted"]},
                       6:{"youth_accademy_alternative":["yes","no","not wanted"]},
                       7:{"player_alternative":["market","youth_accademy","not_exist","not wanted"]},
                       8:{"Sell":["yes", "no", "not wanted"]}}

probability = ""            # name of the query node
evidence = {}               # evidence node name -> chosen state name
evidence_for_sampling = {}  # evidence node name -> chosen state index

# --- 1. choose the query node -------------------------------------------
# Only ValueError (non-integer or out-of-range input) triggers a retry; any
# other error, e.g. input() failing because no stdin is available, propagates
# instead of making the loop spin forever.
while True:
    print("Select the node for the probability:")
    for n, node_states in dictionary_of_nodes.items():
        for key in node_states:
            print(str(n) + ". " + str(key))
    try:
        input_probability = int(input("Insert your decision:\t"))
        # Membership test also rejects 0 and negative numbers.
        if input_probability not in dictionary_of_nodes:
            raise ValueError("The number exceeds the dimension.")
    except ValueError as ex:
        print(ex)
        print()
        continue
    probability = next(iter(dictionary_of_nodes[input_probability]))
    # The query node cannot also be used as evidence.
    del dictionary_of_nodes[input_probability]
    break

# --- 2. choose the evidence for every remaining node --------------------
print()
print()
print("Select the evidence(s)")
for n in dictionary_of_nodes.keys():
    for key in dictionary_of_nodes[n]:
        states = dictionary_of_nodes[n][key]
        print(key.upper())
        for count, state in enumerate(states):
            print(str(count) + ". " + state)
        while True:
            try:
                input_decision = int(input("Insert your decision:\t"))
                # Reject negative numbers too: they would silently index
                # from the end of the list and yield an invalid state index.
                if not 0 <= input_decision < len(states):
                    raise ValueError("The number exceeds the dimension.")
            except ValueError as ex:
                print(ex)
                continue
            if states[input_decision] != "not wanted":
                evidence[key] = states[input_decision]
                evidence_for_sampling[key] = input_decision
            break

    print("===========================================")
    print()
print()

# --- 3. show the resulting query ----------------------------------------
query = ""
if not evidence:
    print("P(" + probability + ")")
else:
    print("P(" + probability + " | "
          + ", ".join(key + "=" + value for key, value in evidence.items())
          + ")")

Select the node for the probability:
1. age
2. last_year_performance
3. club_economic_situation
4. player_overall
5. market_alternative
6. youth_accademy_alternative
7. player_alternative
8. Sell
Insert your decision:	1


Select the evidence(s)
LAST_YEAR_PERFORMANCE
0. above_expectations
1. under_expectations
2. not wanted
Insert your decision:	2

CLUB_ECONOMIC_SITUATION
0. stable
1. debts
2. not wanted
Insert your decision:	2

PLAYER_OVERALL
0. bad_player
1. normal_player
2. top_player
3. not wanted
Insert your decision:	2

MARKET_ALTERNATIVE
0. yes
1. no
2. not wanted
Insert your decision:	2

YOUTH_ACCADEMY_ALTERNATIVE
0. yes
1. no
2. not wanted
Insert your decision:	2

PLAYER_ALTERNATIVE
0. market
1. youth_accademy
2. not_exist
3. not wanted
Insert your decision:	3

SELL
0. yes
1. no
2. not wanted
Insert your decision:	0


P(age | player_overall=top_player, Sell=yes)


In [313]:
# Show the evidence chosen interactively (node name -> state index).
print(evidence_for_sampling)

# Draw 100 likelihood-weighted samples conditioned on that evidence; the seed
# is fixed so the draw is repeatable. The recorded output shows one column
# per node plus a `_weight` column.
inference = BayesianModelSampling(network)
evidence = [State(name, state_index) for name, state_index in evidence_for_sampling.items()]
inference.likelihood_weighted_sample(evidence=evidence, size=100, seed=10)

{'player_overall': 2, 'Sell': 0}


  0%|          | 0/8 [00:00<?, ?it/s]

Unnamed: 0,age,player_overall,last_year_performance,youth_accademy_alternative,market_alternative,player_alternative,club_economic_situation,Sell,_weight
0,2,2,1,1,1,2,1,0,0.0130
1,2,2,1,1,0,0,0,0,0.0030
2,1,2,0,1,1,2,1,0,0.3445
3,2,2,1,1,1,2,1,0,0.0130
4,1,2,1,1,1,2,1,0,0.1300
...,...,...,...,...,...,...,...,...,...
95,2,2,0,1,1,2,1,0,0.2730
96,1,2,1,1,1,2,1,0,0.1300
97,1,2,1,0,1,1,1,0,0.1980
98,0,2,1,1,1,2,0,0,0.0020


In [41]:
# Reference: https://cs.adelaide.edu.au/~dsuter/Harbin_course/BayesNetsInference.pdf
#
# Manual marginalisation:
#   P(player_overall=top) = sum over age, lyp of P(age) * P(lyp) * P(top | age, lyp)
# where lyp is last_year_performance.
p_age = {"young": 0.16, "mature": 0.58, "old": 0.26}
p_lyp = {"above_expectations": 0.32, "under_expectations": 0.68}
p_top_given = {
    ("young", "above_expectations"): 0.15,
    ("young", "under_expectations"): 0.04,
    ("mature", "above_expectations"): 0.53,
    ("mature", "under_expectations"): 0.20,
    ("old", "above_expectations"): 0.42,
    ("old", "under_expectations"): 0.02,
}

# Accumulate term by term (age outermost) so the floating-point result is the
# same as adding the six products left to right.
p_b = 0.0
for age_state, age_probability in p_age.items():
    for lyp_state, lyp_probability in p_lyp.items():
        p_b += age_probability * lyp_probability * p_top_given[(age_state, lyp_state)]
print(p_b)

0.22776000000000002


In [None]:
# NOTE(review): this cell repeats the computation of the preceding cell.
# P(player_overall=top) = sum over age, lyp of P(age) * P(lyp) * P(top | age, lyp)
# Each tuple is (P(age), P(last_year_performance), P(top | age, last_year_performance)).
marginal_terms = [
    (0.16, 0.32, 0.15),
    (0.16, 0.68, 0.04),
    (0.58, 0.32, 0.53),
    (0.58, 0.68, 0.20),
    (0.26, 0.32, 0.42),
    (0.26, 0.68, 0.02),
]
# Add the products one by one, in order, to keep the left-to-right float sum.
p_b = 0.0
for prior_age, prior_lyp, conditional_top in marginal_terms:
    p_b += prior_age * prior_lyp * conditional_top
print(p_b)