In [1150]:
import numpy as np
import matplotlib.pyplot as plt

In [1151]:
cdf_difficulty = np.array([0.6, 0.4])
cdf_intelligence = np.array([0.7, 0.3])
cdf_grade = np.array([[0.3, 0.4, 0.3], [0.05, 0.25, 0.7], [0.9, 0.08, 0.02], [0.5, 0.3, 0.2]])
cdf_letter = np.array([[0.1, 0.9], [0.4, 0.6], [0.99, 0.01]])

In [1152]:
### Sample values from the probability distributions
def sample_observation():
    difficulty = np.random.choice([0, 1], p=cdf_difficulty)
    intelligence = np.random.choice([0, 1], p=cdf_intelligence)
    
    #cdf grade is dependent on difficulty, retrieving the value associated with the difficulty key or index from the cdf_grade
    grade = np.random.choice([0, 1, 2], p=cdf_grade[intelligence * 2 + difficulty]) 
    letter = np.random.choice([0, 1], p=cdf_letter[grade])

    return difficulty, intelligence, grade, letter

# Simulate 100 observations
observations = [sample_observation() for _ in range(100)]


In [1153]:
# We see the outcome of the bayesian network
# (difficulty (d), intelligence (i), grade (i), letter (l))
print(observations)

[(0, 0, 2, 0), (0, 0, 1, 0), (0, 0, 1, 1), (0, 1, 0, 1), (1, 0, 2, 0), (1, 0, 2, 0), (0, 1, 0, 1), (0, 0, 1, 1), (0, 1, 0, 1), (0, 1, 0, 1), (0, 0, 1, 1), (0, 1, 0, 1), (0, 1, 0, 1), (0, 1, 0, 1), (1, 1, 1, 1), (1, 1, 1, 1), (0, 0, 0, 1), (1, 0, 2, 0), (1, 0, 2, 0), (1, 0, 2, 0), (1, 1, 0, 1), (0, 0, 0, 1), (0, 1, 0, 1), (0, 1, 0, 1), (1, 0, 1, 1), (1, 0, 1, 0), (1, 0, 2, 0), (0, 0, 1, 1), (0, 0, 0, 1), (0, 0, 1, 1), (0, 0, 1, 0), (0, 0, 1, 1), (1, 1, 1, 1), (0, 1, 0, 1), (0, 0, 1, 0), (0, 0, 1, 1), (0, 0, 1, 1), (0, 0, 0, 1), (0, 0, 2, 0), (0, 1, 0, 1), (0, 0, 0, 1), (0, 1, 0, 0), (0, 0, 1, 1), (0, 0, 0, 1), (0, 0, 0, 1), (1, 0, 2, 0), (0, 1, 0, 1), (0, 0, 1, 1), (1, 0, 2, 0), (1, 0, 2, 0), (0, 1, 0, 1), (1, 0, 2, 0), (1, 1, 2, 0), (0, 1, 0, 1), (0, 1, 0, 1), (1, 1, 1, 1), (0, 0, 1, 1), (0, 1, 0, 1), (0, 1, 0, 1), (0, 1, 0, 1), (0, 0, 1, 0), (1, 1, 0, 1), (0, 0, 1, 1), (1, 0, 1, 1), (0, 1, 1, 1), (0, 0, 0, 1), (0, 1, 0, 1), (1, 0, 2, 0), (0, 0, 0, 1), (1, 0, 2, 0), (1, 1, 1, 0), (0, 1

In [1154]:
obs_g1 = (np.sum([obs[2]==0 for obs in observations]))/len(observations)
print(obs_g1)

0.41


In [1155]:
# P(grade=2|difficulty=1) = P(grade=2 and difficulty=1) / P(difficulty=1)
g2_given_d1 = np.sum([obs[2]==1 and obs[0]==1 for obs in observations]) / np.sum([obs[0]==1 for obs in observations])
print(round(g2_given_d1, 2))

0.35


In [1156]:
#Data sampeling is for noobs!
# P(Grade = 1)
def p_grade(g):
    g_1, i = 0, -1
    for k in range(len(cdf_intelligence)):
        for j in range(len(cdf_difficulty)):
            i += 1
            g_1 += cdf_grade[i][g] * cdf_difficulty[j] * cdf_intelligence[k]
    return round(g_1,3)

print(p_grade(0))

0.362


In [1157]:
# P(grade=2|difficulty=1) = P(grade=2 and difficulty=1) / P(difficulty=1)
# Calculate probability P(grade=2 and difficulty=1)  = P(grade = 2) P(grade = 2 | difficulty = 1)
def g_2_given_d_1():
    p_grade_2 = p_grade(1) # Probaility of grade = 2
    p_difficulty_1 = cdf_difficulty[1] # Probailibty og difficulty = 1
    p_joint = p_grade_2 * p_difficulty_1 # Proba
    return p_joint

print(g_2_given_d_1())

0.1152


In [1158]:
#Compare P(G=1) 
print("P(G=1)")
print("In theory: ", p_grade(0)) 
print("Observed: ", obs_g1)

#Compare P(G=2|D=1)
print("\nP(G=2|D=1)")
print("In theory: ", g_2_given_d_1())
print("Observed: ", round(g2_given_d1, 2))

P(G=1)
In theory:  0.362
Observed:  0.41

P(G=2|D=1)
In theory:  0.1152
Observed:  0.35
