In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
dice_experiment = np.random.choice([1,0], (100, 2), [1/2, 1/2])

In [3]:
def show_prob(x:np.ndarray, values:np.ndarray):
    for dim in range(values.shape[0]):
        for v in values[dim,:]:
            prob = np.sum(x[:,dim] == v) / len(x[:,dim])
            print(f'variable {dim} probability of {v}: {prob}')

In [4]:
show_prob(dice_experiment, np.array([[0,1],[0,1]]))

variable 0 probability of 0: 0.58
variable 0 probability of 1: 0.42
variable 1 probability of 0: 0.56
variable 1 probability of 1: 0.44


In [5]:
def missing(x:np.ndarray, target:int, p:float):
    switch = np.random.choice([True, False], 1, [p, 1-p])
    if x == target and switch:
        return np.nan
    else:
        return x

In [6]:
missingArray = np.frompyfunc(missing, 3, 1)

In [7]:
missing_dice = missingArray(dice_experiment, 1, 0.2)

In [8]:
show_prob(missing_dice, np.array([[0,1],[0,1]]))

variable 0 probability of 0: 0.58
variable 0 probability of 1: 0.2
variable 1 probability of 0: 0.56
variable 1 probability of 1: 0.18


In [9]:
def isnan(x:int):
    if np.isnan(x):
        return True
    else:
        return False

In [10]:
isnanArray = np.frompyfunc(isnan, 1, 1)

In [11]:
def number_of_unkown(x:np.ndarray):
    return np.sum(isnanArray(x))

In [12]:
def show_prob_interval(x:np.ndarray, values:list):
    for dim in range(values.shape[0]):
        n_unknown = number_of_unkown(x[:,dim])
        for v in values[dim,:]:
            low_prob = np.sum(x[:,dim] == v) / len(x[:,dim])
            upper_prob = (np.sum(x[:,dim] == v) + n_unknown) / len(x[:,dim])
            print(f'variable {dim} lower probability of {v}: {low_prob}, upper probability of {v}: {upper_prob}')

In [13]:
show_prob_interval(missing_dice, np.array([[0,1],[0,1]]))

variable 0 lower probability of 0: 0.58, upper probability of 0: 0.8
variable 0 lower probability of 1: 0.2, upper probability of 1: 0.42
variable 1 lower probability of 0: 0.56, upper probability of 0: 0.82
variable 1 lower probability of 1: 0.18, upper probability of 1: 0.44


In [49]:
def is_equal(x: np.ndarray):
    mask = np.all(x, axis=1)
    return mask

In [53]:
def get_joint(x:np.ndarray):
    unique_obs = np.unique(x, axis=0)
    for uniq in unique_obs:
        count = np.sum(is_equal(x == uniq))
        prob = count / x.shape[0]
        print(f'{uniq} has probability of {prob}')

In [54]:
get_joint(dice_experiment)

[0 0] has probability of 0.31
[0 1] has probability of 0.27
[1 0] has probability of 0.25
[1 1] has probability of 0.17
