In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
dice_experiment = np.random.choice([1,0], (100, 2), [1/2, 1/2])

In [3]:
def show_prob(x:np.ndarray, values:np.ndarray):
    for dim in range(values.shape[0]):
        for v in values[dim,:]:
            prob = np.sum(x[:,dim] == v) / len(x[:,dim])
            print(f'variable {dim} probability of {v}: {prob}')

In [4]:
show_prob(dice_experiment, np.array([[0,1],[0,1]]))

variable 0 probability of 0: 0.52
variable 0 probability of 1: 0.48
variable 1 probability of 0: 0.44
variable 1 probability of 1: 0.56


In [5]:
def missing(x:np.ndarray, target:int, p:float):
    switch = np.random.choice([True, False], 1, [p, 1-p])
    if x == target and switch:
        return np.nan
    else:
        return x

In [6]:
missingArray = np.frompyfunc(missing, 3, 1)

In [7]:
missing_dice = missingArray(dice_experiment, 1, 0.2)

In [8]:
show_prob(missing_dice, np.array([[0,1],[0,1]]))

variable 0 probability of 0: 0.52
variable 0 probability of 1: 0.22
variable 1 probability of 0: 0.44
variable 1 probability of 1: 0.27


In [9]:
def isnan(x:int):
    if np.isnan(x):
        return True
    else:
        return False

In [10]:
isnanArray = np.frompyfunc(isnan, 1, 1)

In [11]:
def number_of_unkown(x:np.ndarray):
    return np.sum(isnanArray(x))

In [12]:
def show_prob_interval(x:np.ndarray, values:list):
    for dim in range(values.shape[0]):
        n_unknown = number_of_unkown(x[:,dim])
        for v in values[dim,:]:
            low_prob = np.sum(x[:,dim] == v) / len(x[:,dim])
            upper_prob = (np.sum(x[:,dim] == v) + n_unknown) / len(x[:,dim])
            print(f'variable {dim} lower probability of {v}: {low_prob}, upper probability of {v}: {upper_prob}')

In [13]:
show_prob_interval(missing_dice, np.array([[0,1],[0,1]]))

variable 0 lower probability of 0: 0.52, upper probability of 0: 0.78
variable 0 lower probability of 1: 0.22, upper probability of 1: 0.48
variable 1 lower probability of 0: 0.44, upper probability of 0: 0.73
variable 1 lower probability of 1: 0.27, upper probability of 1: 0.56


In [14]:
def is_equal(x: np.ndarray):
    mask = np.all(x, axis=1)
    return mask

In [15]:
def get_joint(x:np.ndarray):
    unique_obs = np.unique(x, axis=0)
    for uniq in unique_obs:
        count = np.sum(is_equal(x == uniq))
        prob = count / x.shape[0]
        print(f'{uniq} has probability of {prob}')

In [16]:
get_joint(dice_experiment)

[0 0] has probability of 0.23
[0 1] has probability of 0.29
[1 0] has probability of 0.21
[1 1] has probability of 0.27


In [17]:
def cartesian_product(arrays):
    la = len(arrays)
    dtype = np.result_type(*arrays)
    arr = np.empty([len(a) for a in arrays] + [la], dtype=dtype)
    for i, a in enumerate(np.ix_(*arrays)):
        arr[...,i] = a
    return arr.reshape(-1, la)

In [30]:
def is_possible_completion(x: np.ndarray, y: np.ndarray):
    # Ensure y contains only numeric values
    y_numeric = np.asarray(y, dtype=float)
    
    # Create mask for non-NaN values in y
    mask = np.logical_not(np.isnan(y_numeric))
    
    # Perform element-wise comparison between non-NaN elements of x and y
    comparison = x[mask] == y_numeric[mask]
    
    # Check if all comparisons are True
    out = np.all(comparison)
    
    return out

In [41]:
def get_joint_interval(x: np.ndarray, values: tuple):
    possible_values = cartesian_product(values)
    for possible_v in possible_values:
        lower_count = np.sum(np.all(x == possible_v, axis=1))
        lower_prob = lower_count / x.shape[0]
        upper_count = lower_countv
        unknown = x[np.sum(np.isnan(x.astype(float)), axis=1) > 0]
        for i in unknown:
            if is_possible_completion(possible_v, i):
                upper_count += 1
        upper_prob = upper_count / x.shape[0]
        print(f'{possible_v} has a probability between {lower_prob} and {upper_prob}')

In [42]:
get_joint_interval(missing_dice, (np.array([0,1]), np.array([0,1])))

[0 0] has a probability between 0.23 and 0.55
[0 1] has a probability between 0.14 and 0.46
[1 0] has a probability between 0.12 and 0.35
[1 1] has a probability between 0.04 and 0.27
