In [1]:
from math import floor, log
from collections import defaultdict
import random
import tqdm
import numpy as np
from numpy.random import poisson, binomial, uniform, exponential, multinomial, normal, multivariate_normal
from scipy.stats import norm
from scipy.stats import kstest, norm

def dice():
    return random.randint(1, 6)
ct = 10000000


In [74]:
def q1():
    sigx, sigy = random.random() * 10 + 2, random.random() * 10 + 2
    rho = random.random()

    y = normal(0, sigy)
    epsilon = 0.05

    # x_bar_y = normal(sigx/sigy * rho * y, sigx * np.sqrt(1 - rho ** 2), size=ct)
    # E_x = np.mean(x_bar_y)
    # E_x2 = np.mean(x_bar_y ** 2)
    # Var_x = np.var(x_bar_y)

    analytical = sigx ** 2 * (1 - rho ** 2) + (sigx ** 2)/(sigy ** 2) * (rho ** 2) * (y ** 2)

    covar_mat = np.array([[sigx**2, rho * sigx * sigy], [rho * sigx * sigy, sigy ** 2]])
    xy = multivariate_normal([0, 0], covar_mat, ct)
    x_bar_y = xy[abs(xy[:,1] - y) <= epsilon][:,0]
    E_x_bar_y= np.mean(x_bar_y)
    E_x2_bar_y = np.mean(x_bar_y ** 2)
    Var_x_bar_y = np.var(x_bar_y)
    ks_statistic, p_value = kstest(x_bar_y, 'norm')
    print(f"sigx, sigy, rho: [{sigx}, {sigy}, {rho}]")
    print(f"p_value: {p_value}, samples: {len(x_bar_y)}")
    print(f"y = {y}")
    print(f"E[X|y] = {E_x_bar_y}, fE[X**2] = {E_x2_bar_y}, fVar(X) = {Var_x_bar_y}")
    print(f"Analytical: {analytical}")
q1()

sigx, sigy, rho: [8.410935784007822, 11.801004078987116, 0.4819285689623868]
p_value: 0.0, samples: 30655
y = 5.328680182951202
E[X|y] = 1.8819125802410812, fE[X**2] = 57.769018008451454, fVar(X) = 54.2274230487818
Analytical: 57.66330119995443


In [75]:
def q2():
    rho = 0.7
    epsilon = 0.05

    covar_mat = np.array([[1, rho], [rho, 1]])
    xy = multivariate_normal([0, 0], covar_mat, ct)
    x, y = xy[:,0], xy[:,1]
    # z = 2x - y, w = x + y
    z = 2 * x - y
    w = x + y

    ks_statistic, p_value1 = kstest(z, 'norm')
    ks_statistic, p_value2 = kstest(w, 'norm')
    print(f"p_z, p_w: ({p_value1}, {p_value2})")
    print(f"Cov(Z,W): {np.cov(np.stack((z, w)))[0][1]}")
    print(f"Z: {np.mean(z)} {np.var(z)}, W: {np.mean(w)} {np.var(w)}")
    print(f"Analytical Z: {0} {5 - 4 * rho}, W: {0} {2 + 2 * rho}, Cov: {1 + rho}")
    zw = np.stack((z, w)).T

    w_s = normal(0, np.sqrt(2))
    z_bar_w = zw[abs(zw[:,1] - w_s) <= epsilon][:,0]
    E_z_bar_w = np.mean(z_bar_w)
    Var_z_bar_w = np.var(z_bar_w)
    ks_statistic, p_value = kstest(z_bar_w, 'norm')
    print(f"p_value of Z|w : {p_value}, samples: {len(z_bar_w)}")
    print(f"w_s = {w_s}")
    print(f"Empirical E[Z|w] = {E_z_bar_w}, Var(Z|w) = {Var_z_bar_w}")

    analytical_mean = 0.5 * w_s
    analytical_std = 9/2 - 9/2 * rho
    print(f"Analytical E[Z|w] = {analytical_mean}, Var(Z|w) = {analytical_std}")
q2()

p_z, p_w: (0.0, 0.0)
Cov(Z,W): 1.6993599869474214
Z: -0.00031463471005996053 2.1990176797205505, W: 0.00021384259031382335 3.4008730348928755
Analytical Z: 0 2.2, W: 0 3.4, Cov: 1.7
p_value of Z|w : 0.0, samples: 179828
w_s = 1.10711584943971
Empirical E[Z|w] = 0.5487644823050815, Var(Z|w) = 1.3489341689916539
Analytical E[Z|w] = 0.553557924719855, Var(Z|w) = 1.35


In [2]:
def markov_start(start, P, length):
    cur = start
    seq = []
    for i in range(length):
        distribution = P[cur].flatten()
        v = multinomial(1, distribution)
        v = np.where(v == 1)[0][0]
        seq.append(v)
        cur = v
    return seq
def markov_rdm(dist, P, length):
    cur = np.nonzero(multinomial(1, dist))[0]
    seq = [cur]
    for i in range(length):
        distribution = P[cur].flatten()
        v = np.nonzero(multinomial(1, distribution))[0]
        seq.append(v)
        cur = v
    return seq
def q3():
    P = np.array([[0.3, 0.2, 0.5], [0.5, 0.1, 0.4], [0.3, 0.3, 0.4]])
    ct2 = 200000
    parta = np.sum([markov_start(0, P, 2) == [1, 0] for t in range(ct2)]) / ct2
    print(f"Part a: {parta}")
    
    partb = np.sum([markov_start(0, P, 2)[1] == 2 for t in range(ct2)]) / ct2
    print(f"Part b: {partb}")

    partc = np.sum([markov_rdm([0, 0.5, 0.5], P, 2) == [1, 1, 2] for t in range(ct2)]) / ct2
    print(f"Part c: {partc}")

    partd = np.sum([markov_rdm([0, 0.5, 0.5], P, 2)[2] == 0 for t in range(ct2)]) / ct2
    print(f"Part d: {partd}")
q3()

Part a: 0.099445
Part b: 0.43165
Part c: 0.02006
Part d: 0.34016


In [64]:
def q4():
    a = 0.3
    b = 0.59
    P = np.array([[a, 1-a], [1-b, b]])
    ct2 = 200000
    cts = np.zeros((4, 4))
    for i in range(ct2):
        chain = markov_rdm([0.5, 0.5], P, 2)
        z_1 = 2*chain[0] + chain[1]
        z_2 = 2*chain[1] + chain[2]
        cts[z_1[0]][z_2[0]] += 1
    row_sum = cts.sum(axis=1).reshape((-1, 1))
    print(cts / row_sum)
q4()
    

[[0.30226423 0.69773577 0.         0.        ]
 [0.         0.         0.41117027 0.58882973]
 [0.29828928 0.70171072 0.         0.        ]
 [0.         0.         0.40946502 0.59053498]]


In [59]:
np.random.seed(123)
a = np.array(list(range(16))).reshape((4, 4))
s = a.sum(axis=1).reshape((-1, 1))
print(a)
print(s)
print(a/s)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]
[[ 6]
 [22]
 [38]
 [54]]
[[0.         0.16666667 0.33333333 0.5       ]
 [0.18181818 0.22727273 0.27272727 0.31818182]
 [0.21052632 0.23684211 0.26315789 0.28947368]
 [0.22222222 0.24074074 0.25925926 0.27777778]]
