## LR3

In [None]:
import math
import random as r
import numpy as np
import scipy.stats as st
import scipy.integrate as _in
import matplotlib.pyplot as plt

### Task 1
$f(x,y) = {16 \over 9} (x + {y^3 \over 4}), 0 \le x, y \le 1 $

$f_X(x) = \int_{0}^{1} {16 \over 9} (x + {y^3 \over 4})dy = {16x + 1 \over 9}$
\
\
$MX = \int_{0}^{1} xf_X(x)dx = {35 \over 54} \approx 0,648$

$f_Y(y) = \int_{0}^{1} {16 \over 9} (x + {y^3 \over 4})dx = {{8 + 4y^3} \over 9}$
\
\
$MY = \int_{0}^{1} yf_Y(y)dy = {8 \over 15} = 0,5(3)$

$MXY = \int_{0}^{1} \int_{0}^{1} xyf(x,y)dxdy = {46 \over 135} = 0,3(407)$
\
\
$KXY = MXY - MX * MY = {46 \over 135} - {35 \over 54} * {8 \over 15} = {-2 \over 405} \approx -0,005$

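$f_X(x) * f_Y(y) = {16x + 1 \over 9} * {{8 + 4y^3} \over 9} = {{(16x + 1) * (8 + 4y^3)} \over 81} \neq f(x,y) $

Since the joint density does not factor into the product of the marginal densities, $X$ and $Y$ are dependent.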

$f(x|y) = {f(x, y) \over f_Y(y)} = {{4x + y^3} \over {2 + y^3}}$
\
\
$f(y|x) = {f(x, y) \over f_X(x)} = {{16x + 4y^3} \over {16x + 1}}$

In [None]:
def mean(val):
    """Return the arithmetic mean of the sequence ``val``."""
    return sum(val) / len(val)


def meanxy(x, y):
    """Return the mean of the element-wise products ``x[i] * y[i]``."""
    return sum([x[i] * y[i] for i in range(len(x))]) / len(x)


def dispersion(val):
    """Return the population variance of ``val`` (normalised by ``len(val)``)."""
    return sum(e ** 2 for e in val) / len(val) - mean(val) ** 2


def covariance(x, y, ddof=1):
    """Return the covariance of the paired samples ``x`` and ``y``.

    Parameters:
        x, y: sequences of equal length.
        ddof: value subtracted from the sample size in the denominator.
            The default ``1`` keeps the original unbiased estimate;
            ``0`` gives the population form that matches ``dispersion``.
    """
    mx = mean(x)
    my = mean(y)
    sub_x, sub_y = [i - mx for i in x], [i - my for i in y]
    return sum([sub_x[i] * sub_y[i] for i in range(len(sub_x))]) / (len(sub_x) - ddof)


def corelation(x, y):
    """Return the Pearson correlation coefficient of ``x`` and ``y``.

    The covariance is taken with ``ddof=0`` so that numerator and
    denominator share the same normalisation; mixing the (n - 1)-based
    covariance with the n-based ``dispersion`` inflates the result by
    n / (n - 1).
    """
    return covariance(x, y, ddof=0) / (math.sqrt(dispersion(x)) * math.sqrt(dispersion(y)))

In [None]:
def fxy(x, y):
    """Evaluate the Task 1 joint density formula 16/9 * (x + y^3 / 4).

    No support check is performed; the caller is responsible for the domain.
    """
    cubic_part = (y ** 3) / 4
    return (16 / 9) * (x + cubic_part)


def fx(x):
    """Evaluate the marginal density formula of X: (16x + 1) / 9."""
    numerator = 16 * x + 1
    return numerator / 9


def fy(y):
    """Evaluate the marginal density formula of Y: (8 + 4y^3) / 9."""
    numerator = 8 + 4 * y ** 3
    return numerator / 9

# Both coordinates are proposed uniformly on [a, b].
a = 0
b = 1
# Envelope height for rejection sampling. fxy grows with x and with y on
# [a, b], so its maximum over the square is at the corner (b, b).
# Using fxy(b, a) underestimates the bound and biases the accepted sample.
fxy_max = fxy(b, b)

N = 1_000_000

# Rejection sampling: propose a uniform point in the square and accept it
# with probability fxy(point) / fxy_max; repeat until N points are accepted.
x_list, y_list = [], []
for _ in range(N):
    while True:
        x_, y_ = (a + r.random() * (b - a) for _ in range(2))
        if fxy(x_, y_) > r.random() * fxy_max:
            x_list.append(x_)
            y_list.append(y_)
            break

In [None]:
# Integration limits shared by every double integral below: both variables
# run over [a, b].
params = {
    'a': a,
    'b': b,
    'gfun': lambda x: a,
    'hfun': lambda x: b
}

# NOTE: dblquad passes the inner integration variable first. Both ranges are
# the same interval and every integrand uses its own argument names
# consistently with fxy(x, y), so the values below are the intended moments.
mx_theoretical = _in.dblquad(lambda x, y: x * fxy(x,y), **params)[0]
my_theoretical = _in.dblquad(lambda x, y: y * fxy(x,y), **params)[0]

dx_theoretical = _in.dblquad(lambda x, y: (x - mx_theoretical) ** 2 * fxy(x, y), **params)[0]
dy_theoretical = _in.dblquad(lambda x, y: (y - my_theoretical) ** 2 * fxy(x, y), **params)[0]

# The theoretical correlation must come from the density, not from the
# generated sample: r = (M[XY] - M[X] * M[Y]) / sqrt(D[X] * D[Y]).
mxy_theoretical = _in.dblquad(lambda x, y: x * y * fxy(x, y), **params)[0]
rxy_theoretical = (mxy_theoretical - mx_theoretical * my_theoretical) / np.sqrt(dx_theoretical * dy_theoretical)

# Sample estimates computed from the generated points.
mx_actual = mean(x_list)
my_actual = mean(y_list)

dx_actual = dispersion(x_list)
dy_actual = dispersion(y_list)

rxy_actual = corelation(x_list, y_list)

print('Theoretical')
print('M[X]:', mx_theoretical, '\t', 'M[Y]:', my_theoretical)
print('D[X]:', dx_theoretical, '\t','D[Y]:', dy_theoretical)
print('r[XY]:', rxy_theoretical)
print()
print('Actual')
print('M[X]:', mx_actual, '\t', 'M[Y]:', my_actual)
print('D[X]:', dx_actual, '\t',  'D[Y]:', dy_actual)
print('r[XY]:', rxy_actual)

In [None]:
def confidence_interval(list, metric, confidence=0.95):
    """Return a Student-t interval centred on ``metric``.

    Parameters:
        list: the sample; its size sets the degrees of freedom and its
            standard error of the mean sets the scale.
        metric: centre of the interval.
        confidence: coverage level passed to ``st.t.interval``.

    Returns:
        The ``(low, high)`` pair produced by ``st.t.interval``.
    """
    degrees_of_freedom = len(list) - 1
    standard_error = st.sem(list)
    return st.t.interval(
        confidence,
        df=degrees_of_freedom,
        loc=metric,
        scale=standard_error,
    )

# Same level as the default of confidence_interval.
confidence_level = 0.95

interval_mx = confidence_interval(x_list, mx_actual)
interval_my = confidence_interval(y_list, my_actual)

# The variance is the mean of the squared deviations, so its large-sample
# interval has to be scaled by the standard error of those squared
# deviations, not by the standard error of the raw observations.
sq_dev_x = [(value - mx_actual) ** 2 for value in x_list]
sq_dev_y = [(value - my_actual) ** 2 for value in y_list]

interval_dx = confidence_interval(sq_dev_x, dx_actual)
interval_dy = confidence_interval(sq_dev_y, dy_actual)

# pearsonr yields (r, p-value), which is not an interval. Build the interval
# with the Fisher z-transform: atanh(r) is approximately normal with
# standard error 1 / sqrt(n - 3).
r_hat, _p_value = st.pearsonr(x_list, y_list)
z_hat = np.arctanh(r_hat)
z_crit = st.norm.ppf((1 + confidence_level) / 2)
z_se = 1 / np.sqrt(len(x_list) - 3)
interval_rxy = (np.tanh(z_hat - z_crit * z_se), np.tanh(z_hat + z_crit * z_se))

print('M[X]:', interval_mx, '\t', 'M[Y]:', interval_my)
print('D[X]:', interval_dx, '\t', 'D[Y]:', interval_dy)
print('r[XY]:', interval_rxy)

In [None]:
# Evaluation grids and the marginal density values to draw over the histograms.
x_range = np.linspace(a, b, 500)
y_range = np.linspace(a, b, 500)
fx_x = [fx(point) for point in x_range]
fy_y = [fy(point) for point in y_range]

# One figure per variable: normalised histogram of the sample with the
# marginal density curve on top.
for sample, grid, density, title in (
    (x_list, x_range, fx_x, 'X'),
    (y_list, y_range, fy_y, 'Y'),
):
    plt.hist(sample, density=True)
    plt.plot(grid, density)
    plt.title(title)
    plt.show()

### Task 2

In [None]:
def _pick_index(cumulative):
    """Draw an index by inverting the cumulative weights ``cumulative``."""
    # Scaling by the total makes the draw independent of rounding in the
    # running sum and of whether the weights are normalised.
    e = r.random() * cumulative[-1]
    # side='right' returns the first index whose cumulative weight exceeds e,
    # so a state with zero weight is never selected.
    idx = int(np.searchsorted(cumulative, e, side='right'))
    if idx >= len(cumulative):
        # Only reachable if the product rounds up to the total: fall back to
        # the last state that carries weight.
        idx = int(np.searchsorted(cumulative, cumulative[-1], side='left'))
    return idx


def generate_dsv(x, y, P):
    """Draw one ``(x, y)`` pair from a discrete joint distribution.

    Parameters:
        x: values of the first component; ``P`` has one column per value.
        y: values of the second component; ``P`` has one row per value.
        P: table of non-negative weights with ``P[j, i]`` the weight of
            ``(x[i], y[j])``. It does not have to sum to exactly 1.

    Returns:
        A tuple ``(x[i], y[j])``.

    Raises:
        ValueError: if ``P`` is not of shape ``(len(y), len(x))``, contains
            a negative entry, or has no positive entry.
    """
    weights = np.asarray(P, dtype=float)
    n, m = len(x), len(y)
    if weights.shape != (m, n):
        raise ValueError(f'P must have shape ({m}, {n}), got {weights.shape}')
    if np.any(weights < 0):
        raise ValueError('P must not contain negative weights')

    # Marginal of the first component: column sums, then running total.
    Fx = np.cumsum(weights.sum(axis=0))
    if not Fx[-1] > 0:
        raise ValueError('P must contain at least one positive weight')
    x_i = _pick_index(Fx)

    # Conditional distribution of the second component inside column x_i.
    Fy = np.cumsum(weights[:, x_i])
    y_i = _pick_index(Fy)

    return x[x_i], y[y_i]

In [None]:
# Values taken by the two components of the discrete random vector.
x = list(range(1, 6))
y = list(range(6, 11))

n, m = len(x), len(y)

# Joint probability table. generate_dsv sums its columns to obtain the
# distribution of x, so columns follow x and rows follow y.
P = np.array([
    [0.05, 0.01, 0.05, 0.03, 0.10],
    [0.09, 0.03, 0.06, 0.14, 0.04],
    [0.05, 0.01, 0.03, 0.01, 0.05],
    [0.03, 0.01, 0.01, 0.03, 0.04],
    [0.04, 0.03, 0.03, 0.02, 0.01],
])

# Single draw, shown as the cell output.
generate_dsv(x, y, P)

In [None]:
N = 10000

# Draw N pairs, then split them into the two coordinate lists.
pairs = [generate_dsv(x, y, P) for _ in range(N)]
x_values = [pair[0] for pair in pairs]
y_values = [pair[1] for pair in pairs]

In [None]:
# Marginals of the table: column sums for x, row sums for y.
p_x_theoretical = [sum(P[:, col]) for col in range(n)]
p_y_theoretical = [sum(P[row]) for row in range(m)]

# Relative frequency of every value in the generated sample.
p_x_actual = [x_values.count(value) / N for value in x]
p_y_actual = [y_values.count(value) / N for value in y]

report = (
    ('Theoretical', p_x_theoretical, p_y_theoretical),
    ('Actual', p_x_actual, p_y_actual),
)
for position, (label, p_x_row, p_y_row) in enumerate(report):
    if position:
        print()
    print(label)
    print('p(x):', p_x_row)
    print('p(y):', p_y_row)

In [None]:
def covariation(P, p_x, p_y):
    """Return the mean absolute gap between ``P`` and the product of marginals.

    For every pair of indices the value ``|P[j, i] - p_x[i] * p_y[j]|`` is
    collected, and the average of those values is returned. A result of 0
    means the table equals the outer product of ``p_y`` and ``p_x``.
    """
    gaps = [
        abs(P[j, i] - p_x[i] * p_y[j])
        for i in range(len(p_x))
        for j in range(len(p_y))
    ]
    return np.average(gaps)

# Average gap between the joint table and the product of the sample marginals.
independence_gap = covariation(P, p_x_actual, p_y_actual)
print(independence_gap)

In [None]:
# Conditional tables must be built from the marginals of P itself. Dividing
# the exact joint table by sample frequencies gives columns/rows that do not
# sum to 1.
p_x_marginal = P.sum(axis=0, keepdims=True)  # one total per column (x value)
p_y_marginal = P.sum(axis=1, keepdims=True)  # one total per row (y value)

# p(y|x): every column of P divided by its own total.
p_yx = P / p_x_marginal

# p(x|y): every row of P divided by its own total.
p_xy = P / p_y_marginal

print('p(y|x):', p_yx)
print()
print('p(x|y):', p_xy)

In [None]:
def draw_plot(data, values, probabilities, title):
    """Plot the relative-frequency histogram of ``values`` with a line on top.

    Parameters:
        data: horizontal coordinates of the line.
        values: sample shown as a histogram whose bar heights sum to 1.
        probabilities: vertical coordinates of the line, one per ``data`` item.
        title: figure title.
    """
    # Weight each observation by 1 / len(values) so the bars are relative
    # frequencies. The sample's own size is used rather than the notebook
    # global N, which is rebound between tasks and may not match.
    count = len(values)
    weights = [1 / count] * count if count else None
    plt.hist(values, weights=weights)
    plt.title(title)
    plt.plot(data, probabilities)
    plt.show()


draw_plot(x, x_values, p_x_actual, 'X')
draw_plot(y, y_values, p_y_actual, 'Y')

In [None]:
def discrete_mean(x, p_x):
    """Return the sum of ``x[i] * p_x[i]`` over every index of ``x``."""
    total = 0
    for idx, value in enumerate(x):
        total += value * p_x[idx]
    return total

def discrete_dispersion(x, p_x, M):
    """Return the sum of ``x[i]^2 * p_x[i]`` minus ``M`` squared.

    ``M`` is supplied by the caller; the calls in this notebook pass the
    value returned by ``discrete_mean`` for the same ``x`` and ``p_x``.
    """
    second_moment = 0
    for idx in range(len(x)):
        second_moment += (x[idx] ** 2) * p_x[idx]
    return second_moment - M ** 2

def discrete_corelation(x, y, mx, my):
    """Return the Pearson correlation of the paired samples ``x`` and ``y``.

    ``mx`` and ``my`` are the centres subtracted from the observations. The
    result is the sum of cross deviations divided by the square root of the
    product of the two sums of squared deviations.
    """
    cross = 0
    for x_obs, y_obs in zip(x, y):
        cross += (x_obs - mx) * (y_obs - my)

    spread_x = sum([(x_obs - mx) ** 2 for x_obs in x])
    spread_y = sum([(y_obs - my) ** 2 for y_obs in y])

    return cross / np.sqrt(spread_x * spread_y)

In [None]:
# Moments of the exact distribution (marginals of the table P).
mx_theoretical = discrete_mean(x, p_x_theoretical)
my_theoretical = discrete_mean(y, p_y_theoretical)
dx_theoretical = discrete_dispersion(x, p_x_theoretical, mx_theoretical)
dy_theoretical = discrete_dispersion(y, p_y_theoretical, my_theoretical)

# Mixed moment: sum of x * y * P over the whole table. The loop variables
# are named apart from the global lists x and y.
mxy_theoretical = sum(
    [
        sum([x_val * y_val * P[j, i] for j, y_val in enumerate(y)])
        for i, x_val in enumerate(x)
    ]
)
covariance_theoretical = mxy_theoretical - mx_theoretical * my_theoretical
rxy_theoretical = covariance_theoretical / np.sqrt(dx_theoretical * dy_theoretical)

# The same quantities estimated from the generated sample.
mx_actual = discrete_mean(x, p_x_actual)
my_actual = discrete_mean(y, p_y_actual)
dx_actual = discrete_dispersion(x, p_x_actual, mx_actual)
dy_actual = discrete_dispersion(y, p_y_actual, my_actual)
rxy_actual = discrete_corelation(x_values, y_values, mx_actual, my_actual)

summary = (
    ('Theoretical', mx_theoretical, my_theoretical, dx_theoretical, dy_theoretical, rxy_theoretical),
    ('Actual', mx_actual, my_actual, dx_actual, dy_actual, rxy_actual),
)
for position, (label, m_x, m_y, d_x, d_y, r_xy) in enumerate(summary):
    if position:
        print()
    print(label)
    print('M[X]:', m_x, '\t', 'M[Y]:', m_y)
    print('D[X]:', d_x, '\t', 'D[Y]:', d_y)
    print('r[XY]:', r_xy)

In [None]:
def mean(data):
    """Return the arithmetic mean of ``data`` (sum divided by length)."""
    total = sum(data)
    return total / len(data)

def ci_correlation(x, y, confidence=0.95):
    """Return a Fisher z-transform confidence interval for Pearson's r.

    Parameters:
        x, y: paired samples of equal length (more than 3 observations).
        confidence: two-sided coverage level, e.g. 0.95.

    Returns:
        ``(left, right)`` bounds for the correlation coefficient.

    Raises:
        ValueError: if the samples differ in length or have 3 or fewer
            observations (the standard error 1 / sqrt(n - 3) is undefined).
    """
    n_obs = len(x)
    if n_obs != len(y):
        raise ValueError('x and y must have the same length')
    if n_obs <= 3:
        raise ValueError('at least 4 observations are required')

    # Clip guards against |r| exceeding 1 by a rounding error.
    r_hat = np.clip(np.corrcoef(x, y)[0, 1], -1.0, 1.0)
    z = np.arctanh(r_hat)  # same as 1/2 * log((1 + r) / (1 - r))

    # Two-sided critical value: each tail holds (1 - confidence) / 2, so the
    # quantile level is (1 + confidence) / 2 (about 1.96 for 0.95).
    z_crit = st.norm.ppf((1 + confidence) / 2)
    se = 1 / np.sqrt(n_obs - 3)

    left, right = np.tanh(z - z_crit * se), np.tanh(z + z_crit * se)
    return left, right

In [None]:
interval_mx = confidence_interval(x_values, mx_actual)
interval_my = confidence_interval(y_values, my_actual)

# The variance is the mean of the squared deviations, so its large-sample
# interval has to be scaled by the standard error of those squared
# deviations, not by the standard error of the raw observations.
sq_dev_x = [(value - mx_actual) ** 2 for value in x_values]
sq_dev_y = [(value - my_actual) ** 2 for value in y_values]

interval_dx = confidence_interval(sq_dev_x, dx_actual)
interval_dy = confidence_interval(sq_dev_y, dy_actual)

interval_rxy = ci_correlation(x_values, y_values)

print('M[X]:', interval_mx, '\t', 'M[Y]:', interval_my)
print('D[X]:', interval_dx, '\t', 'D[Y]:', interval_dy)
print('r[XY]:', interval_rxy)