## <center> Packages </center>

In [1]:
from tests_impl import *
from tqdm.auto import tqdm
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

## <center> 1.1. Relevance Test </center>

In [3]:
# see tests_impl: unconditional_relevance

## <center> 1.2. Simulation and sanity check </center>

In [None]:
# Simulate the linear model y = beta1 * x1 + beta2 * x2 + eps.
# The np.random calls below consume one seeded stream, so their order
# determines every simulated value -- keep it as is.
T = 200
beta1, beta2 = 4, 0.005

np.random.seed(42)
x1 = np.random.normal(loc=10, scale=3, size=T)
x2 = np.random.exponential(scale=100, size=T)
eps = np.random.normal(loc=0, scale=1, size=T)

y = beta1 * x1 + beta2 * x2 + eps

# 'z1' is the element-wise product of a normal draw and an exponential draw,
# both generated without reference to x1, x2 or eps.
z1_normal_part = np.random.normal(loc=-10, scale=20, size=T)
z1_exponential_part = np.random.exponential(scale=1000, size=T)

data = dict(y=y, x1=x1, x2=x2, z1=z1_normal_part * z1_exponential_part)

def make_moment(name):
    """Build the moment function that pairs the linear residual with `name`.

    The returned callable takes `theta` (indexable; entries 0 and 1 multiply
    dp['x1'] and dp['x2']) and `dp` (a mapping with keys 'y', 'x1', 'x2' and
    `name`) and returns (y - theta[0]*x1 - theta[1]*x2) * dp[name].
    """
    def moment(theta, dp):
        coef_x1, coef_x2 = theta[0], theta[1]
        residual = dp['y'] - coef_x1 * dp['x1'] - coef_x2 * dp['x2']
        instrument = dp[name]
        return residual * instrument
    return moment

# Candidate instruments. They are drawn in this fixed order from the seeded
# np.random stream, so reordering the lines would change every value:
#   z_relx1, z_relx2    -- x1 / x2 plus N(0, 1) noise
#   z_mix               -- 0.5 * x1 + 0.5 * x2 plus N(0, 1) noise
#   z_noise1, z_noise2  -- N(0, 1) and U(-1, 1) draws that do not involve x1 or x2
z_relx1 = x1 + np.random.normal(loc=0, scale=1, size=T)
z_relx2 = x2 + np.random.normal(loc=0, scale=1, size=T)
z_mix = 0.5 * x1 + 0.5 * x2 + np.random.normal(loc=0, scale=1, size=T)
z_noise1 = np.random.normal(loc=0, scale=1, size=T)
z_noise2 = np.random.uniform(low=-1, high=1, size=T)

data.update(
    z_relx1=z_relx1,
    z_relx2=z_relx2,
    z_mix=z_mix,
    z_noise1=z_noise1,
    z_noise2=z_noise2,
)

# One moment condition per entry; the position in `names` is the index that
# is later passed through `f2_indexes`.
names = ['x1', 'x2', 'z1'] + ['z_relx1', 'z_relx2', 'z_mix', 'z_noise1', 'z_noise2']
moments = list(map(make_moment, names))

# Random starting point for the two coefficients.
theta_init = np.random.rand(2)

def report_unconditional_relevance(labels):
    """Run `unconditional_relevance` on the instruments in `labels` and print the result.

    Parameters
    ----------
    labels : list of str
        Entries of `names` whose moment conditions are tested; their positions
        in `names` are passed as `f2_indexes`. Several labels give a joint test.

    Returns
    -------
    tuple
        The `(W, pval, theta_est)` triple returned by `unconditional_relevance`.
    """
    tested_indexes = [names.index(label) for label in labels]
    W, pval, theta_est = unconditional_relevance(
        data=data,
        moments=moments,
        f2_indexes=tested_indexes,
        theta_init=theta_init,
    )
    title = "&".join(labels)
    # The locals are deliberately called W / pval: the `{W=}` / `{pval=}`
    # specifiers print the variable name, so the report text stays unchanged.
    print(f"{title}: {W=}, {pval=}, theta=({theta_est[0]:.6f}, {theta_est[1]:.6f})")
    return W, pval, theta_est


# Single-instrument tests first, then the joint test of both noise instruments.
candidate_groups = [
    ['z_relx1'],
    ['z_relx2'],
    ['z_mix'],
    ['z1'],
    ['z_noise1'],
    ['z_noise2'],
    ['z_noise1', 'z_noise2'],
]

for group in candidate_groups:
    # W, pval and theta_est keep the values of the last test, as before.
    W, pval, theta_est = report_unconditional_relevance(group)

## <center> 2.1. Conditional Relevance </center>

In [8]:
# see tests_impl: conditional_relevance

## <center> 2.2. Simulation and sanity check </center>

In [289]:
import numpy as np
import autograd.numpy as anp
from typing import Dict, Any, Callable, List

# Sample size for the conditional-relevance simulation.
T = 600

# NOTE(review): `rng` is not created anywhere in this notebook; presumably it
# comes from `from tests_impl import *` or from leftover kernel state -- confirm.
# The fallback below only applies when no `rng` exists, so that the cell also
# runs on a fresh kernel instead of failing with NameError.
if "rng" not in globals():
    rng = np.random.default_rng(42)

z1_raw = rng.normal(size=T)
z2_raw = rng.normal(size=T)
def standardize(z):
    """Center `z` and divide by its sample standard deviation (ddof=1).

    When that standard deviation is not strictly positive (for example a
    constant input), the centered values are returned without scaling.
    """
    centered = z - z.mean()
    scale = centered.std(ddof=1)
    if scale > 0:
        return centered / scale
    return centered

# Standardized base instruments; the regressor loads on both with weight 0.8
# plus an extra noise term v, and is standardized again afterwards.
z1, z2 = standardize(z1_raw), standardize(z2_raw)
v = rng.normal(scale=0.7, size=T)
x_raw = 0.8 * z1 + 0.8 * z2 + v
x = standardize(x_raw)

# True coefficients of the index alpha + beta * x + gamma * x**2.
alpha_true = -0.4
beta_true = 1.0
gamma_true = -0.7
linidx = alpha_true + beta_true * x + gamma_true * (x ** 2)

def sigmoid_stable(u):
    """Logistic sigmoid of `u`, evaluated as 0.5 * (1 + tanh(u / 2)) with autograd's numpy."""
    half_u = 0.5 * u
    return 0.5 * (1.0 + anp.tanh(half_u))

# Binary outcome: one Bernoulli draw per observation with success
# probability logistic(linidx).
p = 1.0 / (1.0 + np.exp(-linidx))
y = rng.binomial(n=1, p=p, size=T)

# Candidate instruments (each standardized):
#   z_nl    -- sum of squares of z1 and z2
#   z_red   -- linear combination of z1 and z2 plus a small (0.02-scaled) normal jitter
#   z_noise -- a fresh normal draw that does not involve z1, z2 or x
z_nl = standardize(z1 ** 2 + z2 ** 2)
red_jitter = 0.02 * rng.normal(size=T)
z_red = standardize(0.7 * z1 + 0.3 * z2 + red_jitter)
z_noise = standardize(rng.normal(size=T))
const = np.ones(T)

data: Dict[str, np.ndarray] = dict(
    y=y,
    x=x,
    const=const,
    z1=z1,
    z2=z2,
    z_nl=z_nl,
    z_red=z_red,
    z_noise=z_noise,
)

def make_moment(name: str) -> Callable:
    """Build the logit moment function for instrument `name`.

    The returned callable takes `theta` (indexable; entries 0, 1 and 2 are the
    intercept and the coefficients on x and x**2) and `dp` (a mapping with
    keys 'x', 'y' and `name`) and returns dp[name] * (y - mu), where mu is
    the logistic sigmoid of the quadratic index.

    Note: this definition replaces the linear-model `make_moment` defined
    earlier in the notebook.
    """
    def moment(theta, dp: Dict[str, Any]):
        regressor = dp['x']
        lin = theta[0] + theta[1] * regressor + theta[2] * (regressor ** 2)
        # Reuse the shared helper instead of repeating the tanh expression;
        # it evaluates exactly the same formula.
        mu = sigmoid_stable(lin)
        return dp[name] * (dp['y'] - mu)
    return moment

# Moment order: the base set {const, z1, z2} first, then the candidates.
# Positions in this list are the indexes later passed as `f2_indexes`.
instr_order = ['const', 'z1', 'z2'] + ['z_nl', 'z_red', 'z_noise']
moments = list(map(make_moment, instr_order))

# Starting values for (intercept, coefficient on x, coefficient on x**2).
theta_init = np.asarray([0.0, 0.2, -0.2], dtype=float)

def run_cond_test(label: str, expect_significant: bool):
    """Run `conditional_relevance` for one instrument and print a one-line report.

    Parameters
    ----------
    label : str
        Entry of `instr_order`; its position is passed as the single
        element of `f2_indexes`.
    expect_significant : bool
        Whether a p-value below 0.05 is the expected outcome. The report ends
        with "OK" when the observed outcome matches and "MISMATCH" otherwise.

    Returns
    -------
    tuple
        The `(W, pval, theta_hat)` triple returned by `conditional_relevance`.
    """
    position = instr_order.index(label)
    W, pval, theta_hat = conditional_relevance(
        data=data,
        moments=moments,
        f2_indexes=[position],
        theta_init=theta_init,
        verbose=False,
    )

    got_significant = pval < 0.05

    if expect_significant:
        exp_txt = "ожидаем: релевантен (p<0.05)"
    else:
        exp_txt = "ожидаем: нерелевантен (p>=0.05)"

    verdict = 'релевантен' if got_significant else 'нерелевантен'
    got_txt = f"получили: p={pval:.4f} ({verdict})"

    if got_significant == expect_significant:
        status = "OK"
    else:
        status = "MISMATCH"

    theta_txt = f"({theta_hat[0]:.3f},{theta_hat[1]:.3f},{theta_hat[2]:.3f})"
    print(f"{label:8s} | W={W:9.3f} | {exp_txt:32s} | {got_txt:36s} | θ̂={theta_txt} | {status}")
    return W, pval, theta_hat

# Header of the report (runtime text is intentionally left as is).
print("Условная релевантность (H0: GΔ = 0), df = m2*k = 1*3 = 3")
print("- Кандидаты тестируются «сверх» базы {const, z1, z2}.\n")

# Only z_nl is expected to be significant.
for candidate, expected in (('z_nl', True), ('z_red', False)):
    run_cond_test(candidate, expect_significant=expected)

# Kept as a bare last expression so the notebook still echoes its return value.
run_cond_test('z_noise', expect_significant=False)

Условная релевантность (H0: GΔ = 0), df = m2*k = 1*3 = 3
- Кандидаты тестируются «сверх» базы {const, z1, z2}.

z_nl     | W=   12.093 | ожидаем: релевантен (p<0.05)     | получили: p=0.0071 (релевантен)      | θ̂=(-0.591,0.839,-0.442) | OK
z_red    | W=    1.485 | ожидаем: нерелевантен (p>=0.05)  | получили: p=0.6857 (нерелевантен)    | θ̂=(-0.591,0.839,-0.442) | OK
z_noise  | W=    2.530 | ожидаем: нерелевантен (p>=0.05)  | получили: p=0.4699 (нерелевантен)    | θ̂=(-0.591,0.839,-0.442) | OK


(2.5297485273166602,
 0.4699388399579768,
 array([-0.59050247,  0.83911014, -0.44245857]))