# $\pi$ - Strata proportions estimation

In [1]:
from typing import Dict, List, Optional, Tuple

import pandas as pd
from numpy import random

from consts import default_random_seed
from sample_generation import create_sample
from strata import Strata

# Seed NumPy's global random state with the project-wide default seed.
# NOTE(review): presumably create_sample draws from this global state, which
# would make the printed results reproducible - confirm against its source.
random.seed(default_random_seed)

In [2]:
def bound_strata_proportions(df: pd.DataFrame) -> Dict[Strata, Tuple[float, float]]:
    """Bound the proportion of each stratum using the two arm-level D=0 rates.

    Two sample quantities are read from ``df``:

    * ``p_t1_d0`` - share of rows with ``D1 == 0`` among rows with ``t == 1``;
    * ``p_t0_d0`` - share of rows with ``D0 == 0`` among rows with ``t == 0``.

    The bounds follow from the identities ``pi_H + pi_AS = p_t0_d0``,
    ``pi_AS + pi_P = p_t1_d0`` and ``pi_H + pi_D = 1 - p_t1_d0``, together
    with every proportion being non-negative.

    Each stratum's interval is written in closed form rather than obtained by
    subtracting previously computed bounds from one another. The two forms are
    algebraically identical, but the chained subtraction suffers from
    floating-point cancellation and can yield tiny negative lower bounds
    (displayed as ``-0.0%``). The closed form floors every lower bound at
    exactly ``0.0`` and always returns floats, never the int ``0``.

    Args:
        df: Data frame with columns ``t``, ``D0`` and ``D1``.

    Returns:
        Mapping from each ``Strata`` member (H, AS, P, D, in that order) to a
        ``(lower, upper)`` pair of proportions in ``[0, 1]``.

    Raises:
        ZeroDivisionError: If ``df`` has no rows with ``t == 1`` or no rows
            with ``t == 0``.
    """
    treated = df.loc[df.t == 1]
    control = df.loc[df.t == 0]

    # Row counts are plain Python ints, so an empty arm raises
    # ZeroDivisionError instead of silently producing NaN.
    p_t1_d0 = treated.loc[treated.D1 == 0].shape[0] / treated.shape[0]
    p_t0_d0 = control.loc[control.D0 == 0].shape[0] / control.shape[0]

    # Complementary rates (D=1 share within each arm).
    p_t1_d1 = 1 - p_t1_d0
    p_t0_d1 = 1 - p_t0_d0

    pi_h = (max(0.0, p_t0_d0 - p_t1_d0), min(p_t0_d0, p_t1_d1))
    pi_as = (max(0.0, p_t0_d0 + p_t1_d0 - 1), min(p_t0_d0, p_t1_d0))
    pi_p = (max(0.0, p_t1_d0 - p_t0_d0), min(p_t1_d0, p_t0_d1))
    pi_d = (max(0.0, 1 - p_t0_d0 - p_t1_d0), min(p_t0_d1, p_t1_d1))

    return {Strata.H: pi_h, Strata.AS: pi_as, Strata.P: pi_p, Strata.D: pi_d}


In [3]:
def check_strata_for_different_beta(beta_d_list: Optional[List[List[float]]] = None) -> None:
    """Print, for several ``beta_d`` vectors, each stratum's share and its bounds.

    For every ``beta_d`` a sample is generated with ``create_sample``, the
    bounds are computed with ``bound_strata_proportions``, and one line per
    stratum is printed showing the share of rows labelled with that stratum
    (column ``stratum``), the bounds, and a check mark or cross telling whether
    the share lies inside the bounds. All figures are percentages rounded to
    two decimals, and the comparison is done on the rounded figures.

    Args:
        beta_d_list: Coefficient vectors to try, one sample per vector. When
            omitted, four built-in vectors are used.
    """
    if beta_d_list is None:
        # Built on every call instead of living in the signature, so the
        # default can never be shared with, or mutated by, a callee.
        beta_d_list = [[0.0, 0.0, 0.0], [-2.0, -2.0, 1.0], [0.0, 5.0, 0.0], [0.0, 10.0, 0.0]]

    for beta_d in beta_d_list:
        print(f"\nfor beta_d={beta_d}:")
        sample_for_bounds = create_sample(beta_d=beta_d)
        prprtn_bounds = bound_strata_proportions(sample_for_bounds)
        n_rows = sample_for_bounds.shape[0]

        for stratum, bounds in prprtn_bounds.items():
            n_in_stratum = sample_for_bounds.loc[sample_for_bounds.stratum == stratum.name].shape[0]
            true_pi = round(100 * n_in_stratum / n_rows, 2)
            lower_bound = round(100 * bounds[0], 2)
            upper_bound = round(100 * bounds[1], 2)

            within_bounds = "✔" if lower_bound <= true_pi <= upper_bound else "✘"

            print(f"Stratum {stratum.name} real value is: {true_pi}%, and it is bounded by: [{lower_bound}%, {upper_bound}%]   {within_bounds}")

In [4]:
# Run the check with the function's default beta_d vectors.
check_strata_for_different_beta()


for beta_d=[0.0, 0.0, 0.0]:
Stratum H real value is: 23.9%, and it is bounded by: [0%, 48.64%]   ✔
Stratum AS real value is: 25.9%, and it is bounded by: [0.74%, 49.38%]   ✔
Stratum P real value is: 25.3%, and it is bounded by: [1.98%, 50.62%]   ✔
Stratum D real value is: 24.9%, and it is bounded by: [0.0%, 48.64%]   ✔

for beta_d=[-2.0, -2.0, 1.0]:
Stratum H real value is: 1.4%, and it is bounded by: [0%, 2.33%]   ✔
Stratum AS real value is: 84.8%, and it is bounded by: [85.07%, 87.4%]   ✘
Stratum P real value is: 13.4%, and it is bounded by: [10.28%, 12.6%]   ✘
Stratum D real value is: 0.4%, and it is bounded by: [0.0%, 2.33%]   ✔

for beta_d=[0.0, 5.0, 0.0]:
Stratum H real value is: 49.4%, and it is bounded by: [48.6%, 49.38%]   ✘
Stratum AS real value is: 0.4%, and it is bounded by: [0.0%, 0.78%]   ✔
Stratum P real value is: 0.3%, and it is bounded by: [-0.0%, 0.78%]   ✔
Stratum D real value is: 49.9%, and it is bounded by: [49.84%, 50.62%]   ✔

for beta_d=[0.0, 10.0, 0.0]:
Stratu

It is interesting to examine the results for an extreme distribution (for example, with $\beta_D=[0,10,0]$).
