In [1]:
import numpy as np
import pandas as pd
from IPython.display import display
import matplotlib.pyplot as plt
%config InlineBackend.figure_formats = ['svg']
import matplotlib
matplotlib.rcParams['text.usetex'] = True
matplotlib.rcParams['font.sans-serif'] = ['FreeSans']
import seaborn as sns
import itertools
from tqdm import tqdm
import joblib

In [2]:
# Distribution over the single context x; single_run() passes it as `p=` when sampling x.
d0 = np.ones(1)

In [3]:
# Three reward settings. Each entry is indexed [x, a]: single_run() draws the
# reward of action a in context x from a normal with mean R[x, a] and
# standard deviation sigma[x, a].
Rs = [
    np.array([action_means])
    for action_means in ([1., 2.], [-1., 1.], [-1., -2.])
]
# Same reward noise (0.5) for both actions in every setting; one independent
# array per setting.
sigmas = [np.full((1, 2), 0.5) for _ in Rs]

In [4]:
# Candidate policies, each a (1, 2) array of action probabilities indexed [x, a].
# Every entry is used both as behavior policy π_b and as evaluation policy π_e.
πs = [
    np.array([action_probs])
    for action_probs in ([1., 0.], [0., 1.], [0.5, 0.5], [0.1, 0.9], [0.8, 0.2])
]

In [5]:
# When True, single_run() replaces the behavior policy in the importance ratios
# with action frequencies estimated from each sampled dataset; when False it
# uses π_b itself (or its ww-weighted augmentation for the counterfactual estimators).
use_πD = False

In [6]:
# N: number of samples drawn per simulated dataset.
# runs: number of simulated datasets (one RNG seed each) per call to single_run().
N, runs = 1, 1000

In [7]:
def single_run():
    """Simulate ``runs`` datasets of size ``N`` and summarize six value estimates.

    The function takes no arguments. It reads these module-level names, which
    must be bound before the call: ``d0``, ``R``, ``sigma``, ``π_b``, ``π_e``,
    ``Pc``, ``ww``, ``N``, ``runs`` and ``use_πD``.

    For every seed in ``range(runs)`` it computes:

    * the average reward of ``N`` draws under ``π_e`` and under ``π_b``;
    * the ordinary (OIS) and weighted (WIS) importance sampling estimates of
      the value of ``π_e`` from the ``π_b`` data;
    * the counterfactual-augmented variants (C-OIS, C-WIS): with probability
      ``Pc[x, a]`` a sample also receives a reward drawn for the action that
      was not taken, and factual / counterfactual samples are weighted by
      ``ww[x, a, a]`` / ``ww[x, a, 1 - a]``.

    A self-normalized (WIS / C-WIS) estimate whose weights sum to zero is
    recorded as NaN.

    Returns:
        pandas.DataFrame: one row per approach, in the order v(π_e), v(π_b),
        OIS, WIS, C-OIS, C-WIS, with the columns
        ``['Approach', 'Mean', 'Bias', 'Std', 'RMSE']``. Bias and RMSE are
        measured against ``d0 @ np.sum(π_e * R, axis=1)``.
    """
    # Legacy global seed kept for callers that rely on it; every draw below
    # uses its own explicitly seeded Generator.
    np.random.seed(42)

    def _sample(rng, policy):
        # Draw N (context, action, reward) triples; the order of the draws
        # (all x, then all a, then all r) fixes the random stream.
        x = rng.choice(1, size=N, p=d0)
        a = np.array([rng.choice(2, p=policy[xi]) for xi in x])
        r = np.array([rng.normal(R[xi,ai], sigma[xi,ai]) for xi,ai in zip(x,a)])
        return x, a, r

    def _ratio_or_nan(numerator, denominator):
        # A zero total weight leaves the self-normalized estimate undefined.
        # Return NaN explicitly instead of evaluating 0/0, which yields the
        # same NaN but also emits a RuntimeWarning on every such run.
        if denominator == 0:
            return np.nan
        return numerator / denominator

    # True value of π_e (Monte Carlo, sampling actions from π_e directly)
    Js = []
    for seed in range(runs):
        rng = np.random.default_rng(seed=10+seed)
        _, _, r = _sample(rng, π_e)
        Js.append(np.sum(r) / N)

    # Standard IS
    Gs = []
    OISs = []
    WISs = []
    for seed in range(runs):
        rng = np.random.default_rng(seed=10+seed)
        x, a, r = _sample(rng, π_b)
        Gs.append(np.sum(r) / N)

        if use_πD:
            # Behavior policy estimated from the sampled action frequencies.
            n_x0 = np.sum(x==0)
            π_b_ = np.array([
                [(np.sum((x==0)&(a==0)))/n_x0,
                 (np.sum((x==0)&(a==1)))/n_x0],
            ])
        else:
            π_b_ = π_b

        rho = π_e[x,a] / π_b_[x,a]
        weighted_reward = np.sum(rho * r)
        OISs.append(weighted_reward / N)
        WISs.append(_ratio_or_nan(weighted_reward, np.sum(rho)))

    # Augmented behavior policy implied by π_b and the weights ww. It does not
    # depend on the sampled data, so it is computed once.
    if not use_πD:
        π_b_aug = np.array([
            [π_b[0,0]*ww[0,0,0]+π_b[0,1]*ww[0,1,0], π_b[0,0]*ww[0,0,1]+π_b[0,1]*ww[0,1,1]],
        ])
        π_b_aug = π_b_aug / π_b_aug.sum(axis=1, keepdims=True)

    # Collect data using π_b - combining counterfactuals with factuals
    FC_OISs_w = []
    FC_WISs_w = []
    for seed in range(runs):
        rng = np.random.default_rng(seed=10+seed)
        rng_c = np.random.default_rng(seed=100000+seed)
        x, a, r = _sample(rng, π_b)

        # counterfactual flag
        c = np.array([rng_c.choice(2, p=[1-Pc[xi,ai], Pc[xi,ai]]) for xi,ai in zip(x,a)])

        # counterfactual reward (for the action that was not taken)
        rc = np.array([rng_c.normal(R[xi,1-ai], sigma[xi,1-ai]) for xi,ai in zip(x,a)])
        rc[c==0] = np.nan

        # trajectory-wise weight
        w = np.ones(N)
        w[c==1] = ww[x[c==1], a[c==1], a[c==1]]
        wc = np.zeros(N)
        wc[c==1] = ww[x[c==1], a[c==1], 1-a[c==1]]

        if use_πD:
            # augmented behavior policy estimated from the weighted sample counts
            total_weight = np.sum(w*(x==0))+np.sum(wc*((x==0)&(c==1)))
            π_b_ = np.array([
                [(np.sum(w*((x==0)&(a==0)))+np.sum(wc*((x==0)&(a==1)&(c==1))))/total_weight,
                 (np.sum(w*((x==0)&(a==1)))+np.sum(wc*((x==0)&(a==0)&(c==1))))/total_weight],
            ])
        else:
            π_b_ = π_b_aug

        factual_weights = w* π_e[x,a] / π_b_[x,a]
        counterfactual_weights = wc* π_e[x,1-a] / π_b_[x,1-a]
        # nansum skips the samples without a counterfactual annotation (rc is NaN there).
        weighted_reward = np.sum(factual_weights * r) + np.nansum(counterfactual_weights * rc)

        FC_OISs_w.append(weighted_reward / (N))
        FC_WISs_w.append(
            _ratio_or_nan(
                weighted_reward,
                np.sum(factual_weights) + np.sum(counterfactual_weights[c==1]),
            )
        )

    # Reference value that Bias and RMSE are measured against.
    v_true = d0@np.sum(π_e*R, axis=1)

    df_bias_var = []
    # Raw strings: the labels contain LaTeX backslashes, which are not valid
    # Python escape sequences in ordinary string literals.
    for name, values in [
        (r'$\hat{v}(\pi_e)$', Js),
        (r'$\hat{v}(\pi_b)$', Gs),
        ('OIS', OISs),
        ('WIS', WISs),
        ('C-OIS', FC_OISs_w),
        ('C-WIS', FC_WISs_w),
    ]:
        errors = np.asarray(values) - v_true
        df_bias_var.append([name,
                            np.mean(values),
                            np.mean(errors),
                            np.sqrt(np.var(values)),
                            np.sqrt(np.mean(np.square(errors)))])
    return pd.DataFrame(df_bias_var, columns=['Approach', 'Mean', 'Bias', 'Std', 'RMSE'])

# Ideal counterfactual annotations, equal weights

In [8]:
# Counterfactual-augmented IS
## Pc[x, a]: probability of getting a counterfactual annotation for a sample (x, a)
Pc = np.ones((1, 2))
## ww[x, a, a']: weights assigned to factual (a' == a) and counterfactual (a' != a) samples
ww = np.full((1, 2, 2), 0.5)

## Rs[0] setting

In [9]:
# Activate reward setting 0; single_run() reads R and sigma as globals.
R = Rs[0]
sigma = sigmas[0]

In [10]:
# Evaluate every (π_b, π_e) pair; results are stored row-major (π_b outer, π_e inner).
# single_run() reads π_b and π_e as globals, so they have to be bound by a plain
# module-level `for` statement (a comprehension would not rebind them).
df_out_all_0 = []
for π_b, π_e in itertools.product(πs, πs):
    df_out = single_run()
    df_out_all_0.append(df_out)

  WISs.append(np.sum(rho * r) / np.sum(rho))


In [11]:
# Summary grid for the Rs[0] setting: rows are behavior policies, columns are
# evaluation policies. Each cell shows [Bias, Std, RMSE] (result columns 2-4)
# for OIS (result row 2) on the first line and C-OIS (result row 4) on the second.
policy_labels = [str(π)[1:-1] for π in πs]  # drop the outer brackets of the 2-D repr
df_tmp = pd.DataFrame(index=policy_labels, columns=policy_labels)
df_tmp.index.name = 'π_b'
df_tmp.columns.name = 'π_e'
for i, π_b in enumerate(πs):
    for j, π_e in enumerate(πs):
        ix = i * len(πs) + j  # position in the row-major result list
        summary = df_out_all_0[ix].iloc[[2, 4], [2, 3, 4]].round(2)
        df_tmp.iloc[i, j] = str(summary.values)[1:-1]
display(df_tmp.style.set_properties(**{'white-space': 'pre-wrap'}))

π_e,[1. 0.],[0. 1.],[0.5 0.5],[0.1 0.9],[0.8 0.2]
π_b,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
[1. 0.],[0.03 0.5 0.5 ]  [0.03 0.5 0.5 ],[-2. 0. 2. ]  [ 0.02 0.47 0.47],[-0.98 0.25 1.01]  [ 0.03 0.34 0.35],[-1.8 0.05 1.8 ]  [ 0.02 0.43 0.43],[-0.37 0.4 0.55]  [ 0.03 0.41 0.41]
[0. 1.],[-1. 0. 1. ]  [ 0.02 0.47 0.47],[0.03 0.5 0.5 ]  [0.03 0.5 0.5 ],[-0.48 0.25 0.54]  [ 0.03 0.34 0.35],[-0.07 0.45 0.45]  [ 0.03 0.45 0.45],[-0.79 0.1 0.8 ]  [ 0.02 0.39 0.39]
[0.5 0.5],[0.01 1.23 1.23]  [0.03 0.47 0.47],[0.09 2.17 2.17]  [0.02 0.5 0.5 ],[0.05 0.71 0.71]  [0.03 0.34 0.35],[0.08 1.86 1.86]  [0.02 0.46 0.46],[0.02 0.69 0.69]  [0.03 0.39 0.39]
[0.1 0.9],[0.08 3.46 3.46]  [0.01 0.47 0.47],[0.02 0.88 0.88]  [0.04 0.5 0.5 ],[0.05 1.45 1.45]  [0.03 0.34 0.35],[0.03 0.59 0.59]  [0.03 0.45 0.45],[0.07 2.64 2.64]  [0.02 0.39 0.39]
[0.8 0.2],[0.03 0.75 0.75]  [0.04 0.48 0.48],[0.05 4.28 4.28]  [0.01 0.49 0.49],[0.04 1.92 1.92]  [0.03 0.34 0.35],[0.05 3.8 3.81]  [0.02 0.44 0.44],[0.03 0.65 0.65]  [0.03 0.4 0.4 ]


In [None]:
# Print one LaTeX fragment per (π_b, π_e) pair for the Rs[0] setting:
# [Bias, Std, RMSE] of OIS (result row 2), then of C-OIS (result row 4).
# The template is a raw string because "\m" is not a valid Python escape
# sequence; "\\" in the raw string is the LaTeX row break.
# NOTE(review): \mask is presumably a macro defined in the target LaTeX document.
for (i, π_b), (j, π_e) in itertools.product(enumerate(πs), enumerate(πs)):
    ix = i*len(πs)+j
    values = df_out_all_0[ix].iloc[[2,4], [2,3,4]].round(2).values
    print(r"""\mask{{0.00}}{{{}}} & \mask{{0.00}}{{{}}} & \mask{{0.00}}{{{}}} \\ \mask{{0.00}}{{{}}} & \mask{{0.00}}{{{}}} & \mask{{0.00}}{{{}}}""".format(*values.ravel()))

In [14]:
# Print the LaTeX table body for the Rs[0] setting: one block per behavior
# policy, one matrix per evaluation policy holding [Bias, Std, RMSE] of OIS
# (result row 2) over C-OIS (result row 4).
# All LaTeX fragments are raw strings: sequences such as "\m" and "\e" are not
# valid Python escapes in ordinary string literals.
# NOTE(review): \mask and \shortminus are presumably macros defined in the target document.
matrix_template = (
    r"\mask{{0.00}}{{{}}} & \mask{{0.00}}{{{}}} & \mask{{0.00}}{{{}}} \\ "
    r"\mask{{0.00}}{{{}}} & \mask{{0.00}}{{{}}} & \mask{{0.00}}{{{}}}"
)
for (i, π_b) in enumerate(πs):
    print(r"$[\mask{{0.0}}{{{}}},\mask{{0.0}}{{{}}}]$ ".format(π_b[0,0], π_b[0,1]))
    for (j, π_e) in enumerate(πs):
        ix = i*len(πs)+j
        values = df_out_all_0[ix].iloc[[2,4], [2,3,4]].round(2).values
        # Whole numbers are printed without a trailing ".0".
        entries = [int(x) if x.is_integer() else x for x in values.ravel()]
        print(r"& \scalebox{0.8}{$\begin{matrix} "
              + matrix_template.format(*entries).replace('-', r'\shortminus ')
              + r" \end{matrix}$} ")
    print(r"\\[12pt]")

$[\mask{0.0}{1.0},\mask{0.0}{0.0}]$ 
& \scalebox{0.8}{$\begin{matrix} \mask{0.00}{0.03} & \mask{0.00}{0.5} & \mask{0.00}{0.5} \\ \mask{0.00}{0.03} & \mask{0.00}{0.5} & \mask{0.00}{0.5} \end{matrix}$} 
& \scalebox{0.8}{$\begin{matrix} \mask{0.00}{\shortminus 2} & \mask{0.00}{0} & \mask{0.00}{2} \\ \mask{0.00}{0.02} & \mask{0.00}{0.47} & \mask{0.00}{0.47} \end{matrix}$} 
& \scalebox{0.8}{$\begin{matrix} \mask{0.00}{\shortminus 0.98} & \mask{0.00}{0.25} & \mask{0.00}{1.01} \\ \mask{0.00}{0.03} & \mask{0.00}{0.34} & \mask{0.00}{0.35} \end{matrix}$} 
& \scalebox{0.8}{$\begin{matrix} \mask{0.00}{\shortminus 1.8} & \mask{0.00}{0.05} & \mask{0.00}{1.8} \\ \mask{0.00}{0.02} & \mask{0.00}{0.43} & \mask{0.00}{0.43} \end{matrix}$} 
& \scalebox{0.8}{$\begin{matrix} \mask{0.00}{\shortminus 0.37} & \mask{0.00}{0.4} & \mask{0.00}{0.55} \\ \mask{0.00}{0.03} & \mask{0.00}{0.41} & \mask{0.00}{0.41} \end{matrix}$} 
\\[12pt]
$[\mask{0.0}{0.0},\mask{0.0}{1.0}]$ 
& \scalebox{0.8}{$\begin{matrix} \mask{0.00}{

## Rs[1] setting

In [15]:
# Activate reward setting 1; single_run() reads R and sigma as globals.
R = Rs[1]
sigma = sigmas[1]

In [16]:
# Evaluate every (π_b, π_e) pair; results are stored row-major (π_b outer, π_e inner).
# single_run() reads π_b and π_e as globals, so they have to be bound by a plain
# module-level `for` statement (a comprehension would not rebind them).
df_out_all_1 = []
for π_b, π_e in itertools.product(πs, πs):
    df_out = single_run()
    df_out_all_1.append(df_out)

  WISs.append(np.sum(rho * r) / np.sum(rho))


In [17]:
# Summary grid for the Rs[1] setting: rows are behavior policies, columns are
# evaluation policies. Each cell shows [Bias, Std, RMSE] (result columns 2-4)
# for OIS (result row 2) on the first line and C-OIS (result row 4) on the second.
policy_labels = [str(π)[1:-1] for π in πs]  # drop the outer brackets of the 2-D repr
df_tmp = pd.DataFrame(index=policy_labels, columns=policy_labels)
df_tmp.index.name = 'π_b'
df_tmp.columns.name = 'π_e'
for i, π_b in enumerate(πs):
    for j, π_e in enumerate(πs):
        ix = i * len(πs) + j  # position in the row-major result list
        summary = df_out_all_1[ix].iloc[[2, 4], [2, 3, 4]].round(2)
        df_tmp.iloc[i, j] = str(summary.values)[1:-1]
display(df_tmp.style.set_properties(**{'white-space': 'pre-wrap'}))

π_e,[1. 0.],[0. 1.],[0.5 0.5],[0.1 0.9],[0.8 0.2]
π_b,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
[1. 0.],[0.03 0.5 0.5 ]  [0.03 0.5 0.5 ],[-1. 0. 1. ]  [ 0.02 0.47 0.47],[-0.48 0.25 0.54]  [ 0.03 0.34 0.35],[-0.9 0.05 0.9 ]  [ 0.02 0.43 0.43],[-0.17 0.4 0.43]  [ 0.03 0.41 0.41]
[0. 1.],[1. 0. 1. ]  [0.02 0.47 0.47],[0.03 0.5 0.5 ]  [0.03 0.5 0.5 ],[0.52 0.25 0.57]  [0.03 0.34 0.35],[0.13 0.45 0.47]  [0.03 0.45 0.45],[0.81 0.1 0.81]  [0.02 0.39 0.39]
[0.5 0.5],[0.06 1.17 1.17]  [0.03 0.47 0.47],[0.06 1.28 1.28]  [0.02 0.5 0.5 ],[0.06 1.12 1.12]  [0.03 0.34 0.35],[0.06 1.23 1.23]  [0.02 0.46 0.46],[0.06 1.13 1.13]  [0.03 0.39 0.39]
[0.1 0.9],[-0.04 3.37 3.37]  [ 0.01 0.47 0.47],[0.03 0.64 0.64]  [0.04 0.5 0.5 ],[-0.01 1.86 1.86]  [ 0.03 0.34 0.35],[0.02 0.8 0.8 ]  [0.03 0.45 0.45],[-0.03 2.76 2.76]  [ 0.02 0.39 0.39]
[0.8 0.2],[0.02 0.74 0.74]  [0.04 0.48 0.48],[0.06 2.42 2.42]  [0.01 0.49 0.49],[0.04 1.46 1.46]  [0.03 0.34 0.35],[0.06 2.22 2.23]  [0.02 0.44 0.44],[0.03 0.95 0.96]  [0.03 0.4 0.4 ]


In [18]:
# Print the LaTeX table body for the Rs[1] setting: one block per behavior
# policy, one matrix per evaluation policy holding [Bias, Std, RMSE] of OIS
# (result row 2) over C-OIS (result row 4).
# All LaTeX fragments are raw strings: sequences such as "\m" and "\e" are not
# valid Python escapes in ordinary string literals.
# NOTE(review): \mask and \shortminus are presumably macros defined in the target document.
matrix_template = (
    r"\mask{{0.00}}{{{}}} & \mask{{0.00}}{{{}}} & \mask{{0.00}}{{{}}} \\ "
    r"\mask{{0.00}}{{{}}} & \mask{{0.00}}{{{}}} & \mask{{0.00}}{{{}}}"
)
for (i, π_b) in enumerate(πs):
    print(r"$[\mask{{0.0}}{{{}}},\mask{{0.0}}{{{}}}]$ ".format(π_b[0,0], π_b[0,1]))
    for (j, π_e) in enumerate(πs):
        ix = i*len(πs)+j
        values = df_out_all_1[ix].iloc[[2,4], [2,3,4]].round(2).values
        # Whole numbers are printed without a trailing ".0".
        entries = [int(x) if x.is_integer() else x for x in values.ravel()]
        print(r"& \scalebox{0.8}{$\begin{matrix} "
              + matrix_template.format(*entries).replace('-', r'\shortminus ')
              + r" \end{matrix}$} ")
    print(r"\\[12pt]")

$[\mask{0.0}{1.0},\mask{0.0}{0.0}]$ 
& \scalebox{0.8}{$\begin{matrix} \mask{0.00}{0.03} & \mask{0.00}{0.5} & \mask{0.00}{0.5} \\ \mask{0.00}{0.03} & \mask{0.00}{0.5} & \mask{0.00}{0.5} \end{matrix}$} 
& \scalebox{0.8}{$\begin{matrix} \mask{0.00}{\shortminus 1} & \mask{0.00}{0} & \mask{0.00}{1} \\ \mask{0.00}{0.02} & \mask{0.00}{0.47} & \mask{0.00}{0.47} \end{matrix}$} 
& \scalebox{0.8}{$\begin{matrix} \mask{0.00}{\shortminus 0.48} & \mask{0.00}{0.25} & \mask{0.00}{0.54} \\ \mask{0.00}{0.03} & \mask{0.00}{0.34} & \mask{0.00}{0.35} \end{matrix}$} 
& \scalebox{0.8}{$\begin{matrix} \mask{0.00}{\shortminus 0.9} & \mask{0.00}{0.05} & \mask{0.00}{0.9} \\ \mask{0.00}{0.02} & \mask{0.00}{0.43} & \mask{0.00}{0.43} \end{matrix}$} 
& \scalebox{0.8}{$\begin{matrix} \mask{0.00}{\shortminus 0.17} & \mask{0.00}{0.4} & \mask{0.00}{0.43} \\ \mask{0.00}{0.03} & \mask{0.00}{0.41} & \mask{0.00}{0.41} \end{matrix}$} 
\\[12pt]
$[\mask{0.0}{0.0},\mask{0.0}{1.0}]$ 
& \scalebox{0.8}{$\begin{matrix} \mask{0.00}{

## Rs[2] setting

In [19]:
# Activate reward setting 2; single_run() reads R and sigma as globals.
R = Rs[2]
sigma = sigmas[2]

In [20]:
# Evaluate every (π_b, π_e) pair; results are stored row-major (π_b outer, π_e inner).
# single_run() reads π_b and π_e as globals, so they have to be bound by a plain
# module-level `for` statement (a comprehension would not rebind them).
df_out_all_2 = []
for π_b, π_e in itertools.product(πs, πs):
    df_out = single_run()
    df_out_all_2.append(df_out)

  WISs.append(np.sum(rho * r) / np.sum(rho))


In [21]:
# Summary grid for the Rs[2] setting: rows are behavior policies, columns are
# evaluation policies. Each cell shows [Bias, Std, RMSE] (result columns 2-4)
# for OIS (result row 2) on the first line and C-OIS (result row 4) on the second.
policy_labels = [str(π)[1:-1] for π in πs]  # drop the outer brackets of the 2-D repr
df_tmp = pd.DataFrame(index=policy_labels, columns=policy_labels)
df_tmp.index.name = 'π_b'
df_tmp.columns.name = 'π_e'
for i, π_b in enumerate(πs):
    for j, π_e in enumerate(πs):
        ix = i * len(πs) + j  # position in the row-major result list
        summary = df_out_all_2[ix].iloc[[2, 4], [2, 3, 4]].round(2)
        df_tmp.iloc[i, j] = str(summary.values)[1:-1]
display(df_tmp.style.set_properties(**{'white-space': 'pre-wrap'}))

π_e,[1. 0.],[0. 1.],[0.5 0.5],[0.1 0.9],[0.8 0.2]
π_b,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
[1. 0.],[0.03 0.5 0.5 ]  [0.03 0.5 0.5 ],[2. 0. 2. ]  [0.02 0.47 0.47],[1.02 0.25 1.05]  [0.03 0.34 0.35],[1.8 0.05 1.8 ]  [0.02 0.43 0.43],[0.43 0.4 0.58]  [0.03 0.41 0.41]
[0. 1.],[1. 0. 1. ]  [0.02 0.47 0.47],[0.03 0.5 0.5 ]  [0.03 0.5 0.5 ],[0.52 0.25 0.57]  [0.03 0.34 0.35],[0.13 0.45 0.47]  [0.03 0.45 0.45],[0.81 0.1 0.81]  [0.02 0.39 0.39]
[0.5 0.5],[0.06 1.17 1.17]  [0.03 0.47 0.47],[-0.01 2.1 2.1 ]  [ 0.02 0.5 0.5 ],[0.02 0.7 0.71]  [0.03 0.34 0.35],[-0. 1.8 1.8 ]  [ 0.02 0.46 0.46],[0.04 0.67 0.67]  [0.03 0.39 0.39]
[0.1 0.9],[-0.04 3.37 3.37]  [ 0.01 0.47 0.47],[0.05 0.86 0.86]  [0.04 0.5 0.5 ],[0. 1.41 1.41]  [0.03 0.34 0.35],[0.04 0.58 0.58]  [0.03 0.45 0.45],[-0.02 2.57 2.57]  [ 0.02 0.39 0.39]
[0.8 0.2],[0.02 0.74 0.74]  [0.04 0.48 0.48],[0.09 4.02 4.02]  [0.01 0.49 0.49],[0.06 1.8 1.8 ]  [0.03 0.34 0.35],[0.08 3.57 3.57]  [0.02 0.44 0.44],[0.04 0.63 0.63]  [0.03 0.4 0.4 ]


In [22]:
# Print the LaTeX table body for the Rs[2] setting: one block per behavior
# policy, one matrix per evaluation policy holding [Bias, Std, RMSE] of OIS
# (result row 2) over C-OIS (result row 4).
# All LaTeX fragments are raw strings: sequences such as "\m" and "\e" are not
# valid Python escapes in ordinary string literals.
# NOTE(review): \mask and \shortminus are presumably macros defined in the target document.
matrix_template = (
    r"\mask{{0.00}}{{{}}} & \mask{{0.00}}{{{}}} & \mask{{0.00}}{{{}}} \\ "
    r"\mask{{0.00}}{{{}}} & \mask{{0.00}}{{{}}} & \mask{{0.00}}{{{}}}"
)
for (i, π_b) in enumerate(πs):
    print(r"$[\mask{{0.0}}{{{}}},\mask{{0.0}}{{{}}}]$ ".format(π_b[0,0], π_b[0,1]))
    for (j, π_e) in enumerate(πs):
        ix = i*len(πs)+j
        values = df_out_all_2[ix].iloc[[2,4], [2,3,4]].round(2).values
        # Whole numbers are printed without a trailing ".0".
        entries = [int(x) if x.is_integer() else x for x in values.ravel()]
        print(r"& \scalebox{0.8}{$\begin{matrix} "
              + matrix_template.format(*entries).replace('-', r'\shortminus ')
              + r" \end{matrix}$} ")
    print(r"\\[12pt]")

$[\mask{0.0}{1.0},\mask{0.0}{0.0}]$ 
& \scalebox{0.8}{$\begin{matrix} \mask{0.00}{0.03} & \mask{0.00}{0.5} & \mask{0.00}{0.5} \\ \mask{0.00}{0.03} & \mask{0.00}{0.5} & \mask{0.00}{0.5} \end{matrix}$} 
& \scalebox{0.8}{$\begin{matrix} \mask{0.00}{2} & \mask{0.00}{0} & \mask{0.00}{2} \\ \mask{0.00}{0.02} & \mask{0.00}{0.47} & \mask{0.00}{0.47} \end{matrix}$} 
& \scalebox{0.8}{$\begin{matrix} \mask{0.00}{1.02} & \mask{0.00}{0.25} & \mask{0.00}{1.05} \\ \mask{0.00}{0.03} & \mask{0.00}{0.34} & \mask{0.00}{0.35} \end{matrix}$} 
& \scalebox{0.8}{$\begin{matrix} \mask{0.00}{1.8} & \mask{0.00}{0.05} & \mask{0.00}{1.8} \\ \mask{0.00}{0.02} & \mask{0.00}{0.43} & \mask{0.00}{0.43} \end{matrix}$} 
& \scalebox{0.8}{$\begin{matrix} \mask{0.00}{0.43} & \mask{0.00}{0.4} & \mask{0.00}{0.58} \\ \mask{0.00}{0.03} & \mask{0.00}{0.41} & \mask{0.00}{0.41} \end{matrix}$} 
\\[12pt]
$[\mask{0.0}{0.0},\mask{0.0}{1.0}]$ 
& \scalebox{0.8}{$\begin{matrix} \mask{0.00}{1} & \mask{0.00}{0} & \mask{0.00}{1} \\ \mask{0.