In [1]:
import numpy as np

In [2]:
M = 1000

In [3]:
thetas = np.random.rand(M, 4)

In [4]:
thetas[:, 1] = 0.5
thetas[:, 2] = 0.5
thetas[:, 3] = 0.5

In [5]:
thetas[:5, :]

array([[0.67379225, 0.5       , 0.5       , 0.5       ],
       [0.51620686, 0.5       , 0.5       , 0.5       ],
       [0.15890129, 0.5       , 0.5       , 0.5       ],
       [0.16865146, 0.5       , 0.5       , 0.5       ],
       [0.11213823, 0.5       , 0.5       , 0.5       ]])

In [6]:
pos_thetas = thetas[(thetas[:, 0] > 0.5), :]
n1 = pos_thetas.shape[0]

In [7]:
neg_thetas = thetas[(thetas[:, 0] <= 0.5), :]
n2 = neg_thetas.shape[0]

In [8]:
n1, n2

(504, 496)

In [9]:
def expected_reward(theta, action):
    return (theta[:, action].sum(1) / (1 + theta[:, action].sum(1))).mean()

In [10]:
def optimal_reward():
    return (n1 / M) * expected_reward(pos_thetas, [0, 1, 2]) + (n2 / M) * expected_reward(neg_thetas, [1, 2, 3])

In [11]:
def information_ratio(action):
    var = (n1 / M) * (expected_reward(pos_thetas, action) - expected_reward(thetas, action)) ** 2 + (n2/M) * (expected_reward(neg_thetas, action) - expected_reward(thetas, action)) **2
    regret2 = (optimal_reward() - expected_reward(thetas, action)) ** 2
    print(regret2)
    print('/')
    print(var)
    return regret2 / var

In [12]:
optimal_reward()

0.617304672226096

Action A1:

In [13]:
a1 = [0]

In [15]:
expected_reward(thetas, a1)

0.30815957022249885

In [16]:
information_ratio(a1)

0.09557069409281448
/
0.0131808222940574


7.250738380404591

Action A2

In [17]:
a2 = [0, 1]

In [18]:
expected_reward(thetas, a2)

0.48971767181422954

In [19]:
information_ratio(a2)

0.016278442674097605
/
0.003963564106111235


4.1070214176676565

Action A3

In [20]:
a3 = [0, 2]

In [21]:
information_ratio(a3)

0.016278442674097605
/
0.003963564106111235


4.1070214176676565

Action A4

In [22]:
a4 = [0, 1, 2]

In [23]:
information_ratio(a4)

0.0005056736739636081
/
0.0015867950541208317


0.3186761092117079

- a1 \subset a2
- a3 = a1 + [2]
- a4 = a2 + [2]

Therefore we expect:
- ir(a1) - ir(a3) < ir(a2) - ir(a4)

It did not happen...

In [24]:
information_ratio(a1) - information_ratio(a3)

0.09557069409281448
/
0.0131808222940574
0.016278442674097605
/
0.003963564106111235


3.143716962736934

In [25]:
information_ratio(a2) - information_ratio(a4)

0.016278442674097605
/
0.003963564106111235
0.0005056736739636081
/
0.0015867950541208317


3.7883453084559484