# Simulation

## Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Generate random uniform numbers

In [11]:
n = 1000000

In [3]:
runs = 100000

In [55]:
runs3 = 10000000

In [4]:
from numpy.random import default_rng
rng = default_rng()

## Generate GM indifference numbers

In [5]:
from sympy.solvers import solve
from sympy import Symbol, binomial, summation, N

In [6]:
x = Symbol('x', positive=True)
j = Symbol('j')

In [7]:
def eqin(i):
    return summation((binomial(i, j) * (x ** (i - j)) * ((1 - x) ** j)) / j, (j, 1, i))

In [8]:
def solvi(i):
    sol = solve([x**(i - 1) -  eqin(i - 1), x < 1], x)
    return N(sol.rhs)

In [9]:
%%time

gmd3 = pd.DataFrame([solvi(i) for i in range(2, 3 + 1)], columns=['ki'], index=range(2, 3 + 1))

CPU times: user 108 ms, sys: 6.24 ms, total: 115 ms
Wall time: 121 ms


In [12]:
gmd3.index.name = 'i'
gmd3.loc[1] = 0.
gmd3.sort_index(inplace=True)

In [13]:
gmd3

Unnamed: 0_level_0,ki
i,Unnamed: 1_level_1
1,0.0
2,0.5
3,0.689897948556636


In [14]:
ksgm3 = np.flip(gmd3.values.T[0])

In [15]:
ksgm3

array([0.689897948556636, 0.500000000000000, 0.0], dtype=object)

In [9]:
%%time

gmd100 = pd.DataFrame([solvi(i) for i in range(2, 100 + 1)], columns=['ki'], index=range(2, 100 + 1))

CPU times: user 2min 17s, sys: 167 ms, total: 2min 17s
Wall time: 2min 18s


In [10]:
gmd100.index.name = 'i'
gmd100.loc[1] = 0.
gmd100.sort_index(inplace=True)

In [11]:
gmd100

Unnamed: 0_level_0,ki
i,Unnamed: 1_level_1
1,0.000000
2,0.500000000000000
3,0.689897948556636
4,0.775845067578928
5,0.824589583005756
...,...
96,0.991584247313878
97,0.991671387317630
98,0.991756741258583
99,0.991840363491467


In [12]:
def solv2ndordi(i):
    return 1 / (1 + 0.80435226286 / (i - 1) + 0.183199 / (i - 1) ** 2)

In [13]:
%%time

gmdn = pd.DataFrame([solv2ndordi(i) for i in range(2, n + 1)], columns=['ki'], index=range(2, n + 1))

CPU times: user 535 ms, sys: 22.6 ms, total: 558 ms
Wall time: 557 ms


In [14]:
gmdn.index.name = 'i'
gmdn.loc[1] = 0.
gmdn.sort_index(inplace=True)

In [15]:
gmdn.loc[:100]

Unnamed: 0_level_0,ki
i,Unnamed: 1_level_1
1,0.000000
2,0.503132
3,0.690619
4,0.776113
5,0.824716
...,...
96,0.991584
97,0.991671
98,0.991757
99,0.991840


In [28]:
ksgm = np.flip(gmdn.values.T[0])

## Generate MC cutoff numbers

In [22]:
ksmc = np.array([((1 - 1 / n) + np.log((n - r) / n) / n) for r in range(1, n)] + [0])

In [16]:
ksmc3 = np.array([0.672608, 0.545532, 0])

## Run both choices

In [57]:
def compare(xsr, ks1, ks2, ctr):
    rmax = np.argmax(xsr)
    diff1 = np.sign(xsr - ks1)
    rpick1 = np.argmax(diff1)
    tp1, fp1, fn1 = [0, 0, 0]
    diff2 = np.sign(xsr - ks2)
    rpick2 = np.argmax(diff2)
    tp2, fp2, fn2 = [0, 0, 0]
    if rpick1 == rmax:
        tp1 = 1   
    else:
        if rpick1 < rmax:
            fp1 = 1
        else:
            fn1 = 1
    if rpick2 == rmax:
        tp2 = 1   
    else:
        if rpick2 < rmax:
            fp2 = 1
        else:
            fn2 = 1
    if ctr % 10000 == 0:
        print(ctr)
    return [rmax + 1, rpick1 + 1, rpick2 + 1, tp1, fp1, fn1, tp2, fp2, fn2]

In [83]:
def compare3(xsr, ks1, ks2, ctr):
    rmax = np.argmax(xsr)
    xmax = np.max(xsr)
    diff1 = np.sign(xsr - ks1)
    rpick1 = np.argmax(diff1)
    tp1, fp1, fn1 = [0, 0, 0]
    diff2 = np.sign(xsr - ks2)
    rpick2 = np.argmax(diff2)
    tp2, fp2, fn2 = [0, 0, 0]
    if rpick1 == rmax:
        tp1 = 1   
    else:
        if rpick1 < rmax:
            fp1 = 1
        else:
            fn1 = 1
    if rpick2 == rmax:
        tp2 = 1   
    else:
        if rpick2 < rmax:
            fp2 = 1
        else:
            fn2 = 1
    bucket1 = np.argmax(np.sign(xmax - ks1)) + 1
    bucket2 = np.argmax(np.sign(xmax - ks2)) + 1
    if ctr % 100000 == 0:
        print(ctr)
    return [rmax + 1, rpick1 + 1, rpick2 + 1, tp1, fp1, fn1, tp2, fp2, fn2,
           bucket1, bucket2]

In [84]:
%%time

results3 = pd.DataFrame([compare3(rng.random(size=3), ksgm3, ksmc3, run) for run in range(runs3)],
                      columns=['R(Max)', 'R(Choice)-GM', 'R(Choice)-MC',
                               'TP-GM', 'FP-GM', 'FN-GM', 'TP-MC', 'FP-MC', 'FN-MC',
                               'Bucket- GM', 'Bucket- MC'])

0
100000
200000
300000
400000
500000
600000
700000
800000
900000
1000000
1100000
1200000
1300000
1400000
1500000
1600000
1700000
1800000
1900000
2000000
2100000
2200000
2300000
2400000
2500000
2600000
2700000
2800000
2900000
3000000
3100000
3200000
3300000
3400000
3500000
3600000
3700000
3800000
3900000
4000000
4100000
4200000
4300000
4400000
4500000
4600000
4700000
4800000
4900000
5000000
5100000
5200000
5300000
5400000
5500000
5600000
5700000
5800000
5900000
6000000
6100000
6200000
6300000
6400000
6500000
6600000
6700000
6800000
6900000
7000000
7100000
7200000
7300000
7400000
7500000
7600000
7700000
7800000
7900000
8000000
8100000
8200000
8300000
8400000
8500000
8600000
8700000
8800000
8900000
9000000
9100000
9200000
9300000
9400000
9500000
9600000
9700000
9800000
9900000
CPU times: user 53min 14s, sys: 34.2 s, total: 53min 48s
Wall time: 53min 38s


In [85]:
results3.mean()

R(Max)          1.999875
R(Choice)-GM    2.035082
R(Choice)-MC    2.039666
TP-GM           0.677475
FP-GM           0.171242
FN-GM           0.151283
TP-MC           0.679721
FP-MC           0.164581
FN-MC           0.155698
Bucket- GM      1.453576
Bucket- MC      1.466914
dtype: float64

In [86]:
results3['R(Max)'].value_counts(normalize=True, sort=False)

1    0.333491
2    0.333143
3    0.333366
Name: R(Max), dtype: float64

In [87]:
results3['R(Choice)-GM'].value_counts(normalize=True, sort=False)

1    0.310029
2    0.344860
3    0.345111
Name: R(Choice)-GM, dtype: float64

In [88]:
results3['R(Choice)-MC'].value_counts(normalize=True, sort=False)

1    0.327375
2    0.305584
3    0.367041
Name: R(Choice)-MC, dtype: float64

In [89]:
def pwinsGM(k1, k2):
    p1 = 1 / 3 - (k1 ** 3) / 3
    p2 = k1 / 2 - (k1 ** 3) / 6 - (k2 ** 3) / 3
    p3 = (k1 ** 2) / 2 - (k1 ** 3) / 3 + (k2 ** 2) / 2 - (k2 ** 3) / 3
    return [p1, p2, p3]

In [90]:
def pwinsMC(k1, k2):
    p1 = 1 / 3 - (k1 ** 3) / 3
    p2 = k1 / 2 - (k1 ** 3) / 6 - (k2 ** 3) / 3
    p3 = k1 * k2 - ((k1 ** 2) * k2) / 2 - (k2 ** 3) / 6
    return [p1, p2, p3]

In [91]:
pwinsGM(ksgm3[0], ksgm3[1])

[0.223878912839870, 0.248555097364920, 0.211858502551198]

In [92]:
pwinsMC(ksgm3[0], ksgm3[1])

[0.223878912839870, 0.248555097364920, 0.205125846089321]

In [93]:
[
(results3['TP-GM'] * results3['R(Max)'] == 1).mean(),
(results3['TP-GM'] * results3['R(Max)'] == 2).mean(),
(results3['TP-GM'] * results3['R(Max)'] == 3).mean()]

[0.2239437, 0.2484077, 0.2051239]

In [94]:
np.sum(pwinsGM(ksgm3[0], ksgm3[1]))

0.684292512755988

In [95]:
np.sum(pwinsMC(ksgm3[0], ksgm3[1]))

0.677559856294111

In [96]:
results3['TP-GM'].mean()

0.6774753

In [104]:
def pwin3bucketsGM(k1, k2):
    b3 = (k2 ** 3) / 3
    b2 = (k1 ** 3) / 6 + (k1 * (k2 ** 2)) / 2 - (k2 ** 3) * 2 / 3
    b1 = (k1 ** 2) / 2 - (k1 ** 3) / 2 + (k2 ** 2) / 2 - (k1 * (k2 ** 2)) / 2
    return [b1, b2, b3]

In [105]:
pwin3bucketsGM(ksgm3[0], ksgm3[1])

[0.112560715401553, 0.0576311204829776, 0.0416666666666667]

In [106]:
np.sum(pwin3bucketsGM(ksgm3[0], ksgm3[1]))

0.211858502551198

In [103]:
[
(results3['TP-GM'] * (results3['R(Max)'] == 3) * (results3['Bucket- GM'] == 1)).mean(),
(results3['TP-GM'] * (results3['R(Max)'] == 3) * (results3['Bucket- GM'] == 2)).mean(),
(results3['TP-GM'] * (results3['R(Max)'] == 3) * (results3['Bucket- GM'] == 3)).mean()]

[0.1068559, 0.0566401, 0.0416279]

In [113]:
def round1MC(k1, k2):
    tp = 1 / 3 - (k1 ** 3) / 3
    fp = 2 / 3 - k1 + (k1 ** 3) / 3
    fn = (k1 ** 3) / 3
    return [tp, fp, fn]

In [122]:
def round2MC(k1, k2):
    tp = k1 / 2 - (k1 ** 3) / 6 - (k2 ** 3) / 3
    fp = k1 / 2 - (k1 ** 3) / 6 - (k1 * k2) + ((k1 ** 2) * k2) / 2 + (k2 ** 3) / 6
    fn = (k2 ** 3) / 3
    return [tp, fp, fn]

In [114]:
round1MC(ksgm3[0], ksgm3[1])

[0.223878912839870, 0.0862231386034939, 0.109454420493463]

In [120]:
[
(results3['TP-GM'] * results3['R(Choice)-GM'] == 1).mean(),
(results3['FP-GM'] * results3['R(Choice)-GM'] == 1).mean(),
(results3['FN-GM'] * results3['R(Max)'] == 1).mean()]

[0.2239437, 0.0860853, 0.1095473]

In [123]:
round2MC(ksgm3[0], ksgm3[1])

[0.248555097364920, 0.0850959179422654, 0.0416666666666667]

In [117]:
[
(results3['TP-GM'] * results3['R(Choice)-GM'] == 2).mean(),
(results3['FP-GM'] * results3['R(Choice)-GM'] == 2).mean(),
(results3['FN-GM'] * results3['R(Max)'] == 2).mean()]

[0.2484077, 0.0851567, 0.0417354]

In [121]:
[
(results3['TP-GM'] * results3['R(Choice)-GM'] == 3).mean(),
(results3['FP-GM'] * results3['R(Choice)-GM'] == 3).mean(),
(results3['FN-GM'] * results3['R(Max)'] == 3).mean()]

[0.2051239, 0.0, 0.0]

In [None]:
%%time

results = pd.DataFrame([compare(rng.random(size=n), ksgm, ksmc, run) for run in range(runs)],
                      columns=['R(Max)', 'R(Choice)-GM', 'R(Choice)-MC',
                               'TP-GM', 'FP-GM', 'FN-GM', 'TP-MC', 'FP-MC', 'FN-MC'])

In [39]:
results.mean()

R(Max)          498436.28951
R(Choice)-GM    553636.06085
R(Choice)-MC    537557.75820
TP-GM                0.53869
FP-GM                0.26174
FN-GM                0.19957
TP-MC                0.56211
FP-MC                0.25376
FN-MC                0.18413
dtype: float64