In [29]:
import pandas as pd
import numpy as np
from scipy.stats import norm

Approach taken from NCSS' guide [Two Proportions – Non-Inferiority, Superiority, Equivalence, and Two-Sided Tests vs. a Margin](https://ncss-wpengine.netdna-ssl.com/wp-content/themes/ncss/pdf/Procedures/NCSS/Two_Proportions-Non-Inferiority,_Superiority,_Equivalence,_and_Two-Sided_Tests_vs_a_Margin.pdf), as with previous statistical approaches (i.e. as their guides include detailed formulae).

In [45]:
data = {'test': [60, 1000-60], 'ctrl': [70, 1000-70]}
# N.B. convention = test is n1 and p1, ctrl is n2 and p2, hence the order

df = pd.DataFrame.from_dict(data, orient='index',
                            columns=['don', 'non'])
print(df)

      don  non
test   60  940
ctrl   70  930


In [46]:
delta = -0.01  # The non-inferiority margin

Take the test statistic from p19-20.

In [47]:
df['total'] = df.sum(axis=1)
[n1, n2] = df['total']
N = n1 + n2
[p1, p2] = df['don'] / df['total']
print('Test: {}, {}\nCtrl: {}, {}'.format(n1, p1, n2, p2))

Test: 1000, 0.06
Ctrl: 1000, 0.07


In [48]:
l0 = df.loc['ctrl', 'don'] * delta * (1 - delta)
l1 = (n2*delta - N - 2*df.loc['ctrl', 'don']) * delta + df['don'].sum()
l2 = (N + n2)*delta - N - df['don'].sum()
l3 = N
for i in [l0, l1, l2, l3]:
    print(i)

-0.7070000000000001
151.5
-2160.0
2000


In [49]:
C = (l2**3 / (27 * (l3**3))) - (l1*l2)/(6 * (l3**2)) + l0/(2*l3)
B = np.sign(C) * np.sqrt((l2**2)/(9 * (l3**2)) - l1/(3*l3))
A = (1/3) * (np.pi + np.arccos(C/(B**3)))
print(C, B, A)

-0.033197750000000005 -0.3230325061042619 1.10529455981647


In [50]:
p2_cd = 2*B*np.cos(A) - l2/(3*l3)  # constrained p
p1_cd = p2_cd + delta
print(p1_cd, p2_cd, p1_cd - p2_cd)

0.059999999999999505 0.06999999999999951 -0.010000000000000002


In [51]:
z_MND_num = p1 - p2 - delta
z_MND_den = (p1_cd*(1-p1_cd)/n1 + p2_cd*(1-p2_cd)/n2) * N / (N-1)
z_MND_den = np.sqrt(z_MND_den)
z_MND = z_MND_num / z_MND_den
print(z_MND)

-7.86689802094413e-16


p38 => this is treated as a standard z-score, and therefore is turned into a p-value by:

In [52]:
norm.cdf(z_MND)

0.49999999999999967

It being p=0.5 for a situ where the observed difference seen is the actual NIM makes sense! Looks good => to functionify it:

In [53]:
def mndifference(datafr, nim):
    """Perform the Miettinen-Nurminen Large-Sample Score Test of the Difference.
    
    Parameters
    ----------
    datafr : pd.DataFrame
        The data to run the test on.
        Columns: don, non, total
        Index: test, ctrl
        N.B. the convention here is that the experiment is indexed as 1, and the control as 2, in reverse of normal.
    nim : float, <0 when lower is worse.
        The Non-Inferiority Margin: the amount that P1 can be less than P2 and you still conclude that
        group 1 (treatment) is not inferior to group 2 (control).

    Returns
    -------
    ? : ?
        Blah
    """
    [n1, n2] = datafr['total']
    N = n1 + n2
    [p1, p2] = datafr['don'] / datafr['total']
    
    l0 = datafr.loc['ctrl', 'don'] * nim * (1 - nim)
    l1 = (n2*nim - N - 2*datafr.loc['ctrl', 'don']) * nim + datafr['don'].sum()
    l2 = (N + n2)*nim - N - datafr['don'].sum()
    l3 = N
    
    C = (l2**3 / (27 * (l3**3))) - (l1*l2)/(6 * (l3**2)) + l0/(2*l3)
    B = np.sign(C) * np.sqrt((l2**2)/(9 * (l3**2)) - l1/(3*l3))
    A = (1/3) * (np.pi + np.arccos(C/(B**3)))
    
    p2_cd = 2*B*np.cos(A) - l2/(3*l3)  # constrained p
    p1_cd = p2_cd + nim
    
    z_MND_num = p1 - p2 - nim
    z_MND_den = (p1_cd*(1-p1_cd)/n1 + p2_cd*(1-p2_cd)/n2) * N / (N-1)
    z_MND_den = np.sqrt(z_MND_den)
    z_MND = z_MND_num / z_MND_den

    return z_MND, norm.cdf(z_MND)


print(mndifference(df, delta))

(-7.86689802094413e-16, 0.49999999999999967)


In [63]:
diff_range = 50
differences = range(-diff_range, diff_range+1, 10)

for diff in differences:
    test_convs = 70+diff
    step_data = {'test': [test_convs, 1000-test_convs, 1000], 'ctrl': [70, 1000-70, 1000]}
    step_df = pd.DataFrame.from_dict(step_data, orient='index', columns=['don', 'non', 'total'])
    print(step_df, '\n', mndifference(step_df, delta))

      don  non  total
test   20  980   1000
ctrl   70  930   1000 
 (-4.420962695675758, 4.913106912466956e-06)
      don  non  total
test   30  970   1000
ctrl   70  930   1000 
 (-3.1217719308486593, 0.0008988307140223216)
      don  non  total
test   40  960   1000
ctrl   70  930   1000 
 (-1.976435234975862, 0.024052752090734805)
      don  non  total
test   50  950   1000
ctrl   70  930   1000 
 (-0.9444304035075204, 0.1724748754867339)
      don  non  total
test   60  940   1000
ctrl   70  930   1000 
 (-7.86689802094413e-16, 0.49999999999999967)
      don  non  total
test   70  930   1000
ctrl   70  930   1000 
 (0.8744367809944982, 0.8090597828474668)
      don  non  total
test   80  920   1000
ctrl   70  930   1000 
 (1.6915062296095362, 0.9546299204970722)
      don  non  total
test   90  910   1000
ctrl   70  930   1000 
 (2.4606304765911533, 0.9930653434310451)
      don  non  total
test  100  900   1000
ctrl   70  930   1000 
 (3.189054413504759, 0.9992863049662908)
      

Hm, that's wrong. p38 => p<0.05 should show that the experimental treatment is non-inferior to the standard treatment.

But this is doing the reverse: p is getting larger as the test becomes more non-inferior! Reversed the p-val somewhere?