Make sure `impact2_engine` is on the Python search path, one way or another.

In [1]:
import sys
sys.path.append('../../')
import yaml
from impact2_engine.Safety import Safety

The config specification is somewhat verbose. We assume the data has already been processed and its columns renamed.

In [2]:
# Parse the safety configuration shipped with the package.
with open(
    '../../impact2_engine/config/safety_config.yml',
    mode='r',
    encoding='utf-8',
) as config_file:
    config = yaml.safe_load(config_file)

# Echo the parsed configuration back as YAML so it can be inspected below.
yaml.dump(config, stream=sys.stdout)

contents:
  CAT:
  - name: SITE_ID
    var: site
  - name: DONOR_SITE_STATUS
    var: status
  - name: GROUP
    var: group
  - name: GENDER
    var: gender
  - bin:
    - 18
    - 25
    - 40
    - 65
    - 1000
    lvl:
    - 18-24
    - 25-39
    - 40-64
    - 65+
    name: AGE
    var: age
  - bin:
    - 0
    - 18.5
    - 25
    - 30
    - 1000
    lvl:
    - underweight
    - normal
    - overweight
    - obese
    name: BMI_CALC
    var: bmi
  - bin: 4
    lvl:
    - q1
    - q2
    - q3
    - q4
    name: WEIGHT
    var: weight
  DAT:
  - name: DONATION_DATE
    var: col_date
  IDS:
  - name: DONOR_NUMBER
    var: don_id
  - name: COLLECTION_NUMBER
    plan: 60000
    var: col_id
  POP:
  - name: ITT
    var: itt
  - name: MITT
    var: mitt
  - name: PP
    var: pp
  SEV:
  - aes:
    - '1.1'
    - '1.2'
    - '1.3'
    - '1.4'
    - '1.5'
    - '1.6'
    - '2.1'
    - '3.1'
    - '3.2'
    - '3.3'
    - '3.4'
    - '3.5'
    - '3.6'
    - '3.7'
    - '4.1'
    - '4.2'
    - '

Instantiate `Safety` with the loaded config. The instance exposes both `.data` and `.contents`. There is no `.missing` data in this example.

In [3]:
# Build the constructor arguments from `config` without mutating it: the
# original in-place update (`config['data_path'] = prefix + config['data_path']`)
# prepended the data directory again on every re-run of this cell, which
# produced an invalid path the second time around.
safety_kwargs = {
    **config,
    'data_path': '../../impact2_engine/data/' + config['data_path'],
}
saf = Safety(**safety_kwargs)

# Last expression is displayed: True means no missing data was recorded.
saf.missing is None

True

Filter data entries by collection date and by any categorical variable, a.k.a. 'strata'. The same syntax is used to filter on flags, based on the 'population' or the 'severity' group of adverse events (AE).

In [4]:
# Show the levels of every categorical variable and flag group ('POP', 'SEV');
# these are the keys and values used in the `query` of the next cell.
saf.comb_lvls

{'site': ['448', '501', '516'],
 'status': ['donated', 'naive'],
 'group': ['A', 'B'],
 'gender': ['female', 'male'],
 'age': ['18-24', '25-39', '40-64', '65+'],
 'bmi': ['normal', 'obese', 'overweight', 'underweight'],
 'weight': ['q1', 'q2', 'q3', 'q4'],
 'POP': ['itt', 'mitt', 'pp'],
 'SEV': ['all_ae', 'non_hyp', 'hyp', 'sig_hyp', 'only_1.1']}

In [5]:
# Restrict the data to a collection-date window and to the given levels.
# `start`, `end` and `query` are all None by default.
saf.filter(
    start='2020-02-01',
    end='2020-03-01',
    query={
        'gender': 'male',
        'SEV': 'non_hyp',
    },
)

Unnamed: 0_level_0,site,don_id,status,gender,age,weight,col_id,group,bmi,sig_hyp,...,1.4,1.5,3.1,3.2,3.3,4.1,4.2,7.1,8.1,9.1
col_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-02-10,516,433440,naive,male,40-64,q3,5161035156,A,obese,False,...,False,False,False,False,True,False,False,False,False,False
2020-02-12,516,427284,donated,male,25-39,q2,5161035541,B,overweight,False,...,False,False,False,False,True,False,False,False,False,False
2020-02-13,516,177735,donated,male,40-64,q2,5161035823,A,overweight,False,...,False,False,False,False,False,False,False,True,False,False
2020-02-18,516,274128,donated,male,25-39,q1,5161036743,B,normal,False,...,False,False,False,False,True,False,False,False,False,False
2020-02-19,516,105653,donated,male,40-64,q4,5161036924,A,obese,False,...,False,False,False,True,False,False,False,False,False,False
2020-02-24,516,101840,donated,male,18-24,q1,5161038081,A,overweight,False,...,False,False,False,False,True,False,False,False,False,False
2020-02-25,501,178398,donated,male,40-64,q3,5011160207,B,obese,False,...,False,False,False,False,False,True,False,False,False,False
2020-02-27,516,297152,donated,male,18-24,q1,5161038691,A,normal,False,...,False,False,False,False,True,False,False,False,False,False
2020-02-28,448,440279,donated,male,25-39,q1,4480241312,B,normal,False,...,False,False,False,False,True,False,False,False,False,False


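The general summary consists of two tables, one for collections/donations and one for AE rates, each split by group. Note that in the first table the per-group `pct` of `col_id` is relative to the observed number of collections, whereas the `grand_total` value corresponds to the share of the planned number of collections (`plan: 60000` in the config).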

In [6]:
# Collection/donation summary, split by group (with a grand total row).
saf.summary_donat()

variable,col_id,don_id,col_id,col_per_don
metric,nunique,nunique,pct,mean
group,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A,11775,1726,50.89251,3.419983
B,11362,1717,49.10749,3.300029
grand_total,23137,3443,38.561667,3.361076


In [7]:
# AE counts ('sum') and rates ('pct') for each severity group, split by group.
saf.summary_risk()

variable,all_ae,hyp,non_hyp,only_1.1,sig_hyp,all_ae,non_hyp,hyp,sig_hyp,only_1.1
metric,sum,sum,sum,sum,sum,pct,pct,pct,pct,pct
group,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
A,73,48,28,42,6,0.619958,0.237792,0.407643,0.050955,0.356688
B,84,67,20,63,4,0.739306,0.176025,0.589685,0.035205,0.55448
grand_total,157,115,48,105,10,0.678567,0.20746,0.497039,0.043221,0.453819


AE summary (absolute 'sum' and relative % 'pct') within a severity group, suitable for both graphical (pie chart) and tabular representation.

In [8]:
# Per-AE breakdown within one severity group; pop='itt' is the default.
saf.summary_aes(sev='non_hyp', pop='itt')

variable,3.1,3.2,3.3,4.1,4.2,7.1,8.1,9.1,non_hyp,3.1,3.2,3.3,4.1,4.2,7.1,8.1,9.1
metric,sum,sum,sum,sum,sum,sum,sum,sum,sum,pct,pct,pct,pct,pct,pct,pct,pct
group,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
A,3,1,16,3,0,5,0,0,28,10.714286,3.571429,57.142857,10.714286,0.0,17.857143,0.0,0.0
B,1,0,12,2,1,3,1,1,20,5.0,0.0,60.0,10.0,5.0,15.0,5.0,5.0
grand_total,4,1,28,5,1,8,1,1,48,8.333333,2.083333,58.333333,10.416667,2.083333,16.666667,2.083333,2.083333


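AE summary for a given severity group, split by any number of 'strata' (percentages are computed w.r.t. the last variable in the list), suitable for both pie charts and bar graphs. Note that the `pct` value in the `grand_total` row is the sum of the subtotal percentages (300.0 below for three sites), not a percentage of the overall total.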

In [9]:
# `strata` must be a list[str]; pop='itt' is the default.
saf.summary_sev(
    strata=['site', 'group'],
    sev='non_hyp',
    pop='itt',
)

Unnamed: 0_level_0,variable,non_hyp,non_hyp
Unnamed: 0_level_1,metric,sum,pct
site,group,Unnamed: 2_level_2,Unnamed: 3_level_2
448,A,2.0,40.0
448,B,3.0,60.0
448,448 - subtotal,5.0,100.0
501,A,8.0,66.666667
501,B,4.0,33.333333
501,501 - subtotal,12.0,100.0
516,A,18.0,58.064516
516,B,13.0,41.935484
516,516 - subtotal,31.0,100.0
grand_total,,48.0,300.0


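Compute confidence intervals (CI) for the AE risk difference ($p_B - p_A$) using various methods. The point estimate is the maximum-likelihood estimate, and the comparison margin is taken from the config. The `risk_diff` below is expressed in percentage points (it equals the difference of the group `pct` values reported by `summary_risk()`).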

In [10]:
# All arguments except `sev` are shown with their default values:
#   pop    -> 'itt'
#   limits -> 'both', one of {'lower', 'upper', 'both'}
#   conf   -> .89 (confidence level)
saf.calc_ci(
    sev='non_hyp',
    pop='itt',
    limits='both',
    conf=.89,
)

Unnamed: 0,LL,UL,risk_diff,margin
agresti_caffo,-0.158779,0.035882,-0.061767,0
anderson_hauck,-0.161545,0.038011,-0.061767,0
brown_li,-0.157131,0.033612,-0.061767,0
jeffreys,-0.15796,0.034745,-0.061767,0
jeffreys_hybrid,-0.157969,0.034754,-0.061767,0
jeffreys_perks,-0.157919,0.034792,-0.061767,0
jeffreys_perks_cc,-0.160076,0.036958,-0.061767,0
haldane,-0.156932,0.033805,-0.061767,0
haldane_cc,-0.159088,0.03597,-0.061767,0
newcombe,-0.160325,0.035402,-0.061767,0


Compute the AE risk, split by group and pooled (labelled 'pulled' in the output), using data aggregated by day ('d'), week ('w') or month ('m') within the specified date range.

In [11]:
# Apart from `method` and `sev`, every argument below is passed its default:
# pop='itt', aggregate='w', conf=.89, start=None, end=None.
saf.summary_longitudinal(
    method='wilson',
    sev='non_hyp',
    pop='itt',
    aggregate='w',
    conf=.89,
    start=None,
    end=None,
)

sample,A,A,A,A,A,B,B,B,B,B,pulled,pulled,pulled,pulled,pulled
metric,LL,UL,n,rate,x,LL,UL,n,rate,x,LL,UL,n,rate,x
period,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
2020-01-06,0.0,0.164214,13,0.0,0,0.0,0.221064,9,0.0,0,6.217084e-18,0.104024,22,0.0,0
2020-01-13,0.0,0.019564,128,0.0,0,0.0,0.020025,125,0.0,0,0.0,0.009995,253,0.0,0
2020-01-20,0.0,0.009879,256,0.0,0,8.591122999999999e-19,0.009511,266,0.0,0,0.0,0.004869,522,0.0,0
2020-01-27,0.001721911,0.010216,714,0.004202,3,0.001753867,0.010405,701,0.00428,3,0.002234272,0.008033,1415,0.00424,6
2020-02-03,2.1616789999999997e-19,0.003101,821,0.0,0,0.0002939885,0.005474,787,0.001271,1,0.0001438727,0.002684,1608,0.000622,1
2020-02-10,0.002771256,0.009082,1394,0.005022,7,0.0005156984,0.004436,1321,0.001514,2,0.001958762,0.005605,2715,0.003315,9
2020-02-17,0.0001464225,0.002731,1580,0.000633,1,0.001204629,0.005709,1524,0.002625,4,0.0007998604,0.003241,3104,0.001611,5
2020-02-24,0.0005034985,0.004332,1353,0.001478,2,0.0005137534,0.00442,1326,0.001508,2,0.0006851437,0.003251,2679,0.001493,4
2020-03-02,0.001220655,0.005785,1504,0.00266,4,0.0,0.001821,1400,0.0,0,0.0006320467,0.002999,2904,0.001377,4
2020-03-09,0.000423636,0.003646,1608,0.001244,2,0.001176063,0.005574,1561,0.002562,4,0.0009972485,0.003592,3169,0.001893,6


For the `method` argument above, use one of the following methods.

In [12]:
# List the method names exposed by `Rate` (e.g. 'wilson', as used above).
from impact2_engine.utils.CalcCI import Rate
Rate.METHODS

['agresti_coull',
 'clopper_pearson',
 'clopper_pearson_cc',
 'jeffreys',
 'uniform',
 'wald',
 'wald_cc',
 'wilson',
 'wilson_cc']