Make sure `impact2_engine` is on the Python module search path (`sys.path`), one way or another.

In [1]:
import sys
# Add the directory two levels up to the module search path; this must run
# before the `impact2_engine` import below so that the package can be resolved.
sys.path.append('../../')
import yaml
from impact2_engine.PlasmaCollection import PlasmaCollection

The config file specifies how the data are processed.

In [2]:
# Parse the YAML configuration shipped with the engine into a Python object.
config_file = '../../impact2_engine/config/plasma_config.yml'
with open(config_file, mode='r', encoding='utf-8') as stream:
    config = yaml.safe_load(stream)

# Echo the parsed configuration back as YAML so it can be inspected.
yaml.dump(config, stream=sys.stdout)

contents:
  CAT:
  - name: SITE_ID
    var: site
  - name: DONOR_SITE_STATUS
    var: status
  - name: GROUP
    var: group
  - name: GENDER
    var: gender
  - bin:
    - 18
    - 25
    - 40
    - 65
    - 1000
    lvl:
    - 18-24
    - 25-39
    - 40-64
    - 65+
    name: AGE
    var: age
  - bin:
    - 0
    - 18.5
    - 25
    - 30
    - 1000
    lvl:
    - underweight
    - normal
    - overweight
    - obese
    name: BMI_CALC
    var: bmi
  - bin: 4
    lvl:
    - q1
    - q2
    - q3
    - q4
    name: WEIGHT
    var: weight_cat
  DTS:
  - format: '%Y-%m-%d'
    name: DONATION_DATE
    var: col_date
  - format: '%Y-%m-%d %H:%M:%S'
    name: PROCEDURE_START
    var: proc_start
  - format: '%Y-%m-%d %H:%M:%S'
    name: PROCEDURE_END
    var: proc_end
  - format: '%Y-%m-%d'
    var: week
  IDS:
  - name: DONOR_NUMBER
    var: don_id
  - name: COLLECTION_NUMBER
    plan: 60000
    var: col_id
  - name: DEVICE_ID
    var: dev_id
  POP:
  - name: ITT
    var: itt
  - name: MITT
  

Instantiate the `PlasmaCollection` class, initialized with the correct config. The instance contains both `.data` and `.contents`. In this example there are also `.missing` data.

In [3]:
# Point `data_path` at the engine's data directory, relative to this notebook.
# The prefix is only added when it is not already present, so re-running this
# cell on a live kernel does not prepend it a second time and break the path.
data_dir = '../../impact2_engine/data/'
if not config['data_path'].startswith(data_dir):
    config['data_path'] = data_dir + config['data_path']

# Every top-level config entry is passed to the constructor as a keyword argument.
col = PlasmaCollection(**config)

# Display the `.missing` attribute of the collection.
col.missing

Filter data entries by collection date and by any categorical variable (aka 'strata'). The same syntax is used for filtering on flags, i.e. the 'population' group or the 'severity' group of adverse events (AE).

In [4]:
# Show the levels recorded for each categorical variable and for the
# 'POP' / 'SEV' flag groups (presumably the valid keys and values for the
# `query` argument of `col.filter` - confirm against PlasmaCollection).
col.comb_lvls

{'site': ['448', '501', '516'],
 'status': ['donated', 'naive'],
 'group': ['A', 'B'],
 'gender': ['female', 'male'],
 'age': ['18-24', '25-39', '40-64', '65+'],
 'bmi': ['normal', 'obese', 'overweight', 'underweight'],
 'weight_cat': ['q1', 'q2', 'q3', 'q4'],
 'POP': ['itt', 'mitt', 'pp'],
 'SEV': ['all_ae', 'non_hyp', 'hyp', 'sig_hyp', 'only_1.1']}

In [5]:
# Restrict the collection by date window and by strata/flag values.
# `start`/`end` and `query` are all optional (None by default).
col.filter(
    start='2020-02-01',
    end='2020-03-01',
    query={'gender': 'male', 'SEV': 'non_hyp'},
)

Unnamed: 0_level_0,site,dev_id,don_id,status,gender,age,weight_cat,col_id,proc_start,proc_end,...,hyp,only_1.1,itt,mitt,pp,duration_minutes,speed,yield,yield_resid,week
col_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-02-10,516,PCS300-18G150SPG,433440,naive,male,40-64,q3,5161035156,2020-02-10 17:32:00,2020-02-10 18:22:00,...,False,False,True,True,True,50.0,16.0,1.0,0.0,2020-02-10
2020-02-12,516,PCS300-18H362SPG,427284,donated,male,25-39,q2,5161035541,2020-02-12 20:02:00,2020-02-12 20:54:00,...,False,False,True,True,True,52.0,17.326923,1.0,0.0,2020-02-10
2020-02-19,516,PCS300-18G150SPG,105653,donated,male,40-64,q4,5161036924,2020-02-19 15:32:00,2020-02-19 17:01:00,...,False,False,True,True,True,89.0,8.977528,0.99875,0.00125,2020-02-17
2020-02-24,516,PCS300-18G132SPG,101840,donated,male,18-24,q1,5161038081,2020-02-24 19:07:00,2020-02-24 19:57:00,...,False,False,True,True,True,50.0,15.0,1.0,0.0,2020-02-24
2020-02-25,501,PCS300-18D977SPG,178398,donated,male,40-64,q3,5011160207,2020-02-25 17:35:00,2020-02-25 18:39:00,...,False,False,True,True,True,64.0,13.59375,0.998852,0.001148,2020-02-24
2020-02-27,516,PCS300-18H440SPG,297152,donated,male,18-24,q1,5161038691,2020-02-27 20:26:00,2020-02-27 21:09:00,...,True,True,True,True,True,43.0,14.534884,1.0,0.0,2020-02-24
2020-02-28,448,PCS300-18A083SPG,440279,donated,male,25-39,q1,4480241312,2020-02-28 15:13:00,2020-02-28 16:32:00,...,False,False,True,True,True,79.0,9.810127,0.998711,0.001289,2020-02-24


The summary is split into two methods, `count_summary` and `plasma_summary`, for COUNT- and PLASMA-related metrics respectively. Either can optionally be split by ANY stratification variables.

In [6]:
# COUNT-related summary, stratified by `group`.
# `pop` and `raw` are spelled out here with their default values.
col.count_summary(
    strata=['group'],
    pop='itt',
    raw=False,
)

Unnamed: 0,group,variable,metric,value
0,A,col_id,nunique,11681.0
1,A,col_id,pct_total,50.902039
2,A,col_weekly_per_device,mean,10.035223
3,A,col_weekly_per_device,median,2.166667
4,A,col_weekly_per_device,quantile_025,0.0
5,A,col_weekly_per_device,quantile_975,29.216667
6,A,col_weekly_per_device,std,11.208932
7,A,dev_id,nunique,61.0
8,A,dev_id,pct_total,50.0
9,B,col_id,nunique,11267.0


In [7]:
# PLASMA-related summary, stratified by `group`.
# `pop` and `raw` are spelled out here with their default values.
col.plasma_summary(
    strata=['group'],
    pop='itt',
    raw=False,
)

Unnamed: 0,group,variable,metric,value
0,A,actual_vol,mean,771.462717
1,A,actual_vol,median,800.0
2,A,actual_vol,quantile_025,624.0
3,A,actual_vol,quantile_975,801.0
4,A,actual_vol,std,75.367446
5,A,duration_minutes,mean,49.921496
6,A,duration_minutes,median,47.0
7,A,duration_minutes,quantile_025,33.0
8,A,duration_minutes,quantile_975,83.0
9,A,duration_minutes,std,12.75481


If no stratification is given, this is combined with the `'overall'` summary. To get the raw data for distribution plotting, set `raw = True`. Specify `sev` to also get flags.

In [8]:
# PLASMA-related summary without stratification, requesting `raw = True`.
# `strata` and `pop` are spelled out here with their default values.
col.plasma_summary(
    strata=None,
    pop='itt',
    raw=True,
)

Unnamed: 0,variable,metric,value
0,target_vol,mean,809.478909
1,target_vol,std,88.789868
2,target_vol,median,800.0
3,actual_vol,mean,800.847089
4,actual_vol,std,109.843918
5,actual_vol,median,800.0
6,yield,mean,0.989454
7,yield,std,0.081216
8,yield,median,1.0
9,yield_resid,mean,0.010546
