In [1]:
from core.helpers.session_helper import SessionHelper
session = SessionHelper().session

2019-07-31 16:55:29,379 - core.helpers.session_helper.SessionHelper - INFO - Creating session for dev environment...
2019-07-31 16:55:29,410 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Generating administrator mocks.
2019-07-31 16:55:29,447 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Done generating administrator mocks.
2019-07-31 16:55:29,448 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Generating pharmaceutical company mocks.
2019-07-31 16:55:29,453 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Done generating pharmaceutical company mocks.
2019-07-31 16:55:29,454 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Generating brand mocks.
2019-07-31 16:55:29,458 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Done generating brand mocks.
2019-07-31 16:55:29,460 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Generating segment mocks.
2019-07-31 16:55:2

In [2]:
"""
************ CONFIGURATION - PLEASE TOUCH **************
Pipeline Builder configuration: creates configurations from variables specified here!!
This cell will be off in production as configurations will come from the configuration postgres DB.
"""
# config vars: this dataset
config_pharma = "sun" # the pharmaceutical company which owns {brand}
config_brand = "ilumya" # the brand this pipeline operates on
config_state = "enrich" # the state this transform runs in
config_name = "pending_enrichment" # the name of this transform, which is the name of this notebook without .ipynb

# input vars: dataset to fetch. Recall that a contract published to S3 has a key format branch/pharma/brand/state/name
input_pharma = "sun"
input_brand = "ilumya"
input_state = "ingest"
input_name = "symphony_health_association_ingest_column_mapping"
input_branch = "sun-extract-validation" # if None, input_branch is automagically set to your working branch

In [3]:
"""
************ SETUP - DON'T TOUCH **************
Populating config mocker based on config parameters...
"""
import core.helpers.pipeline_builder as builder

ids = builder.build(config_pharma, config_brand, config_state, config_name, session)
transform_id = ids[0]
run_id = ids[1]

2019-07-31 16:55:29,637 - core.logging - DEBUG - Adding/getting mocks for specified configurations...
2019-07-31 16:55:29,669 - core.logging - DEBUG - Done. Creating mock run event and committing results to configuration mocker.


In [4]:
"""
************ SETUP - DON'T TOUCH **************
This section imports data from the configuration database
and should not need to be altered or otherwise messed with. 
~~These are not the droids you are looking for~~
"""
from core.constants import BRANCH_NAME, ENV_BUCKET
from core.helpers.session_helper import SessionHelper
from core.models.configuration import Transformation
from dataclasses import dataclass
from core.dataset_contract import DatasetContract

db_transform = session.query(Transformation).filter(Transformation.id == transform_id).one()

@dataclass
class DbTransform:
    id: int = db_transform.id ## the instance id of the transform in the config app
    name: str = db_transform.transformation_template.name ## the transform name in the config app
    state: str = db_transform.pipeline_state.pipeline_state_type.name ## the pipeline state, one of raw, ingest, master, enhance, enrich, metrics, dimensional
    branch:str = BRANCH_NAME ## the git branch for this execution 
    brand: str = db_transform.pipeline_state.pipeline.brand.name ## the pharma brand name
    pharmaceutical_company: str = db_transform.pipeline_state.pipeline.brand.pharmaceutical_company.name # the pharma company name
    publish_contract: DatasetContract = DatasetContract(branch=BRANCH_NAME,
                            state=db_transform.pipeline_state.pipeline_state_type.name,
                            parent=db_transform.pipeline_state.pipeline.brand.pharmaceutical_company.name,
                            child=db_transform.pipeline_state.pipeline.brand.name,
                            dataset=db_transform.transformation_template.name)


# CORE Cartridge Notebook::[transform name here]
![CORE Logo](assets/coreLogo.png) 

---
## Keep in Mind
Good Transforms Are...
- **singular in purpose:** good transforms do one and only one thing, and handle all known cases for that thing. 
- **repeatable:** transforms should be written in a way that they can be run against the same dataset an infinate number of times and get the same result every time. 
- **easy to read:** 99 times out of 100, readable, clear code that runs a little slower is more valuable than a mess that runs quickly. 
- **No 'magic numbers':** if a variable or function is not instantly obvious as to what it is or does, without context, maybe consider renaming it.

## Workflow - how to use this notebook to make science
#### Data Science
1. **Document your transform.** Fill out the _description_ cell below describing what it is this transform does; this will appear in the configuration application where Ops will create, configure and update pipelines. 
1. **Define your config object.** Fill out the _configuration_ cell below the commented-out guide to define the variables you want ops to set in the configuration application (these will populate here for every pipeline). 
2. **Build your transformation logic.** Use the transformation cell to do that magic that you do. 
![caution](assets/cautionTape.png)

### Configuration

import pandas as pd
pd.options.display.max_rows=999
pd.options.display.max_columns=999

In [97]:
""" 
********* VARIABLES - PLEASE TOUCH ********* 
This section defines what you expect to get from the configuration application 
in a single "transform" object. Define the vars you need here, and comment inline to the right of them 
for all-in-one documentation. 
Engineering will build a production "transform" object for every pipeline that matches what you define here.

@@@ FORMAT OF THE DATA CLASS IS: @@@ 

<variable_name>: <data_type> #<comment explaining what the value is to future us>

e.g.

class Transform(DbTransform):
    some_ratio: float
    site_name: str

~~These ARE the droids you are looking for~~
"""

class Transform(DbTransform):
    '''
    YOUR properties go here!!
    Variable properties should be assigned to the exact name of
    the transformation as it appears in the Jupyter notebook filename.
    '''

    sort_columns: list # List of Long-ID, Pharmacy Code, Brand/Medication and Status Date used to sort the dataframe. Order of variables matters
    substatus_list: list # List of integrichain substatuses for current customer. Order of variables does not matter.
    pjh: str # Patient Journey Hiearchy column, should have been added in an early transform
    ic_status: str # Integrichain Status column, should have been added in an early transform
    ic_substatus: str # Integrichain Sub Status column, should have been added in an early transform
    bvpa: str # BV/PA string in Patient Journey Hierarchy column. String should be something like 'BV/PA'
    intake: str # Intake string in Patient Journey Hierarchy column. String should be something like 'INTAKE'
    fulfillment: str # Fulfillment string in Patient Journey Hierarchy column. String should be something like 'FULFILLMENT'
    pending: str # Pending string in Integrichain Status column. String should be something like 'PENDING'
    
    
    def pending_enrichment(self,df):

        df = df.sort_values(self.sort_columns,ascending=[True, True, True, True])

        groupby_cols = self.sort_columns[:-1]
        status_date = self.sort_columns[-1]
        
        df, min_bvpa, max_bvpa, max_bvpa_intake, min_bvpa_fulfillment = self._create_bvpa(df=df, groupby_cols=groupby_cols, status_date=status_date)
        
        df = self._merge_bvpa(df=df, bvpa_df=min_bvpa, min_max='min', groupby_cols=groupby_cols ,status_date=status_date)
        df = self._merge_bvpa(df=df, bvpa_df=max_bvpa_intake, min_max='max', groupby_cols=groupby_cols ,status_date=status_date)
        df = self._merge_bvpa(df=df, bvpa_df=max_bvpa, min_max='max', groupby_cols=groupby_cols ,status_date=status_date)
        df = self._merge_bvpa(df=df, bvpa_df=min_bvpa_fulfillment, min_max='min', groupby_cols=groupby_cols ,status_date=status_date)

        enrich_df = df[(df[self.ic_status] == self.pending) & (df[self.ic_substatus].isin(self.substatus_list))]
        df = df[~((df[self.ic_status] == self.pending) & (df[self.ic_substatus].isin(self.substatus_list)))]

        enrich_df.loc[:,'Before_min_fulfillment'] = np.where(enrich_df.loc[:,status_date] <= enrich_df.loc[:,'min_bvpa_fulfillment'],1,0)
        enrich_df.loc[:,'After_min_BVPA'] = np.where(enrich_df.loc[:,status_date] > enrich_df.loc[:,'min_bvpa_date'],1,0)
        enrich_df.loc[:,'Before_max_BVPA'] = np.where(enrich_df.loc[:,status_date] < enrich_df.loc[:,'max_bvpa_date'],1,0)
        enrich_df.loc[:,'After_max_intake'] = np.where(enrich_df.loc[:,status_date] >= enrich_df.loc[:,'max_bvpa_intake_date'],1,0)
        
        enrich_df.loc[:,self.pjh] = (
            np.where(enrich_df.loc[:,'Before_min_fulfillment'] == 1, self.intake,
            np.where((enrich_df.loc[:,'After_min_BVPA'] == 1) & (enrich_df.loc[:,'Before_max_BVPA'] == 1),self.bvpa,
            np.where(enrich_df.loc[:,'After_max_intake'] == 1, self.fulfillment,
            enrich_df.loc[:,self.pjh])))
        )
        
        df = pd.concat([df,enrich_df],sort=False)
        
        df = df.drop(labels=['After_max_BVPA','After_max_intake','Before_min_BVPA','Before_min_fulfillment','min_bvpa_date','max_bvpa_intake_date','max_bvpa_date','min_bvpa_fulfillment'],axis=1)
        
        return df
    
        
    def _create_bvpa(self, df, groupby_cols, status_date):
        
        bvpa_df = df[(df[self.pjh] == self.bvpa)]

        intake_df = df[(df.Patient_Journey_Hierarchy.isin([self.bvpa,self.intake]))]

        fulfillment_df = df[(df.Patient_Journey_Hierarchy.isin([self.bvpa,self.fulfillment]))]

        min_bvpa = (
            bvpa_df
            .groupby(groupby_cols)[status_date]
            .min()
            .reset_index(drop=False)
            .rename(columns={status_date:'min_bvpa_date'})
        )
        
        max_bvpa = (
            bvpa_df
            .groupby(groupby_cols)[status_date]
            .max()
            .reset_index(drop=False)
            .rename(columns={status_date:'max_bvpa_date'})
        )
        
        max_bvpa_intake = (
            intake_df
            .groupby(groupby_cols)[status_date]
            .max()
            .reset_index(drop=False)
            .rename(columns={status_date:'max_bvpa_intake_date'})
        )
        
        min_bvpa_fulfillment = (
            fulfillment_df
            .groupby(groupby_cols)[status_date]
            .min()
            .reset_index(drop=False)
            .rename(columns={status_date:'min_bvpa_fulfillment'})
        )

        return df, min_bvpa, max_bvpa, max_bvpa_intake, min_bvpa_fulfillment
    
    
    @staticmethod
    def _merge_bvpa(df, bvpa_df, min_max, groupby_cols ,status_date):
        
        df = pd.merge(df,min_bvpa,how='left',on=groupby_cols)
        
        if min_max == 'min':
            
            status_date_df = (
                df.groupby(groupby_cols)
                .status_date.min()
            )
            
        elif min_max == 'max':
            status_date_df = (
                df.groupby(groupby_cols)
                .status_date.max()
            )
        
        status_date_df = (
            status_date_df
            .reset_index(drop=False)
            .rename(columns={'status_date':'new_status_date'})
        )
        
        df = pd.merge(df,status_date_df)

        df.loc[df.min_bvpa_date.isna(),'new_bvpa_date'] = df.min_status_date
        
        df = df.drop(labels=['new_status_date'],axis=1)
        
        return df
    
transform = Transform()

In [98]:
## Please place your value assignments for development here!!
## This cell will be turned off in production and Engineering will set to pull from the configuration application instead
## For the last example, this could look like...
## transform.some_ratio = 0.6
## transform.site_name = "WALGREENS"

transform.sort_columns = ['msa_patient_id', 'pharm_code', 'medication', 'status_date']
transform.pjh = 'Patient_Journey_Hierarchy'
transform.ic_status = 'integrichain_status'
transform.ic_substatus = 'integrichain_sub_status'
transform.bvpa = 'BV/PA'
transform.intake = 'INTAKE'
transform.fulfillment = 'FULFILLMENT'
transform.pending = 'PENDING'
transform.substatus_list = ['OTHER','PATIENT CONTACT','PATIENT RESPONSE','PATIENT HOLD','PATIENT FINANCIAL','PRESCRIBER','READY']

### Description
What does this transformation do? be specific.

![what does your transform do](assets/what.gif)

![Pending Enrichment Flowchart](assets/PendingEnrichment.png)

Replaces ambiguous substatus with info based on sub-statuses surrounding the ambiguous field

### Transformation

In [99]:
"""
************ FETCH DATA - TOUCH, BUT CAREFULLY **************
This cell will be turned off in production, as the input_contract will be handled by the pipeline.
"""

if not input_branch:
    input_branch = BRANCH_NAME
input_contract = DatasetContract(branch=input_branch, state=input_state, parent=input_pharma, child=input_brand, dataset=input_name)
run_filter = []
# run_filter.append(dict(partition="__metadata_run_id", comparison="==", values=[1]))
# IF YOU HAVE PUBLISHED DATA MULTIPLE TIMES, uncomment the above line and change the int to the run_id to fetch.
# Otherwise, you will have duplicate values in your fetched dataset!
df = input_contract.fetch(filters=run_filter)

import numpy as np
import pandas as pd
pd.options.display.max_columns=999
# pd.options.display.max_rows=999

df.status_date = df.status_date.str[:8].astype(str)
df.ref_date = df.ref_date.str[:8].astype(str)

df.status_date = pd.to_datetime(df.status_date, infer_datetime_format=True, errors='coerce')
df.ref_date = pd.to_datetime(df.ref_date, infer_datetime_format=True, errors='coerce')

2019-07-31 18:34:14,322 - core.dataset_contract.DatasetContract - INFO - Fetching dataframe from s3 location s3://ichain-dev/sun-extract-validation/sun/ilumya/ingest/symphony_health_association_ingest_column_mapping.


In [100]:
import os
os.chdir('{}'.format(os.path.expanduser('~')))
status_config = pd.read_csv('status_mapping.csv')

status_config.loc[:,'statusCode'] = status_config.statusCode.str.upper()
status_config.loc[:,'subStatus'] = status_config.subStatus.str.upper()
status_config.loc[:,'integrichain_sub_status'] = status_config.integrichain_sub_status.str.upper()
status_config.loc[:,'integrichain_status'] = status_config.integrichain_status.str.upper()
status_config.loc[:,'Patient_Journey_Hierarchy'] = status_config.Patient_Journey_Hierarchy.str.upper()

status_config = status_config.rename(columns={'statusCode':'status_code','subStatus':'sub_status'})

df.sub_status = df.sub_status.str.replace('PRESCRIBERHOLD','PRESCRIBER HOLD')

df = pd.merge(df,status_config,on=['status_code','sub_status'])

df = df[['rec_date', 'pharm_code', 'pharm_npi', 'transtype', 'pharm_transaction_id', 'trans_seq', 'ref_source', 'ref_date', 'program_id', 'pharmacy_id', 'pat_last_name', 'pat_first_name', 'pat_dob', 'pat_gender', 
         'pat_addr1', 'pat_addr2', 'pat_city', 'pat_state', 'pat_zip', 'dx1_code', 'dx2_code', 'status_date', 'status_code', 'sub_status', 'integrichain_status','integrichain_sub_status', 'Patient_Journey_Hierarchy', 
         'pres_last_name', 'pres_first_name', 'pres_addr1', 'pres_addr2', 'pres_city', 'pres_state', 'pres_zip', 'pres_phone', 'pres_npi', 'pres_dea', 'facility_name', 'rxdate', 'rxnumber', 'rxrefills', 'rxfill', 
         'refill_remaining', 'prev_disp', 'rx_ndc_number', 'medication', 'quantity', 'day_supply', 'ship_date', 'ship_carrier', 'shiptracking_num', 'ship_location', 'ship_address', 'ship_city', 'ship_state', 'ship_zip', 
         'has_medical', 'primary_coverage_type', 'primary_payer_name', 'primary_payer_type', 'secondary_coverage_type', 'secondary_payer_name', 'secondary_payer_type', 'plan_paid_amt', 'pat_copay', 'copay_assist_amount', 
         'oth_payer_amt', 'xfer_pharmname', 'msa_patient_id', 'msa_patient_bmap', '__metadata_run_timestamp', '__metadata_app_version', '__metadata_output_contract', '__metadata_transform_timestamp', '__metadata_run_id']]

In [101]:
### Use the variables above to execute your transformation. the final output needs to be a variable named final_dataframe
final_dataframe = transform.pending_enrichment(df)

AttributeError: 'DataFrame' object has no attribute 'min_status_date'

In [62]:
test_df = pd.merge(final_dataframe,min_bvpa,how='left',on=groupby_cols)

In [89]:
min_df = (df.groupby(groupby_cols).status_date.min().reset_index(drop=False).rename(columns={'status_date':'min_status_date'}))

In [83]:
test_df = pd.merge(test_df,min_df)

test_df.loc[test_df.min_bvpa_date.isna(),'min_bvpa_date'] = test_df.min_status_date

In [87]:
test_df

Unnamed: 0,rec_date,pharm_code,pharm_npi,transtype,pharm_transaction_id,trans_seq,ref_source,ref_date,program_id,pharmacy_id,pat_last_name,pat_first_name,pat_dob,pat_gender,pat_addr1,pat_addr2,pat_city,pat_state,pat_zip,dx1_code,dx2_code,status_date,status_code,sub_status,integrichain_status,integrichain_sub_status,Patient_Journey_Hierarchy,pres_last_name,pres_first_name,pres_addr1,pres_addr2,pres_city,pres_state,pres_zip,pres_phone,pres_npi,pres_dea,facility_name,rxdate,rxnumber,rxrefills,rxfill,refill_remaining,prev_disp,rx_ndc_number,medication,quantity,day_supply,ship_date,ship_carrier,shiptracking_num,ship_location,ship_address,ship_city,ship_state,ship_zip,has_medical,primary_coverage_type,primary_payer_name,primary_payer_type,secondary_coverage_type,secondary_payer_name,secondary_payer_type,plan_paid_amt,pat_copay,copay_assist_amount,oth_payer_amt,xfer_pharmname,msa_patient_id,msa_patient_bmap,__metadata_run_timestamp,__metadata_app_version,__metadata_output_contract,__metadata_transform_timestamp,__metadata_run_id,min_bvpa_date,min_status_date
0,20181106 23:00:00,CVS,1043382302,COM,182176830,0,HUB,2018-10-19,1303801,9009919609,,,,M,,,,,30,L40.0,,2018-12-31,ACTIVE,SHIPMENT,ACTIVE,SHIPMENT,FULFILLMENT,CHAO,TOMAS,100 STONEFOREST DR,STE 320,WOODSTOCK,GA,30189,7705165199,1316003577,MC0707286,,20180918,81872197,0,00,0,,47335017795,ILUMYA SD PFS,1,28,20181106 23:00:00,UPS,1Z265561NW85358841,PRESCRIBER OFFICE,"100 STONE FOREST DRIVE,SUITE 320",WOODSTOCK,GA,30,,PHARMACY,,OTHER,,,,,,,,,2120001,VVVVV,2019-07-01 13:25:07,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-07-01 13:35:27,3,2018-12-31,2018-12-31
1,20181220 23:00:00,CVS,1043382302,COM,183711690,0,HUB,2018-10-19,1303801,9009919609,,,,M,,,,,30,L40.0,,2018-12-31,ACTIVE,SHIPMENT,ACTIVE,SHIPMENT,FULFILLMENT,CHAO,TOMAS,100 STONEFOREST DR,STE 320,WOODSTOCK,GA,30189,7705165199,1316003577,MC0707286,,20180918,81872456,0,00,0,,47335017795,ILUMYA SD PFS,1,31,20181220 23:00:00,UPS,1Z265561NW86226400,PRESCRIBER OFFICE,"100 STONE FOREST DRIVE,100 STONE FOREST DRIVE ...",WOODSTOCK,GA,30,,PHARMACY,,OTHER,,,,,,,,,2120001,VVVVV,2019-07-01 13:25:07,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-07-01 13:35:27,3,2018-12-31,2018-12-31
2,20181106 23:00:00,CVS,1043382302,COM,182176830,0,HUB,2018-10-19,1303801,9009919609,,,,M,,,,,30,L40.0,,2018-12-31,ACTIVE,SHIPMENT,ACTIVE,SHIPMENT,FULFILLMENT,CHAO,TOMAS,100 STONEFOREST DR,STE 320,WOODSTOCK,GA,30189,7705165199,1316003577,MC0707286,,20180918,81872197,0,00,0,,47335017795,ILUMYA SD PFS,1,28,20181106 23:00:00,UPS,1Z265561NW85358841,PRESCRIBER OFFICE,"100 STONE FOREST DRIVE,SUITE 320",WOODSTOCK,GA,30,,PHARMACY,,OTHER,,,,,,,,,2120001,VVVVV,2019-07-01 13:25:07,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-07-01 13:35:27,3,2018-12-31,2018-12-31
3,20181220 23:00:00,CVS,1043382302,COM,183711690,0,HUB,2018-10-19,1303801,9009919609,,,,M,,,,,30,L40.0,,2018-12-31,ACTIVE,SHIPMENT,ACTIVE,SHIPMENT,FULFILLMENT,CHAO,TOMAS,100 STONEFOREST DR,STE 320,WOODSTOCK,GA,30189,7705165199,1316003577,MC0707286,,20180918,81872456,0,00,0,,47335017795,ILUMYA SD PFS,1,31,20181220 23:00:00,UPS,1Z265561NW86226400,PRESCRIBER OFFICE,"100 STONE FOREST DRIVE,100 STONE FOREST DRIVE ...",WOODSTOCK,GA,30,,PHARMACY,,OTHER,,,,,,,,,2120001,VVVVV,2019-07-01 13:25:07,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-07-01 13:35:27,3,2018-12-31,2018-12-31
4,20181106 23:00:00,CVS,1043382302,COM,182176830,0,HUB,2018-10-19,1303801,9009919609,,,,M,,,,,30,L40.0,,2018-12-31,ACTIVE,SHIPMENT,ACTIVE,SHIPMENT,FULFILLMENT,CHAO,TOMAS,100 STONEFOREST DR,STE 320,WOODSTOCK,GA,30189,7705165199,1316003577,MC0707286,,20180918,81872197,0,00,0,,47335017795,ILUMYA SD PFS,1,28,20181106 23:00:00,UPS,1Z265561NW85358841,PRESCRIBER OFFICE,"100 STONE FOREST DRIVE,SUITE 320",WOODSTOCK,GA,30,,PHARMACY,,OTHER,,,,,,,,,2120001,VVVVV,2019-06-26 15:28:20,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-06-26 15:33:32,4,2018-12-31,2018-12-31
5,20181220 23:00:00,CVS,1043382302,COM,183711690,0,HUB,2018-10-19,1303801,9009919609,,,,M,,,,,30,L40.0,,2018-12-31,ACTIVE,SHIPMENT,ACTIVE,SHIPMENT,FULFILLMENT,CHAO,TOMAS,100 STONEFOREST DR,STE 320,WOODSTOCK,GA,30189,7705165199,1316003577,MC0707286,,20180918,81872456,0,00,0,,47335017795,ILUMYA SD PFS,1,31,20181220 23:00:00,UPS,1Z265561NW86226400,PRESCRIBER OFFICE,"100 STONE FOREST DRIVE,100 STONE FOREST DRIVE ...",WOODSTOCK,GA,30,,PHARMACY,,OTHER,,,,,,,,,2120001,VVVVV,2019-06-26 15:28:20,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-06-26 15:33:32,4,2018-12-31,2018-12-31
6,20190325 23:00:00,CVS,1518948413,COM,901177673820190325000000,0,DIRECT,2019-03-15,,9011776738,,,,M,,,,,11,L40.0,,2019-03-25,PENDING,PATIENT CONTACT,PENDING,PATIENT CONTACT,FULFILLMENT,COHEN,RUSSELL,258 MERRICK RD,,OCEANSIDE,NY,11572,5167660345,1538167242,BC1293911,,20190315,84604413,0,,0,,47335017795,ILUMYA SD PFS,2,28,,,,,,,,,,MEDICAL,,,,,,,,,,,2120006,VVVVV,2019-07-01 13:25:07,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-07-01 13:35:27,3,2019-03-25,2019-03-25
7,20190325 23:00:00,CVS,1518948413,COM,901177673820190325000000,0,DIRECT,2019-03-15,,9011776738,,,,M,,,,,11,L40.0,,2019-03-25,PENDING,PATIENT CONTACT,PENDING,PATIENT CONTACT,FULFILLMENT,COHEN,RUSSELL,258 MERRICK RD,,OCEANSIDE,NY,11572,5167660345,1538167242,BC1293911,,20190315,84604413,0,,0,,47335017795,ILUMYA SD PFS,2,28,,,,,,,,,,MEDICAL,,,,,,,,,,,2120006,VVVVV,2019-07-01 13:25:07,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-07-01 13:35:27,3,2019-03-25,2019-03-25
8,20190325 23:00:00,CVS,1518948413,COM,901177673820190325000000,0,DIRECT,2019-03-15,,9011776738,,,,M,,,,,11,L40.0,,2019-03-25,PENDING,PATIENT CONTACT,PENDING,PATIENT CONTACT,FULFILLMENT,COHEN,RUSSELL,258 MERRICK RD,,OCEANSIDE,NY,11572,5167660345,1538167242,BC1293911,,20190315,84604413,0,,0,,47335017795,ILUMYA SD PFS,2,28,,,,,,,,,,MEDICAL,,,,,,,,,,,2120006,VVVVV,2019-06-26 15:28:20,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-06-26 15:33:32,4,2019-03-25,2019-03-25
9,20190405 23:00:00,CVS,1518948413,COM,901177673820190405000000,0,DIRECT,2019-03-15,,9011776738,,,,M,,,,,11,L40.0,,2019-04-05,CANCELLED,PATIENT RESPONSE,CANCELLED,PATIENT RESPONSE,PATIENT,COHEN,RUSSELL,258 MERRICK RD,,OCEANSIDE,NY,11572,5167660345,1538167242,BC1293911,,20190315,84604413,0,,0,,47335017795,ILUMYA SD PFS,2,28,,,,,,,,,,MEDICAL,,,,,,,,,,,2120006,VVVVV,2019-07-01 13:25:07,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-07-01 13:35:27,3,2019-03-25,2019-03-25


In [74]:
test_df.groupby(groupby_cols)[status_date].min()

msa_patient_id  pharm_code  medication             
2120001         CVS         ILUMYA SD PFS             2018-12-31
2120006         CVS         ILUMYA SD PFS             2019-03-25
2120009         BRV         ILUMYA 100MG/ML PFS INJ   2019-01-03
2120012         BRV         ILUMYA 100MG/ML PFS INJ   2018-11-20
2120024         BRV         ILUMYA 100MG/ML PFS INJ   2018-11-20
2120025         ACCREDO     ILUMYA                    2019-06-19
2120026         CGN         ILUMYA INJ 100MG/ML       2018-12-12
                WAG         ILUMYA 100MG/ML PFS 1ML   2019-04-11
2120046         CVS         ILUMYA SD PFS             2019-02-04
2120052         WAG         ILUMYA 100MG/ML PFS 1ML   2018-11-08
2120054         CVS         ILUMYA SD PFS             2019-02-12
2130001         BRV         ILUMYA 100MG/ML PFS INJ   2018-12-13
2130004         BRV         ILUMYA 100MG/ML PFS INJ   2018-11-14
2130005         BRV         ILUMYA 100MG/ML PFS INJ   2018-11-12
2130008         BRV         ILUMYA 100

In [70]:
test_df.fillna(test_df.groupby(groupby_cols)[status_date].transform('min'),axis=1)

NotImplementedError: Currently only can fill with dict/Series column by column

In [64]:
test_df

Unnamed: 0,rec_date,pharm_code,pharm_npi,transtype,pharm_transaction_id,trans_seq,ref_source,ref_date,program_id,pharmacy_id,pat_last_name,pat_first_name,pat_dob,pat_gender,pat_addr1,pat_addr2,pat_city,pat_state,pat_zip,dx1_code,dx2_code,status_date,status_code,sub_status,integrichain_status,integrichain_sub_status,Patient_Journey_Hierarchy,pres_last_name,pres_first_name,pres_addr1,pres_addr2,pres_city,pres_state,pres_zip,pres_phone,pres_npi,pres_dea,facility_name,rxdate,rxnumber,rxrefills,rxfill,refill_remaining,prev_disp,rx_ndc_number,medication,quantity,day_supply,ship_date,ship_carrier,shiptracking_num,ship_location,ship_address,ship_city,ship_state,ship_zip,has_medical,primary_coverage_type,primary_payer_name,primary_payer_type,secondary_coverage_type,secondary_payer_name,secondary_payer_type,plan_paid_amt,pat_copay,copay_assist_amount,oth_payer_amt,xfer_pharmname,msa_patient_id,msa_patient_bmap,__metadata_run_timestamp,__metadata_app_version,__metadata_output_contract,__metadata_transform_timestamp,__metadata_run_id,min_bvpa_date
0,20181106 23:00:00,CVS,1043382302,COM,182176830,0,HUB,2018-10-19,1303801,9009919609,,,,M,,,,,30,L40.0,,2018-12-31,ACTIVE,SHIPMENT,ACTIVE,SHIPMENT,FULFILLMENT,CHAO,TOMAS,100 STONEFOREST DR,STE 320,WOODSTOCK,GA,30189,7705165199,1316003577,MC0707286,,20180918,81872197,0,00,0,,47335017795,ILUMYA SD PFS,1,28,20181106 23:00:00,UPS,1Z265561NW85358841,PRESCRIBER OFFICE,"100 STONE FOREST DRIVE,SUITE 320",WOODSTOCK,GA,30,,PHARMACY,,OTHER,,,,,,,,,2120001,VVVVV,2019-07-01 13:25:07,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-07-01 13:35:27,3,NaT
1,20181220 23:00:00,CVS,1043382302,COM,183711690,0,HUB,2018-10-19,1303801,9009919609,,,,M,,,,,30,L40.0,,2018-12-31,ACTIVE,SHIPMENT,ACTIVE,SHIPMENT,FULFILLMENT,CHAO,TOMAS,100 STONEFOREST DR,STE 320,WOODSTOCK,GA,30189,7705165199,1316003577,MC0707286,,20180918,81872456,0,00,0,,47335017795,ILUMYA SD PFS,1,31,20181220 23:00:00,UPS,1Z265561NW86226400,PRESCRIBER OFFICE,"100 STONE FOREST DRIVE,100 STONE FOREST DRIVE ...",WOODSTOCK,GA,30,,PHARMACY,,OTHER,,,,,,,,,2120001,VVVVV,2019-07-01 13:25:07,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-07-01 13:35:27,3,NaT
2,20181106 23:00:00,CVS,1043382302,COM,182176830,0,HUB,2018-10-19,1303801,9009919609,,,,M,,,,,30,L40.0,,2018-12-31,ACTIVE,SHIPMENT,ACTIVE,SHIPMENT,FULFILLMENT,CHAO,TOMAS,100 STONEFOREST DR,STE 320,WOODSTOCK,GA,30189,7705165199,1316003577,MC0707286,,20180918,81872197,0,00,0,,47335017795,ILUMYA SD PFS,1,28,20181106 23:00:00,UPS,1Z265561NW85358841,PRESCRIBER OFFICE,"100 STONE FOREST DRIVE,SUITE 320",WOODSTOCK,GA,30,,PHARMACY,,OTHER,,,,,,,,,2120001,VVVVV,2019-07-01 13:25:07,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-07-01 13:35:27,3,NaT
3,20181220 23:00:00,CVS,1043382302,COM,183711690,0,HUB,2018-10-19,1303801,9009919609,,,,M,,,,,30,L40.0,,2018-12-31,ACTIVE,SHIPMENT,ACTIVE,SHIPMENT,FULFILLMENT,CHAO,TOMAS,100 STONEFOREST DR,STE 320,WOODSTOCK,GA,30189,7705165199,1316003577,MC0707286,,20180918,81872456,0,00,0,,47335017795,ILUMYA SD PFS,1,31,20181220 23:00:00,UPS,1Z265561NW86226400,PRESCRIBER OFFICE,"100 STONE FOREST DRIVE,100 STONE FOREST DRIVE ...",WOODSTOCK,GA,30,,PHARMACY,,OTHER,,,,,,,,,2120001,VVVVV,2019-07-01 13:25:07,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-07-01 13:35:27,3,NaT
4,20181106 23:00:00,CVS,1043382302,COM,182176830,0,HUB,2018-10-19,1303801,9009919609,,,,M,,,,,30,L40.0,,2018-12-31,ACTIVE,SHIPMENT,ACTIVE,SHIPMENT,FULFILLMENT,CHAO,TOMAS,100 STONEFOREST DR,STE 320,WOODSTOCK,GA,30189,7705165199,1316003577,MC0707286,,20180918,81872197,0,00,0,,47335017795,ILUMYA SD PFS,1,28,20181106 23:00:00,UPS,1Z265561NW85358841,PRESCRIBER OFFICE,"100 STONE FOREST DRIVE,SUITE 320",WOODSTOCK,GA,30,,PHARMACY,,OTHER,,,,,,,,,2120001,VVVVV,2019-06-26 15:28:20,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-06-26 15:33:32,4,NaT
5,20181220 23:00:00,CVS,1043382302,COM,183711690,0,HUB,2018-10-19,1303801,9009919609,,,,M,,,,,30,L40.0,,2018-12-31,ACTIVE,SHIPMENT,ACTIVE,SHIPMENT,FULFILLMENT,CHAO,TOMAS,100 STONEFOREST DR,STE 320,WOODSTOCK,GA,30189,7705165199,1316003577,MC0707286,,20180918,81872456,0,00,0,,47335017795,ILUMYA SD PFS,1,31,20181220 23:00:00,UPS,1Z265561NW86226400,PRESCRIBER OFFICE,"100 STONE FOREST DRIVE,100 STONE FOREST DRIVE ...",WOODSTOCK,GA,30,,PHARMACY,,OTHER,,,,,,,,,2120001,VVVVV,2019-06-26 15:28:20,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-06-26 15:33:32,4,NaT
6,20190308 23:00:00,CVS,1043382302,COM,901165655620190308000000,0,HUB,2019-03-08,1337729,9011656556,,,,M,,,,,08,,,2019-03-08,PENDING,NEW,PENDING,NEW,INTAKE,PRADEEP,MEERA,347 MT PLEASANT AVE,STE 103,WEST ORANGE,NJ,07052,9735712121,1346529948,MP3244314,,,,,,,,,,,,,,,,,,,,,MEDICAL,,,,,,,,,,,2120004,VVVVV,2019-07-01 13:25:07,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-07-01 13:35:27,3,NaT
7,20190308 23:00:00,CVS,1043382302,COM,901165655620190308000000,0,HUB,2019-03-08,1337729,9011656556,,,,M,,,,,08,,,2019-03-08,PENDING,NEW,PENDING,NEW,INTAKE,PRADEEP,MEERA,347 MT PLEASANT AVE,STE 103,WEST ORANGE,NJ,07052,9735712121,1346529948,MP3244314,,,,,,,,,,,,,,,,,,,,,MEDICAL,,,,,,,,,,,2120004,VVVVV,2019-07-01 13:25:07,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-07-01 13:35:27,3,NaT
8,20190308 23:00:00,CVS,1043382302,COM,901165655620190308000000,0,HUB,2019-03-08,1337729,9011656556,,,,M,,,,,08,,,2019-03-08,PENDING,NEW,PENDING,NEW,INTAKE,PRADEEP,MEERA,347 MT PLEASANT AVE,STE 103,WEST ORANGE,NJ,07052,9735712121,1346529948,MP3244314,,,,,,,,,,,,,,,,,,,,,MEDICAL,,,,,,,,,,,2120004,VVVVV,2019-06-26 15:28:20,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-06-26 15:33:32,4,NaT
9,20190311 23:00:00,CVS,1518948413,COM,901165655620190311000000,0,HUB,2019-03-08,1337729,9011656556,,,,M,,,,,08,,,2019-03-11,PENDING,APPEAL,PENDING,APPEAL,BV/PA,PRADEEP,MEERA,347 MT PLEASANT AVE,STE 103,WEST ORANGE,NJ,07052,9735712121,1346529948,MP3244314,,,,,,,,,,,,,,,,,,,,,MEDICAL,,,,,,,,,,,2120004,VVVVV,2019-07-01 13:25:07,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-07-01 13:35:27,3,NaT


In [63]:
test_df.groupby(groupby_cols)[status_date].transform('min')

0       2018-12-31
1       2018-12-31
2       2018-12-31
3       2018-12-31
4       2018-12-31
5       2018-12-31
6              NaT
7              NaT
8              NaT
9              NaT
10             NaT
11             NaT
12             NaT
13             NaT
14             NaT
15      2019-03-25
16      2019-03-25
17      2019-03-25
18      2019-03-25
19      2019-03-25
20      2019-03-25
21             NaT
22             NaT
23             NaT
24      2019-01-03
25      2019-01-03
26      2019-01-03
27      2019-01-03
28      2019-01-03
29      2019-01-03
           ...    
37375          NaT
37376          NaT
37377          NaT
37378          NaT
37379          NaT
37380          NaT
37381          NaT
37382          NaT
37383          NaT
37384          NaT
37385          NaT
37386          NaT
37387          NaT
37388          NaT
37389          NaT
37390          NaT
37391          NaT
37392          NaT
37393          NaT
37394          NaT
37395          NaT
37396       

In [59]:
test_df

Unnamed: 0,rec_date,pharm_code,pharm_npi,transtype,pharm_transaction_id,trans_seq,ref_source,ref_date,program_id,pharmacy_id,pat_last_name,pat_first_name,pat_dob,pat_gender,pat_addr1,pat_addr2,pat_city,pat_state,pat_zip,dx1_code,dx2_code,status_date,status_code,sub_status,integrichain_status,integrichain_sub_status,Patient_Journey_Hierarchy,pres_last_name,pres_first_name,pres_addr1,pres_addr2,pres_city,pres_state,pres_zip,pres_phone,pres_npi,pres_dea,facility_name,rxdate,rxnumber,rxrefills,rxfill,refill_remaining,prev_disp,rx_ndc_number,medication,quantity,day_supply,ship_date,ship_carrier,shiptracking_num,ship_location,ship_address,ship_city,ship_state,ship_zip,has_medical,primary_coverage_type,primary_payer_name,primary_payer_type,secondary_coverage_type,secondary_payer_name,secondary_payer_type,plan_paid_amt,pat_copay,copay_assist_amount,oth_payer_amt,xfer_pharmname,msa_patient_id,msa_patient_bmap,__metadata_run_timestamp,__metadata_app_version,__metadata_output_contract,__metadata_transform_timestamp,__metadata_run_id,min_bvpa_date
0,20181106 23:00:00,CVS,1043382302,COM,182176830,0,HUB,2018-10-19,1303801,9009919609,,,,M,,,,,30,L40.0,,2018-12-31,ACTIVE,SHIPMENT,ACTIVE,SHIPMENT,FULFILLMENT,CHAO,TOMAS,100 STONEFOREST DR,STE 320,WOODSTOCK,GA,30189,7705165199,1316003577,MC0707286,,20180918,81872197,0,00,0,,47335017795,ILUMYA SD PFS,1,28,20181106 23:00:00,UPS,1Z265561NW85358841,PRESCRIBER OFFICE,"100 STONE FOREST DRIVE,SUITE 320",WOODSTOCK,GA,30,,PHARMACY,,OTHER,,,,,,,,,2120001,VVVVV,2019-07-01 13:25:07,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-07-01 13:35:27,3,2018-12-31
1,20181220 23:00:00,CVS,1043382302,COM,183711690,0,HUB,2018-10-19,1303801,9009919609,,,,M,,,,,30,L40.0,,2018-12-31,ACTIVE,SHIPMENT,ACTIVE,SHIPMENT,FULFILLMENT,CHAO,TOMAS,100 STONEFOREST DR,STE 320,WOODSTOCK,GA,30189,7705165199,1316003577,MC0707286,,20180918,81872456,0,00,0,,47335017795,ILUMYA SD PFS,1,31,20181220 23:00:00,UPS,1Z265561NW86226400,PRESCRIBER OFFICE,"100 STONE FOREST DRIVE,100 STONE FOREST DRIVE ...",WOODSTOCK,GA,30,,PHARMACY,,OTHER,,,,,,,,,2120001,VVVVV,2019-07-01 13:25:07,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-07-01 13:35:27,3,2018-12-31
2,20181106 23:00:00,CVS,1043382302,COM,182176830,0,HUB,2018-10-19,1303801,9009919609,,,,M,,,,,30,L40.0,,2018-12-31,ACTIVE,SHIPMENT,ACTIVE,SHIPMENT,FULFILLMENT,CHAO,TOMAS,100 STONEFOREST DR,STE 320,WOODSTOCK,GA,30189,7705165199,1316003577,MC0707286,,20180918,81872197,0,00,0,,47335017795,ILUMYA SD PFS,1,28,20181106 23:00:00,UPS,1Z265561NW85358841,PRESCRIBER OFFICE,"100 STONE FOREST DRIVE,SUITE 320",WOODSTOCK,GA,30,,PHARMACY,,OTHER,,,,,,,,,2120001,VVVVV,2019-07-01 13:25:07,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-07-01 13:35:27,3,2018-12-31
3,20181220 23:00:00,CVS,1043382302,COM,183711690,0,HUB,2018-10-19,1303801,9009919609,,,,M,,,,,30,L40.0,,2018-12-31,ACTIVE,SHIPMENT,ACTIVE,SHIPMENT,FULFILLMENT,CHAO,TOMAS,100 STONEFOREST DR,STE 320,WOODSTOCK,GA,30189,7705165199,1316003577,MC0707286,,20180918,81872456,0,00,0,,47335017795,ILUMYA SD PFS,1,31,20181220 23:00:00,UPS,1Z265561NW86226400,PRESCRIBER OFFICE,"100 STONE FOREST DRIVE,100 STONE FOREST DRIVE ...",WOODSTOCK,GA,30,,PHARMACY,,OTHER,,,,,,,,,2120001,VVVVV,2019-07-01 13:25:07,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-07-01 13:35:27,3,2018-12-31
4,20181106 23:00:00,CVS,1043382302,COM,182176830,0,HUB,2018-10-19,1303801,9009919609,,,,M,,,,,30,L40.0,,2018-12-31,ACTIVE,SHIPMENT,ACTIVE,SHIPMENT,FULFILLMENT,CHAO,TOMAS,100 STONEFOREST DR,STE 320,WOODSTOCK,GA,30189,7705165199,1316003577,MC0707286,,20180918,81872197,0,00,0,,47335017795,ILUMYA SD PFS,1,28,20181106 23:00:00,UPS,1Z265561NW85358841,PRESCRIBER OFFICE,"100 STONE FOREST DRIVE,SUITE 320",WOODSTOCK,GA,30,,PHARMACY,,OTHER,,,,,,,,,2120001,VVVVV,2019-06-26 15:28:20,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-06-26 15:33:32,4,2018-12-31
5,20181220 23:00:00,CVS,1043382302,COM,183711690,0,HUB,2018-10-19,1303801,9009919609,,,,M,,,,,30,L40.0,,2018-12-31,ACTIVE,SHIPMENT,ACTIVE,SHIPMENT,FULFILLMENT,CHAO,TOMAS,100 STONEFOREST DR,STE 320,WOODSTOCK,GA,30189,7705165199,1316003577,MC0707286,,20180918,81872456,0,00,0,,47335017795,ILUMYA SD PFS,1,31,20181220 23:00:00,UPS,1Z265561NW86226400,PRESCRIBER OFFICE,"100 STONE FOREST DRIVE,100 STONE FOREST DRIVE ...",WOODSTOCK,GA,30,,PHARMACY,,OTHER,,,,,,,,,2120001,VVVVV,2019-06-26 15:28:20,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-06-26 15:33:32,4,2018-12-31
6,20190308 23:00:00,CVS,1043382302,COM,901165655620190308000000,0,HUB,2019-03-08,1337729,9011656556,,,,M,,,,,08,,,2019-03-08,PENDING,NEW,PENDING,NEW,INTAKE,PRADEEP,MEERA,347 MT PLEASANT AVE,STE 103,WEST ORANGE,NJ,07052,9735712121,1346529948,MP3244314,,,,,,,,,,,,,,,,,,,,,MEDICAL,,,,,,,,,,,2120004,VVVVV,2019-07-01 13:25:07,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-07-01 13:35:27,3,NaT
7,20190308 23:00:00,CVS,1043382302,COM,901165655620190308000000,0,HUB,2019-03-08,1337729,9011656556,,,,M,,,,,08,,,2019-03-08,PENDING,NEW,PENDING,NEW,INTAKE,PRADEEP,MEERA,347 MT PLEASANT AVE,STE 103,WEST ORANGE,NJ,07052,9735712121,1346529948,MP3244314,,,,,,,,,,,,,,,,,,,,,MEDICAL,,,,,,,,,,,2120004,VVVVV,2019-07-01 13:25:07,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-07-01 13:35:27,3,NaT
8,20190308 23:00:00,CVS,1043382302,COM,901165655620190308000000,0,HUB,2019-03-08,1337729,9011656556,,,,M,,,,,08,,,2019-03-08,PENDING,NEW,PENDING,NEW,INTAKE,PRADEEP,MEERA,347 MT PLEASANT AVE,STE 103,WEST ORANGE,NJ,07052,9735712121,1346529948,MP3244314,,,,,,,,,,,,,,,,,,,,,MEDICAL,,,,,,,,,,,2120004,VVVVV,2019-06-26 15:28:20,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-06-26 15:33:32,4,NaT
9,20190311 23:00:00,CVS,1518948413,COM,901165655620190311000000,0,HUB,2019-03-08,1337729,9011656556,,,,M,,,,,08,,,2019-03-11,PENDING,APPEAL,PENDING,APPEAL,BV/PA,PRADEEP,MEERA,347 MT PLEASANT AVE,STE 103,WEST ORANGE,NJ,07052,9735712121,1346529948,MP3244314,,,,,,,,,,,,,,,,,,,,,MEDICAL,,,,,,,,,,,2120004,VVVVV,2019-07-01 13:25:07,0.0.11,s3://ichain-dev/sun-extract-validation/sun/ilu...,2019-07-01 13:35:27,3,NaT


In [39]:
groupby_cols = transform.sort_columns[:-1]
status_date = transform.sort_columns[-1]

# df = pd.merge(df,min_bvpa,how='left',on=groupby_cols).fillna(value=df[status_date].min()) # Add in fillna with max status date
# df = pd.merge(df,max_bvpa_intake,how='left',on=groupby_cols).fillna(value=df[status_date].max())
# df = pd.merge(df,max_bvpa,how='left',on=groupby_cols).fillna(value=df[status_date].max())
# df = pd.merge(df,min_bvpa_fulfillment,how='left',on=groupby_cols).fillna(value=df[status_date].min())

In [29]:
import unittest

def shape_status(final_dataframe,df):
    return df.shape[0] == final_dataframe.shape[0]

def substatus_cleaned(final_dataframe,sub_col):
    return final_dataframe[final_dataframe[sub_col].isna()]

class TestNotebook(unittest.TestCase):
    
    def test_shape_status(self):
        self.assertEqual(shape_status(final_dataframe,df),True)
        
    def test_substatus_cleaned(self):
        substatus_cleaned(final_dataframe,'Patient_Journey_Hierarchy')
    
unittest.main(argv=[''],verbosity=2,exit=False)

test_shape_status (__main__.TestNotebook) ... ok
test_substatus_cleaned (__main__.TestNotebook) ... ok

----------------------------------------------------------------------
Ran 2 tests in 0.056s

OK


<unittest.main.TestProgram at 0x7fec4de39828>

In [101]:
df_slice = ['msa_patient_id','pharm_code','medication','status_date','integrichain_status','integrichain_sub_status','Patient_Journey_Hierarchy',
            'Before_min_fulfillment','Before_min_BVPA','After_max_BVPA','After_max_intake','min_bvpa_date','max_bvpa_intake_date','max_bvpa_date','min_bvpa_fulfillment']

In [102]:
orig_df = df[(df.integrichain_status == 'PENDING') & (df.integrichain_sub_status.isin(transform.substatus_list))][df_slice[:7]]
review_df = final_dataframe[(final_dataframe.integrichain_status == 'PENDING') & (final_dataframe.integrichain_sub_status.isin(transform.substatus_list))][df_slice]

In [103]:
unique_id_dict = (
    review_df
    [['msa_patient_id','pharm_code','medication']]
    .drop_duplicates()
    .fillna(value=np.nan)
    .reset_index(drop=True)
    .to_dict(orient='index')
)

In [104]:
unique_id_dict[0]['medication']

nan

In [105]:
print(unique_id_dict[0]['msa_patient_id'])
print(unique_id_dict[0]['pharm_code'])
print(unique_id_dict[0]['medication'])

2120004
CVS
nan


In [106]:
review_df[review_df.msa_patient_id == '2120001']

Unnamed: 0,msa_patient_id,pharm_code,medication,status_date,integrichain_status,integrichain_sub_status,Patient_Journey_Hierarchy,Before_min_fulfillment,Before_min_BVPA,After_max_BVPA,After_max_intake,min_bvpa_date,max_bvpa_intake_date,max_bvpa_date,min_bvpa_fulfillment


In [119]:
i = 6
orig_df[(orig_df.msa_patient_id == unique_id_dict[i]['msa_patient_id']) & (orig_df.pharm_code == unique_id_dict[i]['pharm_code']) & (orig_df.medication == unique_id_dict[i]['medication'])].sort_values(['msa_patient_id','pharm_code','medication','status_date'])

Unnamed: 0,msa_patient_id,pharm_code,medication,status_date,integrichain_status,integrichain_sub_status,Patient_Journey_Hierarchy
14825,2120025,ACCREDO,ILUMYA,2019-06-19,PENDING,NEW,INTAKE
15289,2120025,ACCREDO,ILUMYA,2019-06-19,PENDING,NEW,INTAKE


In [120]:
review_df[(review_df.msa_patient_id == unique_id_dict[i]['msa_patient_id']) & (review_df.pharm_code == unique_id_dict[i]['pharm_code']) & (review_df.medication == unique_id_dict[i]['medication'])]

Unnamed: 0,msa_patient_id,pharm_code,medication,status_date,integrichain_status,integrichain_sub_status,Patient_Journey_Hierarchy,Before_min_fulfillment,Before_min_BVPA,After_max_BVPA,After_max_intake,min_bvpa_date,max_bvpa_intake_date,max_bvpa_date,min_bvpa_fulfillment
90,2120025,ACCREDO,ILUMYA,2019-06-19,PENDING,NEW,BV/PA,0.0,0.0,0.0,0.0,NaT,2019-06-19,NaT,NaT
91,2120025,ACCREDO,ILUMYA,2019-06-19,PENDING,NEW,BV/PA,0.0,0.0,0.0,0.0,NaT,2019-06-19,NaT,NaT


In [None]:
enrich_df.loc[:,self.pjh] = (
            np.where(enrich_df.loc[:,'Before_min_fulfillment'] == 1, self.intake,
            np.where((enrich_df.loc[:,'Before_min_BVPA'] == 0) & (enrich_df.loc[:,'After_max_BVPA'] == 0),self.bvpa,
            np.where(enrich_df.loc[:,'After_max_intake'] == 1, self.fulfillment,
            enrich_df.loc[:,self.pjh])))
        )

In [108]:
i = 9
orig_df[(orig_df.msa_patient_id == unique_id_dict[i]['msa_patient_id']) & (orig_df.pharm_code == unique_id_dict[i]['pharm_code']) & (orig_df.medication == unique_id_dict[i]['medication'])].sort_values(['msa_patient_id','pharm_code','medication','status_date'])

Unnamed: 0,msa_patient_id,pharm_code,medication,status_date,integrichain_status,integrichain_sub_status,Patient_Journey_Hierarchy
15028,2120026,WAG,ILUMYA 100MG/ML PFS 1ML,2019-04-11,PENDING,NEW,INTAKE
15045,2120026,WAG,ILUMYA 100MG/ML PFS 1ML,2019-04-11,PENDING,NEW,INTAKE
15151,2120026,WAG,ILUMYA 100MG/ML PFS 1ML,2019-04-11,PENDING,NEW,INTAKE
26541,2120026,WAG,ILUMYA 100MG/ML PFS 1ML,2019-04-12,PENDING,PRESCRIBER,FULFILLMENT
26599,2120026,WAG,ILUMYA 100MG/ML PFS 1ML,2019-04-12,PENDING,PRESCRIBER,FULFILLMENT
26936,2120026,WAG,ILUMYA 100MG/ML PFS 1ML,2019-04-12,PENDING,PRESCRIBER,FULFILLMENT


In [109]:
review_df[(review_df.msa_patient_id == unique_id_dict[i]['msa_patient_id']) & (review_df.pharm_code == unique_id_dict[i]['pharm_code']) & (review_df.medication == unique_id_dict[i]['medication'])].sort_values(['msa_patient_id','pharm_code','medication','status_date'])

Unnamed: 0,msa_patient_id,pharm_code,medication,status_date,integrichain_status,integrichain_sub_status,Patient_Journey_Hierarchy,Before_min_fulfillment,Before_min_BVPA,After_max_BVPA,After_max_intake,min_bvpa_date,max_bvpa_intake_date,max_bvpa_date,min_bvpa_fulfillment
145,2120026,WAG,ILUMYA 100MG/ML PFS 1ML,2019-04-11,PENDING,NEW,INTAKE,1.0,0.0,0.0,0.0,NaT,2019-04-11,NaT,2019-04-12
146,2120026,WAG,ILUMYA 100MG/ML PFS 1ML,2019-04-11,PENDING,NEW,INTAKE,1.0,0.0,0.0,0.0,NaT,2019-04-11,NaT,2019-04-12
147,2120026,WAG,ILUMYA 100MG/ML PFS 1ML,2019-04-11,PENDING,NEW,INTAKE,1.0,0.0,0.0,0.0,NaT,2019-04-11,NaT,2019-04-12
148,2120026,WAG,ILUMYA 100MG/ML PFS 1ML,2019-04-12,PENDING,PRESCRIBER,BV/PA,0.0,0.0,0.0,1.0,NaT,2019-04-11,NaT,2019-04-12
149,2120026,WAG,ILUMYA 100MG/ML PFS 1ML,2019-04-12,PENDING,PRESCRIBER,BV/PA,0.0,0.0,0.0,1.0,NaT,2019-04-11,NaT,2019-04-12
150,2120026,WAG,ILUMYA 100MG/ML PFS 1ML,2019-04-12,PENDING,PRESCRIBER,BV/PA,0.0,0.0,0.0,1.0,NaT,2019-04-11,NaT,2019-04-12


### Publish

In [None]:
## that's it - just provide the final dataframe to the var final_dataframe and we take it from there
transform.publish_contract.publish(final_dataframe, run_id, session)
session.close()