In [None]:
import numpy as np
import pandas as pd

from core.helpers.session_helper import SessionHelper
# Session shared by the rest of the notebook: it is used to query the Transformation
# record, is handed to publish_contract.publish(), and is closed in the final cell.
session = SessionHelper().session

In [None]:
"""
************ SETUP - DON'T TOUCH **************
This section imports data from the configuration database
and should not need to be altered or otherwise messed with. 
~~These are not the droids you are looking for~~
"""
from core.constants import BRANCH_NAME, ENV_BUCKET
from core.models.configuration import Transformation
from dataclasses import dataclass
from core.dataset_contract import DatasetContract

# Load the single Transformation row for this run; .one() raises unless exactly one row matches.
# NOTE(review): `transform_id` is not defined anywhere in this notebook -- presumably it is
# injected as a notebook parameter at execution time; confirm.
db_transform = session.query(Transformation).filter(Transformation.id == transform_id).one()

@dataclass
class DbTransform:
    """Values describing this transform instance, copied from ``db_transform``.

    Every field default is evaluated once, when this class body runs, from the
    ``db_transform`` row loaded above (and ``BRANCH_NAME``), so ``DbTransform()``
    can be constructed without arguments.
    """
    id: int = db_transform.id ## the instance id of the transform in the config app
    name: str = db_transform.transformation_template.name ## the transform name in the config app
    state: str = db_transform.pipeline_state.pipeline_state_type.name ## the pipeline state, one of raw, ingest, master, enhance, enrich, metrics, dimensional
    branch:str = BRANCH_NAME ## the git branch for this execution
    brand: str = db_transform.pipeline_state.pipeline.brand.name ## the pharma brand name
    pharmaceutical_company: str = db_transform.pipeline_state.pipeline.brand.pharmaceutical_company.name # the pharma company name
    # Contract used by the final cell to publish the result; built from the same
    # branch / state / company / brand / transform-name values as the fields above.
    publish_contract: DatasetContract = DatasetContract(branch=BRANCH_NAME,
                            state=db_transform.pipeline_state.pipeline_state_type.name,
                            parent=db_transform.pipeline_state.pipeline.brand.pharmaceutical_company.name,
                            child=db_transform.pipeline_state.pipeline.brand.name,
                            dataset=db_transform.transformation_template.name)


# CORE Cartridge Notebook::Patient Status Fill Rate
![CORE Logo](assets/coreLogo.png) 

---
## Keep in Mind
Good Transforms Are...
- **singular in purpose:** good transforms do one and only one thing, and handle all known cases for that thing. 
- **repeatable:** transforms should be written in a way that they can be run against the same dataset an infinite number of times and get the same result every time. 
- **easy to read:** 99 times out of 100, readable, clear code that runs a little slower is more valuable than a mess that runs quickly. 
- **No 'magic numbers':** if a variable or function is not instantly obvious as to what it is or does, without context, maybe consider renaming it.

## Workflow - how to use this notebook to make science
#### Data Science
1. **Document your transform.** Fill out the _description_ cell below describing what it is this transform does; this will appear in the configuration application where Ops will create, configure and update pipelines. 
1. **Define your config object.** Fill out the _configuration_ cell below the commented-out guide to define the variables you want ops to set in the configuration application (these will populate here for every pipeline). 
2. **Build your transformation logic.** Use the transformation cell to do that magic that you do. 
![caution](assets/cautionTape.png)

### Configuration

In [None]:
""" 
********* VARIABLES - PLEASE TOUCH ********* 
This section defines what you expect to get from the configuration application 
in a single "transform" object. Define the vars you need here, and comment inline to the right of them 
for all-in-one documentation. 
Engineering will build a production "transform" object for every pipeline that matches what you define here.

@@@ FORMAT OF THE DATA CLASS IS: @@@ 

<variable_name>: <data_type> #<comment explaining what the value is to future us>

e.g.

class Transform(DbTransform):
    some_ratio: float
    site_name: str

~~These ARE the droids you are looking for~~
"""

class Transform(DbTransform):
    '''
    Configuration and logic for the Patient Status Fill Rate transform.

    Attributes assigned from ``db_transform.variables`` are the values set for
    this transform in the configuration application. The remaining string
    attributes name the columns this transform creates in its output.
    ``fill_rate_schema`` is the entry point; the underscore-prefixed methods
    are its helpers.
    '''

    # ---- Input column names (configuration variables) ----
    # NOTE(review): these definitions were commented out although every method below
    # reads them (self.pat_id, self.status_date, ...), which made fill_rate_schema fail
    # with AttributeError. They are re-enabled here; each one must exist as a variable
    # of this transform in the configuration application -- confirm before deploying.
    pat_id: str = db_transform.variables.pat_id # Patient ID column
    pharm_name: str = db_transform.variables.pharm_name # Pharmacy Name column
    cs_outlet_id: str = db_transform.variables.cs_outlet_id # Outlet ID column
    brand_id: str = db_transform.variables.brand_id # Brand Name column
    diagnosis_code: str = db_transform.variables.diagnosis_code # Diagnosis column
    status_date: str = db_transform.variables.status_date # Status Date column
    ref_date: str = db_transform.variables.ref_date # Referral Date column
    transaction_id: str = db_transform.variables.transaction_id # Transaction ID column
    ref_source: str = db_transform.variables.ref_source # Referral Source column
    provider_fn: str = db_transform.variables.provider_fn # Provider First Name column
    provider_ln: str = db_transform.variables.provider_ln # Provider Last Name column
    provider_s: str = db_transform.variables.provider_s # Provider State column
    provider_z: str = db_transform.variables.provider_z # Provider Zip Code column
    payer: str = db_transform.variables.payer # Payer Name column
    payer_type: str = db_transform.variables.payer_type # Payer Type column

    input_transform: str = db_transform.variables.input_transform # The name of the transform to input source data from
    ic_status: str = db_transform.variables.ic_status # column name of integrichain status
    ic_sub_status: str = db_transform.variables.ic_sub_status # column name of integrichain sub status
    pjh: str = db_transform.variables.pjh # column name of patient journey hierarchy
    shipment_status: str = db_transform.variables.shipment_status # string of shipment status. Should be something like 'SHIPMENT'
    transfered_status: str = db_transform.variables.transfered_status # string of transferred status. Should be something like 'TRANSFERRED'
    cancelled_status: str = db_transform.variables.cancelled_status # string of cancelled status. Should be something like 'CANCELLED'
    open_status: str = db_transform.variables.open_status # string of open status. Should be something like 'OPEN'
    filled_status: str = db_transform.variables.filled_status # string of filled status. Should be something like 'FILLED'
    referral_status: str = db_transform.variables.referral_status # column name of referral status. Should be something like 'referral_status'

    # ---- Output column names created by this transform ----
    # These attributes were referenced (self.filled, ...) but never defined. The four
    # flag names are the literal columns the previous code also read as df.filled,
    # df.transferred, df.cancelled and df.still_open.
    filled: str = 'filled' # 1/0 flag: row is a shipment
    transferred: str = 'transferred' # 1/0 flag: row is a transfer
    cancelled: str = 'cancelled' # 1/0 flag: row is a cancellation that is not a transfer
    still_open: str = 'still_open' # 1/0 flag: none of the three flags above
    # NOTE(review): the literal names below are assumed to equal the attribute names;
    # confirm against the published Fill Rate schema.
    firstfilldate: str = 'firstfilldate' # status date of filled rows
    canceldate: str = 'canceldate' # status date of cancelled rows
    cancelsubstatus: str = 'cancelsubstatus' # sub status of cancelled rows
    cancelreason: str = 'cancelreason' # patient journey hierarchy value of cancelled rows

    # Number of leading digits kept from the numeric tail of a transaction id.
    # NOTE(review): presumably chosen so the value stays within 64-bit integer range; confirm.
    _MAX_TRANSACTION_ID_DIGITS = 19


    def fill_rate_schema(self, df):
        '''
        Build the fill-rate table from the status history in ``df``.

        For every (patient, pharmacy, brand) group the rows kept are those on
        the first shipment status date or, when the group has no shipment row,
        on the last non-shipment status date. Of those, only the rows with the
        highest numeric transaction id are retained. The result gets 1/0 status
        flags, fill/cancel detail columns and the referral status, and is
        returned with the column order and names set by ``_sort_for_table``.

        :param df: input dataframe containing the configured input columns.
        :return: new dataframe in the published column layout.
        '''
        col_ids = [self.pat_id, self.pharm_name, self.brand_id]
        df = df.sort_values(col_ids + [self.status_date])

        # Status date to use per group: first shipment date, else last non-shipment date.
        first_shipment = self._return_groupby_column(
            df, col_ids, self.status_date, 'min', 'status_date_to_use',
            filter_column=self.ic_sub_status, df_filter=self.shipment_status, comparison='==')
        last_non_shipment = self._return_groupby_column(
            df, col_ids, self.status_date, 'max', 'non_active_date',
            filter_column=self.ic_sub_status, df_filter=self.shipment_status, comparison='!=')
        date_to_use = pd.merge(first_shipment, last_non_shipment, on=col_ids, how='outer')
        date_to_use['status_date_to_use'] = (
            date_to_use['status_date_to_use'].fillna(date_to_use['non_active_date'])
        )
        date_to_use = date_to_use.drop(columns=['non_active_date'])
        df = pd.merge(df, date_to_use, on=col_ids)

        df = self._format_transaction_id(df)

        # Keep only the rows that fall on the chosen status date.
        kept_columns = [
            self.pat_id, self.pharm_name, self.brand_id, self.transaction_id, 'adjusted_transaction_id',
            self.status_date, self.ref_source, self.provider_fn, self.provider_ln, self.provider_s,
            self.payer, self.payer_type, self.ref_date, self.ic_status, self.ic_sub_status, self.pjh,
            self.cs_outlet_id, self.diagnosis_code, self.provider_z,
        ]
        on_chosen_date = df[self.status_date] == df['status_date_to_use']
        df = df.loc[on_chosen_date, kept_columns].drop_duplicates()

        # Of those, keep the rows carrying the highest numeric transaction id in the group.
        # Rows whose transaction id has no trailing digits compare unequal and drop out here.
        latest_transaction = self._return_groupby_column(
            df, col_ids, 'adjusted_transaction_id', 'max', 'max_transaction_id')
        df = pd.merge(df, latest_transaction, on=col_ids)
        df = (
            df[df['adjusted_transaction_id'] == df['max_transaction_id']]
            .drop(columns=['max_transaction_id', 'adjusted_transaction_id'])
        )

        df = self._create_one_hot_encoding(df)

        df = (
            df
            .drop_duplicates(subset=col_ids + [self.filled, self.transferred, self.cancelled, self.still_open])
            .reset_index(drop=True)
        )

        is_filled = df[self.filled] == 1
        is_cancelled = df[self.cancelled] == 1
        is_open = (df[self.transferred] == 1) | (df[self.still_open] == 1)

        # Dates are populated only on the rows they apply to and are missing elsewhere.
        df[self.firstfilldate] = pd.to_datetime(df[self.status_date].where(is_filled))
        df[self.canceldate] = pd.to_datetime(df[self.status_date].where(is_cancelled))

        # Order matters: a later assignment overrides an earlier one on rows with several flags set.
        df.loc[is_filled, self.referral_status] = self.filled_status
        df.loc[is_cancelled, self.referral_status] = self.cancelled_status
        df.loc[is_open, self.referral_status] = self.open_status

        df[self.cancelsubstatus] = df[self.ic_sub_status].where(is_cancelled)
        df[self.cancelreason] = df[self.pjh].where(is_cancelled)

        # This needs to be the last change made before returning.
        return self._sort_for_table(df)


    def _return_groupby_column(self, df ,group_by_cols, series, min_max ,new_column_name, filter_column=None, df_filter=None, comparison=None):
        '''
        Return the per-group minimum or maximum of one column.

        :param df: dataframe to aggregate.
        :param group_by_cols: list of column names to group by.
        :param series: name of the column to aggregate.
        :param min_max: 'min' or 'max'.
        :param new_column_name: name given to the aggregated column in the result.
        :param filter_column: optional column to filter on before grouping.
        :param df_filter: value ``filter_column`` is compared against; no filtering when None.
        :param comparison: comparison operator as a string ('==', '!=', '<', '<=', '>', '>=').
        :return: dataframe with ``group_by_cols`` plus ``new_column_name``.
        :raises ValueError: on an unknown ``min_max``, or an incomplete/unknown filter specification.
        '''
        if min_max not in ('min', 'max'):
            raise ValueError("min_max must be 'min' or 'max', got {!r}".format(min_max))

        if df_filter is not None:
            # Map the operator string to the matching Series method instead of eval()-ing
            # a string built from configuration values.
            comparators = {'==': 'eq', '!=': 'ne', '<': 'lt', '<=': 'le', '>': 'gt', '>=': 'ge'}
            if filter_column is None or comparison not in comparators:
                raise ValueError(
                    "filtering needs filter_column and a comparison in {}, got {!r} / {!r}".format(
                        sorted(comparators), filter_column, comparison))
            keep = getattr(df[filter_column], comparators[comparison])(df_filter)
            df = df[keep]

        # The 'max' request previously computed the minimum as well.
        return (
            df
            .groupby(group_by_cols)
            [series]
            .agg(min_max)
            .reset_index(drop=False)
            .rename(columns={series: new_column_name})
        )


    def _format_transaction_id(self,df):
        '''
        Add a numeric ``adjusted_transaction_id`` column derived from the transaction id.

        The trailing run of digits of each transaction id is taken, cut to its first
        ``_MAX_TRANSACTION_ID_DIGITS`` characters and converted to a number. Ids without
        trailing digits (or missing ids) yield a missing value instead of raising.

        :param df: dataframe containing the ``self.transaction_id`` column (string values).
        :return: new dataframe with the extra column; the input frame is not modified.
        '''
        trailing_digits = (
            df[self.transaction_id]
            .str.extract(r'(\d+$)', expand=False)
            .str[:self._MAX_TRANSACTION_ID_DIGITS]
        )
        return df.assign(adjusted_transaction_id=pd.to_numeric(trailing_digits))


    def _create_one_hot_encoding(self,df):
        '''
        Add the 1/0 status flag columns (filled, transferred, cancelled, still_open).

        :param df: dataframe containing the status, sub status and patient journey columns.
        :return: new dataframe with the four flag columns; the input frame is not modified.
        '''
        df = df.copy()

        is_filled = df[self.ic_sub_status] == self.shipment_status
        is_transferred = df[self.pjh] == self.transfered_status
        # A cancelled status on a transfer row counts as a transfer, not a cancellation.
        is_cancelled = (df[self.ic_status] == self.cancelled_status) & ~is_transferred
        is_still_open = ~(is_filled | is_transferred | is_cancelled)

        df[self.filled] = np.where(is_filled, 1, 0)
        df[self.transferred] = np.where(is_transferred, 1, 0)
        df[self.cancelled] = np.where(is_cancelled, 1, 0)
        df[self.still_open] = np.where(is_still_open, 1, 0)

        return df


    def _sort_for_table(self, df):
        '''
        Select the published columns in their final order and rename the input columns.

        :param df: dataframe containing every input and output column listed below.
        :return: dataframe restricted to the published columns, with published names.
        '''
        column_order = [
            self.transaction_id, self.pat_id, self.pharm_name, self.cs_outlet_id, self.brand_id,
            self.diagnosis_code, self.provider_fn, self.provider_ln, self.provider_s, self.provider_z,
            self.payer, self.payer_type, self.ref_source, self.referral_status, self.ref_date,
            self.ic_status, self.ic_sub_status, self.status_date, self.firstfilldate, self.canceldate,
            self.cancelsubstatus, self.cancelreason, self.filled, self.cancelled, self.transferred,
            self.still_open,
        ]

        published_names = {
            self.transaction_id: 'transaction_id',
            self.pat_id: 'longitudinal_patient_id',
            self.pharm_name: 'pharmacy_name',
            self.brand_id: 'brand',
            self.ref_source: 'referral_source',
            self.provider_fn: 'hcp_first_name',
            self.provider_ln: 'hcp_last_name',
            self.provider_s: 'hcp_state',
            self.payer: 'primary_payer',
            self.payer_type: 'primary_payer_type',
            self.ic_status: 'customer_status',
            self.ic_sub_status: 'customer_status_description',
            self.status_date: 'status_date',
            self.ref_date: 'referral_date',
            self.provider_z: 'hcp_zip',
            self.cs_outlet_id: 'cs_outlet_id',
            self.diagnosis_code: 'dx_1',
        }

        return df[column_order].rename(columns=published_names)

# Instantiate with no arguments, so every field takes its class-level default.
transform = Transform()

### Description
What does this transformation do? Be specific.

![what does your transform do](assets/what.gif)

![Fill Rate Schema](assets/FillRateSchema.png)



This script checks for any active status and grabs the first instance of Active Shipped and maps it as filled. For any other instance (Transferred, Cancelled, or Still Open) it grabs the last instance and maps it under the appropriate column.

### Transformation

In [None]:
"""
************ FETCH DATA - TOUCH, BUT CAREFULLY **************
This cell will be turned off in production, as the input_contract will be handled by the pipeline.
"""

from core.dataset_diff import DatasetDiff

# Fetch the rows of the configured input transform for this run.
# NOTE(review): `run_id` is not defined in this notebook -- presumably injected as a
# notebook parameter at execution time; confirm.
diff = DatasetDiff(db_transform.id)
# Bind the input to `df`: the transformation cell calls transform.fill_rate_schema(df),
# and `df` was not defined anywhere while this result was stored as `final_dataframe`.
df = diff.get_diff(transform_name=transform.input_transform, values=[run_id])

# import os

# import numpy as np
# import pandas as pd

# pd.options.display.max_columns=100

# df.status_date = df.status_date.str[:8].astype(str)
# df.ref_date = df.ref_date.str[:8].astype(str)

# df.status_date = pd.to_datetime(df.status_date, infer_datetime_format=True, errors='coerce')
# df.ref_date = pd.to_datetime(df.ref_date, infer_datetime_format=True, errors='coerce')

# os.chdir('{}'.format(os.path.expanduser('~')))
# status_config = pd.read_csv('status_mapping.csv')

# status_config.loc[:,'statusCode'] = status_config.statusCode.str.upper()
# status_config.loc[:,'subStatus'] = status_config.subStatus.str.upper()
# status_config.loc[:,'integrichain_sub_status'] = status_config.integrichain_sub_status.str.upper()
# status_config.loc[:,'integrichain_status'] = status_config.integrichain_status.str.upper()
# status_config.loc[:,'Patient_Journey_Hierarchy'] = status_config.Patient_Journey_Hierarchy.str.upper()

# status_config = status_config.rename(columns={'statusCode':'status_code','subStatus':'sub_status'})

# df.sub_status = df.sub_status.str.replace('PRESCRIBERHOLD','PRESCRIBER HOLD')

# df = pd.merge(df,status_config,on=['status_code','sub_status'])

# df = df[['rec_date', 'pharm_code', 'pharm_npi', 'transtype', 'pharm_transaction_id', 'trans_seq', 'ref_source', 'ref_date', 'program_id', 'pharmacy_id', 'pat_last_name', 'pat_first_name', 'pat_dob', 'pat_gender', 
#          'pat_addr1', 'pat_addr2', 'pat_city', 'pat_state', 'pat_zip', 'dx1_code', 'dx2_code', 'status_date', 'status_code', 'sub_status', 'integrichain_status','integrichain_sub_status', 'Patient_Journey_Hierarchy', 
#          'pres_last_name', 'pres_first_name', 'pres_addr1', 'pres_addr2', 'pres_city', 'pres_state', 'pres_zip', 'pres_phone', 'pres_npi', 'pres_dea', 'facility_name', 'rxdate', 'rxnumber', 'rxrefills', 'rxfill', 
#          'refill_remaining', 'prev_disp', 'rx_ndc_number', 'medication', 'quantity', 'day_supply', 'ship_date', 'ship_carrier', 'shiptracking_num', 'ship_location', 'ship_address', 'ship_city', 'ship_state', 'ship_zip', 
#          'has_medical', 'primary_coverage_type', 'primary_payer_name', 'primary_payer_type', 'secondary_coverage_type', 'secondary_payer_name', 'secondary_payer_type', 'plan_paid_amt', 'pat_copay', 'copay_assist_amount', 
#          'oth_payer_amt', 'xfer_pharmname', 'msa_patient_id', 'msa_patient_bmap', '__metadata_run_timestamp', '__metadata_app_version', '__metadata_output_contract', '__metadata_transform_timestamp', '__metadata_run_id']]

In [None]:
### Use the variables above to execute your transformation. the final output needs to be a variable named final_dataframe
# Expects `df` to already hold the input dataframe when this cell runs.
#final_dataframe
final_dataframe = transform.fill_rate_schema(df)

### Publish

In [None]:
## that's it - just provide the final dataframe to the var final_dataframe and we take it from there
try:
    transform.publish_contract.publish(final_dataframe, run_id, session)
finally:
    # Close the session even when publishing raises, so it is never left open.
    session.close()