In [1]:
# Id of the Transformation row in the configuration database that this notebook runs as;
# the setup cell uses it to look up the transform's configuration.
transform_id = 16

In [2]:
"""
************ SETUP - DON'T TOUCH **************
This section imports data from the configuration database
and should not need to be altered, molested or otherwise messed with. 
~~These are not the droids you are looking for~~
"""
from core.constants import BRANCH_NAME, ENV_BUCKET
from core.helpers.session_helper import SessionHelper
from core.models.configuration import Transformation
from dataclasses import dataclass
from core.dataset_contract import DatasetContract
import pandas as pd

# Fetch the configured Transformation row whose id matches transform_id.
# NOTE(review): this looks like a SQLAlchemy session; if so, .one() raises unless
# exactly one row matches — confirm against SessionHelper.
db_transform = (
    SessionHelper()
    .session
    .query(Transformation)
    .filter(Transformation.id == transform_id)
    .one()
)

@dataclass
class DbTransform:
    """Read-only view of the configured transform, populated from ``db_transform``.

    Every field default is evaluated once, when this class body executes, from the
    ``db_transform`` row loaded above; instances created without arguments all
    carry those same values.
    """
    id: int = db_transform.id ## the instance id of the transform in the config app
    name: str = db_transform.transformation_template.name ## the transform name in the config app
    state: str = db_transform.pipeline_state.pipeline_state_type.name ## the pipeline state, one of raw, ingest, master, enhance, enrich, metrics, dimensional
    branch: str = BRANCH_NAME ## the git branch for this execution 
    brand: str = db_transform.pipeline_state.pipeline.brand.name ## the pharma brand name
    pharmaceutical_company: str = db_transform.pipeline_state.pipeline.brand.pharmaceutical_company.name # the pharma company name
    # Contract used to publish this transform's output. It is built from the same
    # branch / state / company / brand / transform-name values as the fields above.
    # The default is a single DatasetContract object created at class-definition
    # time, so it is shared by every instance that does not override it.
    publish_contract: DatasetContract = DatasetContract(branch=BRANCH_NAME,
                            state=db_transform.pipeline_state.pipeline_state_type.name,
                            parent=db_transform.pipeline_state.pipeline.brand.pharmaceutical_company.name,
                            child=db_transform.pipeline_state.pipeline.brand.name,
                            dataset=db_transform.transformation_template.name)

2019-05-28 18:35:26,738 - core.helpers.session_helper.SessionHelper - INFO - Creating session for dev environment...
2019-05-28 18:35:26,761 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Generating administrator mocks.
2019-05-28 18:35:26,790 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Done generating administrator mocks.
2019-05-28 18:35:26,791 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Generating pharmaceutical company mocks.
2019-05-28 18:35:26,795 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Done generating pharmaceutical company mocks.
2019-05-28 18:35:26,796 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Generating brand mocks.
2019-05-28 18:35:26,800 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Done generating brand mocks.
2019-05-28 18:35:26,801 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Generating segment mocks.
2019-05-28 18:35:2

# CORE Cartridge Notebook::symphony_health_association_filter_to_brand
![CORE Logo](assets/coreLogo.png) 

---
## Keep in Mind
Good Transforms Are...
- **singular in purpose:** good transforms do one and only one thing, and handle all known cases for that thing. 
- **repeatable:** transforms should be written in a way that they can be run against the same dataset an infinite number of times and get the same result every time. 
- **easy to read:** 99 times out of 100, readable, clear code that runs a little slower is more valuable than a mess that runs quickly. 
- **No 'magic numbers':** if a variable or function is not instantly obvious as to what it is or does, without context, maybe consider renaming it.

## Workflow - how to use this notebook to make science
#### Data Science
1. **Document your transform.** Fill out the _description_ cell below describing what it is this transform does; this will appear in the configuration application where Ops will create, configure and update pipelines. 
1. **Define your config object.** Fill out the _configuration_ cell below the commented-out guide to define the variables you want ops to set in the configuration application (these will populate here for every pipeline). 
2. **Build your transformation logic.** Use the transformation cell to do that magic that you do. 
![caution](assets/cautionTape.png)

### Configuration

In [3]:
""" 
********* CONFIGURATION - PLEASE TOUCH ********* 
This section defines what you expect to get from the configuration application 
in a single "transform" object. Define the vars you need here, and comment inline to the right of them 
for all-in-one documentation. 
Engineering will build a production "transform" object for every pipeline that matches what you define here.

@@@ FORMAT OF THE DATA CLASS IS: @@@ 

<value_name>: <data_type> #<comment explaining what the value is to future us>

~~These ARE the droids you are looking for~~
"""

class Transform(DbTransform):
    """Values this transform expects to receive from the configuration application."""

    # The name of the transform to input source data from
    input_transform: str = db_transform.variables.input_transform
    ## YOUR properties go here!!

In [4]:
## Please place your value assignments for development here!!
## This cell will be turned off in production and Engineering will set it to pull from the configuration application instead

# Instantiate the config object with no arguments, so every attribute takes its class-level default.
transform = Transform()
# Example development override, left disabled:
#transform.input_transform = "symphony_health_association_map_product_ndcs"

### Description
What does this transformation do? Be specific.

![what does your transform do](assets/what.gif)

This transformation filters the input dataframe down to the rows whose `medication` value starts with the pipeline's configured brand name. It then drops duplicate transactions (rows sharing a `pharm_transaction_id`), keeping only the last occurrence of each.

### Transformation

In [5]:
### Retrieve current dataset from contract
from core.dataset_diff import DatasetDiff

# Build a DatasetDiff for this transform's id, then ask it for the data of the
# configured input transform (transform.input_transform).
diff = DatasetDiff(db_transform.id)
# NOTE(review): the result is treated as a pandas DataFrame by the cells below — confirm get_diff's return type.
final_dataframe = diff.get_diff(transform_name=transform.input_transform)

2019-05-28 18:35:29,243 - core.helpers.session_helper.SessionHelper - INFO - Creating session for dev environment...
2019-05-28 18:35:29,257 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Generating administrator mocks.
2019-05-28 18:35:29,262 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Done generating administrator mocks.
2019-05-28 18:35:29,263 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Generating pharmaceutical company mocks.
2019-05-28 18:35:29,268 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Done generating pharmaceutical company mocks.
2019-05-28 18:35:29,270 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Generating brand mocks.
2019-05-28 18:35:29,273 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Done generating brand mocks.
2019-05-28 18:35:29,275 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Generating segment mocks.
2019-05-28 18:35:2

In [6]:
# Preview the first rows of the retrieved input before transforming it.
final_dataframe.head()

Unnamed: 0,rec_date,pharm_code,pharm_npi,transtype,pharm_transaction_id,trans_seq,ref_source,ref_date,program_id,pharmacy_id,...,oth_payer_amt,xfer_pharmname,msa_patient_id,msa_patient_bmap,__metadata_app_version,__metadata_output_contract,brand,strength,prod_grp_id,__metadata_run_timestamp
0,20181024115959,ACCREDO,1346208949,COM,279133432018102401,0,DIRECT,20181019120000,,27913343,...,,,,NNNNV,0.0.11,s3://ichain-dev/stephanie/stephanie/ilumya/mas...,,,,2019-05-28 15:22:56
1,20181025115959,ACCREDO,1346208949,COM,278370982018102502,0,DIRECT,20181022120000,,27837098,...,,,,NNNVV,0.0.11,s3://ichain-dev/stephanie/stephanie/ilumya/mas...,,,,2019-05-28 15:22:56
2,20181029115959,ACCREDO,1346208949,COM,279181482018102903,0,DIRECT,20181024120000,,27918148,...,,,,NNNVV,0.0.11,s3://ichain-dev/stephanie/stephanie/ilumya/mas...,,,,2019-05-28 15:22:56
3,20181102115959,ACCREDO,1346208949,COM,267244982018110204,0,DIRECT,20181030120000,,26724498,...,,,,NNNVV,0.0.11,s3://ichain-dev/stephanie/stephanie/ilumya/mas...,,,,2019-05-28 15:22:56
4,20181106115959,ACCREDO,1346208949,COM,160618142018110605,0,DIRECT,20181102120000,,16061814,...,,,,NNNVV,0.0.11,s3://ichain-dev/stephanie/stephanie/ilumya/mas...,,,,2019-05-28 15:22:56


In [8]:
### Use the variables above to execute your transformation. the final output needs to be a variable named final_dataframe

# Filter to this pipeline's brand, then dedupe, as a single non-mutating chain.
# drop_duplicates(inplace=True) is deliberately avoided: calling it on a frame
# produced by a boolean .loc slice can raise pandas' SettingWithCopyWarning.
# Assigning the chained result yields the same rows, and because filter + dedupe
# is idempotent the cell can safely be re-run.
# NOTE(review): str.startswith is case-sensitive — confirm transform.brand uses
# the same casing as the values in the medication column.
final_dataframe = (
    final_dataframe
    # Build SHA data dataframe: keep rows whose medication starts with the brand
    # name; rows with a missing medication are excluded (na=False).
    .loc[lambda frame: frame["medication"].str.startswith(transform.brand, na=False)]
    # Dedupe to make sure historical records aren't reported more than once:
    # keep only the last row for each pharm_transaction_id.
    .drop_duplicates(subset="pharm_transaction_id", keep="last")
)

### Publish

In [9]:
## that's it - just provide the final dataframe to the var final_dataframe and we take it from there
# Hand the transformed dataframe to the publish contract built in the setup cell.
transform.publish_contract.publish(final_dataframe)

2019-05-28 18:35:46,198 - core.dataset_contract.DatasetContract - INFO - Publishing dataframe to s3 location s3://ichain-dev/stephanie/stephanie/ilumya/master/symphony_health_association_filter_to_brand.
2019-05-28 18:35:46,202 - s3parq.publish_parq - INFO - Checking params...
2019-05-28 18:35:46,205 - s3parq.publish_parq - INFO - Params valid.
2019-05-28 18:35:46,225 - s3parq.publish_parq - INFO - Writing to S3...
