In [1]:
# Id of the Transformation row in the configuration database that this notebook run loads.
transform_id = 19

In [2]:
"""
************ SETUP - DON'T TOUCH **************
This section imports data from the configuration database
and should not need to be altered, molested or otherwise messed with. 
~~These are not the droids you are looking for~~
"""
from core.constants import BRANCH_NAME, ENV_BUCKET
from core.helpers.session_helper import SessionHelper
from core.models.configuration import Transformation
from dataclasses import dataclass
from core.dataset_contract import DatasetContract
import pandas as pd

# Load the Transformation row whose id equals transform_id; DbTransform below reads its
# database-backed defaults from this object.
# NOTE(review): assuming this is a SQLAlchemy query, .one() raises unless exactly one row matches - confirm.
db_transform = SessionHelper().session.query(Transformation).filter(Transformation.id == transform_id).one()

@dataclass
class DbTransform:
    """Values describing the configured transform, taken from db_transform and BRANCH_NAME.

    Every default below is evaluated once, when this class statement runs, so
    the fields reflect db_transform as it was loaded at that moment.
    """
    id: int = db_transform.id ## the instance id of the transform in the config app
    name: str = db_transform.transformation_template.name ## the transform name in the config app
    state: str = db_transform.pipeline_state.pipeline_state_type.name ## the pipeline state, one of raw, ingest, master, enhance, enrich, metrics, dimensional
    branch: str = BRANCH_NAME ## the git branch for this execution
    brand: str = db_transform.pipeline_state.pipeline.brand.name ## the pharma brand name
    pharmaceutical_company: str = db_transform.pipeline_state.pipeline.brand.pharmaceutical_company.name ## the pharma company name
    ## the contract the final dataframe is published through; it is built from the same
    ## branch / state / company / brand / transform-name values as the fields above.
    ## This single default object is shared by every instance that does not override it.
    publish_contract: DatasetContract = DatasetContract(branch=BRANCH_NAME,
                            state=db_transform.pipeline_state.pipeline_state_type.name,
                            parent=db_transform.pipeline_state.pipeline.brand.pharmaceutical_company.name,
                            child=db_transform.pipeline_state.pipeline.brand.name,
                            dataset=db_transform.transformation_template.name)

2019-06-03 13:19:38,895 - core.helpers.session_helper.SessionHelper - INFO - Creating session for dev environment...
2019-06-03 13:19:38,918 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Generating administrator mocks.
2019-06-03 13:19:38,955 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Done generating administrator mocks.
2019-06-03 13:19:38,956 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Generating pharmaceutical company mocks.
2019-06-03 13:19:38,960 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Done generating pharmaceutical company mocks.
2019-06-03 13:19:38,961 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Generating brand mocks.
2019-06-03 13:19:38,966 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Done generating brand mocks.
2019-06-03 13:19:38,969 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Generating segment mocks.
2019-06-03 13:19:3

# CORE Cartridge Notebook::symphony_health_association_extract_column_mapping
![CORE Logo](assets/coreLogo.png) 

---
## Keep in Mind
Good Transforms Are...
- **singular in purpose:** good transforms do one and only one thing, and handle all known cases for that thing. 
- **repeatable:** transforms should be written in a way that they can be run against the same dataset an infinite number of times and get the same result every time. 
- **easy to read:** 99 times out of 100, readable, clear code that runs a little slower is more valuable than a mess that runs quickly. 
- **free of 'magic numbers':** if it is not instantly obvious, without context, what a variable or function is or does, consider renaming it.

## Workflow - how to use this notebook to make science
#### Data Science
1. **Document your transform.** Fill out the _description_ cell below describing what this transform does; this will appear in the configuration application where Ops will create, configure and update pipelines. 
2. **Define your config object.** Fill out the _configuration_ cell below the commented-out guide to define the variables you want Ops to set in the configuration application (these will populate here for every pipeline). 
3. **Build your transformation logic.** Use the transformation cell to do that magic that you do. 
![caution](assets/cautionTape.png)

### Configuration

In [3]:
""" 
********* CONFIGURATION - PLEASE TOUCH ********* 
This section defines what you expect to get from the configuration application 
in a single "transform" object. Define the vars you need here, and comment inline to the right of them 
for all-in-one documentation. 
Engineering will build a production "transform" object for every pipeline that matches what you define here.

@@@ FORMAT OF THE DATA CLASS IS: @@@ 

<value_name>: <data_type> #<comment explaining what the value is to future us>

~~These ARE the droids you are looking for~~
"""

@dataclass
class Transform(DbTransform):
    """Configuration object for this transform: the DbTransform fields plus the variables defined here.

    The @dataclass decorator is required on the subclass as well. Without it,
    the annotated attributes declared below are plain class attributes rather
    than dataclass fields, so they are missing from the generated constructor,
    repr and equality. Every field keeps a default, so Transform() with no
    arguments still works.
    """
    input_transform: str = db_transform.variables.input_transform  # The name of the transform to input source data from
    ## YOUR properties go here!!

In [4]:
## Please place your value assignments for development here!!
## This cell will be turned off in production and Engineering will set it to pull from the configuration application instead

transform = Transform()
## Example development override (uncomment to use):
#transform.input_transform = "symphony_health_association_filter_shipment_only"

### Description
What does this transformation do? Be specific.

![what does your transform do](assets/what.gif)

This transformation prepares the dataset for extract. It drops unnecessary columns and renames others to fit SHA's standards.

### Transformation

In [5]:
### Retrieve current dataset from contract
from core.dataset_diff import DatasetDiff

# Build the diff helper for this transform id, then request the diff for the
# configured input transform.
# NOTE(review): df is presumably a pandas DataFrame (the transformation cell selects
# columns from it by name) - confirm against DatasetDiff.get_diff.
diff = DatasetDiff(db_transform.id)
df = diff.get_diff(transform_name=transform.input_transform)

2019-06-03 13:19:48,096 - core.helpers.session_helper.SessionHelper - INFO - Creating session for dev environment...
2019-06-03 13:19:48,111 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Generating administrator mocks.
2019-06-03 13:19:48,116 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Done generating administrator mocks.
2019-06-03 13:19:48,121 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Generating pharmaceutical company mocks.
2019-06-03 13:19:48,126 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Done generating pharmaceutical company mocks.
2019-06-03 13:19:48,127 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Generating brand mocks.
2019-06-03 13:19:48,133 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Done generating brand mocks.
2019-06-03 13:19:48,134 - core.helpers.configuration_mocker.ConfigurationMocker - DEBUG - Generating segment mocks.
2019-06-03 13:19:4

In [6]:
### Use the variables above to execute your transformation. The final output needs to be a variable named final_dataframe

from collections import OrderedDict

# Ordered mapping of SHA extract column -> source column in df.
# The key order is the column order of the extract.
# A value of None means there is no source column for that field: the extract
# column is still created, but nothing is copied into it.
sha_fields = OrderedDict([
    ('hcp_lname', 'pres_last_name'),
    ('hcp_fname', 'pres_first_name'),
    ('hcp_mid', None),
    ('hcp_addr1', 'pres_addr1'),
    ('hcp_addr2', 'pres_addr2'),
    ('hcp_city', 'pres_city'),
    ('hcp_state', 'pres_state'),
    ('hcp_zip', 'pres_zip'),
    ('hcp_dea', 'pres_dea'),
    ('hcp_npi', 'pres_npi'),
    ('prod_ndc', 'rx_ndc_number'),
    ('prod_name', 'brand'),
    ('disp_form', None),
    ('strength', 'strength'),
    ('uom', 'uom'),
    ('qty_disp', 'quantity'),
    ('cost', 'pat_copay'),
    ('days_supply', 'day_supply'),
    ('plan_name', None),
    ('plan_type', None),
    ('plan_bin', None),
    ('plan_grp', None),
    ('plan_pcn', None),
    ('ship_date', 'ship_date'),
    ('prim_icd', 'dx1_code'),
    ('sec_icd', 'dx2_code'),
    ('rx_type', 'rx_type'),
    ('rx_num', 'rxnumber'),
    ('pharm_ncpdp', None),
    ('pharm_dea', None),
    ('pharm_npi', 'pharm_npi'),
    ('pharm_name', None),
    ('pharm_addr', None),
    ('pharm_city', None),
    ('pharm_state', None),
    ('pharm_zip', None),
    ('sp_name', 'pharm_code'),
    ('txn_id', 'pharm_transaction_id'),
])

# Fail fast and name EVERY absent source column. Without this check the loop
# below stops at the first missing column with a bare KeyError, after part of
# the extract has already been built.
missing_source_columns = [
    source_column
    for source_column in sha_fields.values()
    if source_column is not None and source_column not in df.columns
]
if missing_source_columns:
    raise KeyError(
        "Input dataset is missing source column(s) required for the SHA extract: "
        + ", ".join(missing_source_columns)
    )

# drop and rename
# Create the SHA extract dataframe with every SHA column, then fill the mapped
# columns from the input dataframe. Source columns that are not listed in
# sha_fields are dropped simply by never being copied.
final_dataframe = pd.DataFrame(None, columns=sha_fields.keys())
for (key, value) in sha_fields.items():
    if value is not None:
        final_dataframe[key] = df[value]

### Publish

In [7]:
## That's it - just provide the final dataframe in the var final_dataframe and we take it from there.
## The dataframe is handed to the publish contract held on the transform object.
transform.publish_contract.publish(final_dataframe)

2019-06-03 13:20:32,333 - core.dataset_contract.DatasetContract - INFO - Publishing dataframe to s3 location s3://ichain-dev/stephanie/stephanie/ilumya/dimensional/symphony_health_association_extract_column_mapping.
2019-06-03 13:20:32,338 - s3parq.publish_parq - INFO - Checking params...
2019-06-03 13:20:32,339 - s3parq.publish_parq - INFO - Params valid.
2019-06-03 13:20:32,362 - s3parq.publish_parq - INFO - Writing to S3...
