In [None]:
# Turn on DEBUG logging for this notebook session.
# Workaround: an invalid log level is set first. That first %config is expected
# to fail; the original author found it necessary on Fedora 27 with
# ipython3 6.2.1 before the real value below is applied.
%config Application.log_level='WORKAROUND'
# Now set the level that is actually wanted.
%config Application.log_level='DEBUG'
import logging
# Lower the root logger threshold so debug records are emitted.
logging.getLogger().setLevel(logging.DEBUG)
log = logging.getLogger()
# Smoke test: this message confirms that debug logging is active.
log.debug('Test debug')

In [None]:
# Primary key of the Transformation row that the setup cell looks up.
transform_id = 26

In [None]:
"""
************ SETUP - DON'T TOUCH **************
This section imports data from the configuration database
and should not need to be altered, molested or otherwise messed with. 
~~These are not the droids you are looking for~~
"""
from core.constants import BRANCH_NAME, ENV_BUCKET
from core.helpers.session_helper import SessionHelper
from core.models.configuration import Transformation
from dataclasses import dataclass
from core.dataset_contract import DatasetContract

# Fetch the Transformation row whose id equals transform_id from the
# configuration database.
db_transform = (
    SessionHelper()
    .session
    .query(Transformation)
    .filter(Transformation.id == transform_id)
    .one()
)

@dataclass
class DbTransform:
    """Values describing this transform, read from the configuration database.

    Every default below is taken from the module-level ``db_transform`` row
    (or ``BRANCH_NAME``) when this class body executes, i.e. once at class
    definition time rather than once per instance.
    """
    id: int = db_transform.id ## the instance id of the transform in the config app
    name: str = db_transform.transformation_template.name ## the transform name in the config app
    state: str = db_transform.pipeline_state.pipeline_state_type.name ## the pipeline state, one of raw, ingest, master, enhance, enrich, metrics, dimensional
    branch:str = BRANCH_NAME ## the git branch for this execution 
    brand: str = db_transform.pipeline_state.pipeline.brand.name ## the pharma brand name
    pharmaceutical_company: str = db_transform.pipeline_state.pipeline.brand.pharmaceutical_company.name # the pharma company name
    # The contract this transform publishes to. It is built from the same
    # values as the fields above (branch, company, brand, state, name).
    # A single DatasetContract is constructed here and used as the default by
    # every instance that does not pass its own publish_contract.
    publish_contract: DatasetContract = DatasetContract(branch=BRANCH_NAME,
                            parent=db_transform.pipeline_state.pipeline.brand.pharmaceutical_company.name,
                            child=db_transform.pipeline_state.pipeline.brand.name,
                            state=db_transform.pipeline_state.pipeline_state_type.name,
                            dataset=db_transform.transformation_template.name)


# CORE Cartridge Notebook::[transform name here]
![CORE Logo](assets/coreLogo.png) 

---
## Keep in Mind
Good Transforms Are...
- **singular in purpose:** good transforms do one and only one thing, and handle all known cases for that thing. 
- **repeatable:** transforms should be written so that they can be run against the same dataset an infinite number of times and produce the same result every time. 
- **easy to read:** 99 times out of 100, readable, clear code that runs a little slower is more valuable than a mess that runs quickly. 
- **no 'magic numbers':** if it is not instantly obvious, without context, what a variable or function is or does, consider renaming it.

## Workflow - how to use this notebook to make science
#### Data Science
1. **Document your transform.** Fill out the _description_ cell below describing what it is this transform does; this will appear in the configuration application where Ops will create, configure and update pipelines. 
2. **Define your config object.** Fill out the _configuration_ cell below the commented-out guide to define the variables you want ops to set in the configuration application (these will populate here for every pipeline). 
3. **Build your transformation logic.** Use the transformation cell to do that magic that you do. 
![caution](assets/cautionTape.png)

### Configuration

In [None]:
""" 
********* CONFIGURATION - PLEASE TOUCH ********* 
This section defines what you expect to get from the configuration application 
in a single "transform" object. Define the vars you need here, and comment inline to the right of them 
for all-in-one documentation. 
Engineering will build a production "transform" object for every pipeline that matches what you define here.

@@@ FORMAT OF THE DATA CLASS IS: @@@ 

<value_name>: <data_type> #<comment explaining what the value is to future us>

~~These ARE the droids you are looking for~~
"""

class Transform(DbTransform):
    """Transform-specific settings layered on top of DbTransform.

    Each value is read from ``db_transform.variables`` when this class body
    executes.
    """

    ## YOUR properties go here!!

    # The name of the dataset to pull from
    ingest_source_transform: str = db_transform.variables.ingest_source_transform
    # If from initial ingest, the file prefix name
    ingest_source_file_prefix: str = db_transform.variables.ingest_source_file_prefix
    # String array of columns to keep
    persisted_columns: str = db_transform.variables.persisted_columns

In [None]:
# Build the transform object with all defaults, i.e. the values read from
# db_transform when the classes above were defined.
transform = Transform()

In [None]:
## Please place your value assignments for development here!!
## This cell will be turned off in production and Engineering will set it to pull from the configuration application instead.
## NOTE(review): the rename cell further down reads this module-level `persisted_columns`
## directly (not `transform.persisted_columns`) and applies the names by position, so the
## order of this list matters - confirm the name is still defined when this cell is disabled.
persisted_columns = [
    'rec_date',
    'pharm_code',
    'pharm_npi',
    'transtype',
    'pharm_transaction_id',
    'trans_seq',
    'ref_source',
    'ref_date',
    'program_id',
    'pharmacy_id',
    'pat_last_name',
    'pat_first_name',
    'pat_dob',
    'pat_gender',
    'pat_addr1',
    'pat_addr2',
    'pat_city',
    'pat_state',
    'pat_zip',
    'dx1_code',
    'dx2_code',
    'status_date',
    'status_code',
    'sub_status',
    'pres_last_name',
    'pres_first_name',
    'pres_addr1',
    'pres_addr2',
    'pres_city',
    'pres_state',
    'pres_zip',
    'pres_phone',
    'pres_npi',
    'pres_dea',
    'facility_name',
    'rxdate',
    'rxnumber',
    'rxrefills',
    'rxfill',
    'refill_remaining',
    'prev_disp',
    'rx_ndc_number',
    'medication',
    'quantity',
    'day_supply',
    'ship_date',
    'ship_carrier',
    'shiptracking_num',
    'ship_location',
    'ship_address',
    'ship_city',
    'ship_state',
    'ship_zip',
    'has_medical',
    'primary_coverage_type',
    'primary_payer_name',
    'primarypayer_type',
    'secondary_coverage_type',
    'secondary_payer_name',
    'secondary_payer_type',
    'plan_paid_amt',
    'pat_copay',
    'copay_assist_amount',
    'oth_payer_amt',
    'xfer_pharmname',
    'msa_patient_id',
    'msa_patient_bmap'
    ]

### Description
What does this transformation do? Be specific.

![what does your transform do](assets/what.gif)

This transform takes the Symphony Health Association base ingested data and separates out useful columns and uppercases all the data.

### Transformation

In [None]:
import ast
import pandas as pd
from s3parq.s3parq import fetch_diff

from core.helpers.contract_creator import get_relative_contract

In [None]:
# Place your import contracts here
# Partition column used to diff the published dataset against the ingest dataset.
diff_partition = "__metadata_run_timestamp"
#ingest_contract = get_relative_contract(t_name=transform.ingest_source_transform, contract=transform.publish_contract)
# Contract for the upstream dataset: same branch / company / brand as this
# transform, but in the "ingest" state.
ingest_contract = DatasetContract(
    branch=BRANCH_NAME,
    parent=transform.pharmaceutical_company,
    child=transform.brand,
    state="ingest",
    dataset=transform.ingest_source_transform,
)

# Key of the specific ingested file set under the ingest dataset.
ingest_key = ingest_contract.key + "/" + transform.ingest_source_file_prefix

# Use ENV_BUCKET (imported in the setup cell) for both sides of the diff. The
# previous code referenced an undefined name `bucket`, which raised NameError
# on a fresh kernel.
# NOTE(review): the published dataset is passed as `input` and the ingest
# dataset as `comparison`; confirm against the s3parq docs that this is the
# intended diff direction.
final_dataframe = fetch_diff(
    input_bucket=ENV_BUCKET,
    input_key=transform.publish_contract.key,
    comparison_bucket=ENV_BUCKET,
    comparison_key=ingest_key,
    partition=diff_partition,
)

In [None]:
# Re-point the ingest contract at "<source transform>/<file prefix>" and load
# the whole dataset without filters.
# TODO: fetch_diff when fixed
ingest_contract.dataset = "/".join(
    (transform.ingest_source_transform, transform.ingest_source_file_prefix)
)
final_dataframe = ingest_contract.fetch(filters=[])

In [None]:
# Rename the leading columns of final_dataframe, by position, to the names in
# persisted_columns. Any columns beyond len(persisted_columns) keep their names.
# TODO: fix to the name thing (match columns by name instead of by position)
#
# All labels are assigned in a single step. The previous one-at-a-time
# rename(columns={old: new}) matched by label, so a new name that collided with
# a not-yet-renamed column caused both columns to be renamed on a later pass.
positional_names = list(final_dataframe.columns)
if len(persisted_columns) > len(positional_names):
    # Same exception type as before, but raised before anything is modified.
    raise IndexError(
        "persisted_columns lists %d names but final_dataframe only has %d columns"
        % (len(persisted_columns), len(positional_names))
    )
positional_names[:len(persisted_columns)] = persisted_columns
# Assigning .columns relabels the existing dataframe object in place.
final_dataframe.columns = positional_names

In [None]:
# Uppercase
def _upper_and_strip(column):
    """Return ``column`` upper-cased then stripped, or unchanged if it is not text.

    The previous guard, ``isinstance(x, object)``, is true for every value, so
    ``.str`` was applied to every column and raised AttributeError on
    non-string columns (numeric, datetime, ...). Those columns now pass
    through untouched; text columns are processed exactly as before.
    """
    try:
        text = column.str
    except AttributeError:
        # pandas refuses the .str accessor on non-string data.
        return column
    return text.upper().str.strip()


final_dataframe = final_dataframe.apply(_upper_and_strip)

### Publish

In [None]:
## That's it - just provide the finished dataframe in the variable `final_dataframe`;
## it is handed to this transform's publish contract here.
transform.publish_contract.publish(final_dataframe)