# Papermill Workflow

In [1]:
import papermill as pm
from datetime import date
import ipyparallel as ipp

In [2]:
today = str(date.today())

## Making a `run_notebook` function

In [3]:
def run_notebook(inpath, outpath, parameters=None):
    """Execute a notebook with papermill, saving the executed copy to ``outpath``.

    Args:
        inpath: Path of the notebook to execute.
        outpath: Path handed to papermill as the output notebook location.
        parameters: Optional dict of parameter values forwarded to papermill.
            ``None`` (the default) forwards no parameters.

    Returns:
        None. The result of ``pm.execute_notebook`` is deliberately discarded
        so that calling this as the last statement of a cell prints nothing.
    """
    import os  # local import keeps this cell runnable on its own

    # Create the local output folder (e.g. ``Logs/``) on first use so a fresh
    # checkout does not fail just because the folder is missing.  Paths that
    # carry a URL scheme (``s3://...`` and similar) are left untouched.
    if outpath is not None:
        out_str = str(outpath)
        out_dir = os.path.dirname(out_str)
        if out_dir and '://' not in out_str:
            os.makedirs(out_dir, exist_ok=True)

    pm.execute_notebook(inpath, outpath, parameters=parameters)
    

## Pre-Processing Notebooks
### OpenFDA Drug Data

In [5]:
%%time

run_notebook(
    inpath='OpenFDA_DrugLabel.ipynb',
    outpath='Logs/'+today+'_OpenFDA_DrugLabel.ipynb',
    # Keep `run_flatten` off for routine runs: changing it to True makes
    # this notebook take multiple hours to run.
    parameters={'run_flatten':False},
)

Executing:   0%|          | 0/16 [00:00<?, ?cell/s]

Wall time: 10.2 s


### Medicare Part D Prescriptions Data

In [6]:
%%time
# Execute the Medicare Part D cleanup notebook; the executed copy goes to Logs/.
# The date and the notebook name are separated by '_' so the log file follows
# the same '<date>_<notebook>' pattern as the OpenFDA_DrugLabel log.
run_notebook('Part_D_Cleanup.ipynb',
            'Logs/'+today+'_Part_D_Cleanup.ipynb',
             {}
            )

Executing:   0%|          | 0/21 [00:00<?, ?cell/s]

Wall time: 1min 14s


### Sunshine Act Payments Data

In [7]:
%%time
# Execute the Sunshine Act cleanup notebook; the executed copy goes to Logs/.
# The date and the notebook name are separated by '_' so the log file follows
# the same '<date>_<notebook>' pattern as the OpenFDA_DrugLabel log.
run_notebook('Sunshine_Act_Cleanup.ipynb',
            'Logs/'+today+'_Sunshine_Act_Cleanup.ipynb',
             {}
            )

Executing:   0%|          | 0/45 [00:00<?, ?cell/s]

Wall time: 5min 28s


## Drug Entity Resolution
1. Deduplicating the Medicare Part D drug data

In [8]:
# Execute the Medicare Part D drug dedupe notebook with `retrain` switched off.
# The date and the notebook name are separated by '_' so the log file follows
# the same '<date>_<notebook>' pattern as the OpenFDA_DrugLabel log.
run_notebook('MedicareD_Drug_Dedupe.ipynb',
            'Logs/'+today+'_MedicareD_Drug_Dedupe.ipynb',
             {'retrain':False}
            )


Executing:   0%|          | 0/16 [00:00<?, ?cell/s]

2. Deduplicating the OpenFDA drug data

In [9]:
# Execute the OpenFDA drug deduplication notebook; the executed copy goes to Logs/.
# The date and the notebook name are separated by '_' so the log file follows
# the same '<date>_<notebook>' pattern as the OpenFDA_DrugLabel log.
run_notebook('Openfda_Drug_Deduplicated.ipynb',
            'Logs/'+today+'_Openfda_Drug_Deduplicated.ipynb',
             {}
            )

Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

3. Matching Medicare Part D and OpenFDA

In [10]:
# Execute the OpenFDA / Medicare Part D drug matching notebook with `retrain` off.
# The date and the notebook name are separated by '_' so the log file follows
# the same '<date>_<notebook>' pattern as the OpenFDA_DrugLabel log.
run_notebook('Drug_Matching_Openfda-MedicareD.ipynb',
            'Logs/'+today+'_Drug_Matching_Openfda-MedicareD.ipynb',
             {'retrain':False}
            )

Executing:   0%|          | 0/23 [00:00<?, ?cell/s]

4. Deduplicating the Sunshine Act Drug data

In [11]:
# Execute the Sunshine Act drug deduplication notebook; the executed copy goes to Logs/.
# The log is named after the notebook being run: the former '-MedicareD' suffix
# did not correspond to this notebook's name.  The date and the notebook name
# are separated by '_' to follow the '<date>_<notebook>' pattern of the
# OpenFDA_DrugLabel log.
run_notebook('Sunshine_Drug_Distinctified.ipynb',
            'Logs/'+today+'_Sunshine_Drug_Distinctified.ipynb',
             {}
            )

Executing:   0%|          | 0/6 [00:00<?, ?cell/s]

5. Matching the unique Sunshine Act drug data to the pre-matched `Medicare-OpenFDA` drug data

In [12]:
# Execute the final drug matching notebook with `retrain` switched off.
# The date and the notebook name are separated by '_' so the log file follows
# the same '<date>_<notebook>' pattern as the OpenFDA_DrugLabel log.
run_notebook('Final_Drug_Matching.ipynb',
            'Logs/'+today+'_Final_Drug_Matching.ipynb',
             {'retrain':False}
            )

Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

## Manufacturer Entity Resolution
1. Getting unique manufacturer entities from the OpenFDA data

In [13]:
# Execute the OpenFDA manufacturer dedupe notebook with `retrain` switched off.
# The date and the notebook name are separated by '_' so the log file follows
# the same '<date>_<notebook>' pattern as the OpenFDA_DrugLabel log.
run_notebook('openfda_manufacturer_Dedupe.ipynb',
            'Logs/'+today+'_openfda_manufacturer_Dedupe.ipynb',
             {'retrain':False}
            )

Executing:   0%|          | 0/16 [00:00<?, ?cell/s]

2. Matching unique OpenFDA manufacturers with Sunshine Act Manufacturers

In [14]:
# Execute the manufacturer matching notebook with `retrain` switched off.
# The date and the notebook name are separated by '_' so the log file follows
# the same '<date>_<notebook>' pattern as the OpenFDA_DrugLabel log.
run_notebook('Manufacturer_Matching.ipynb',
            'Logs/'+today+'_Manufacturer_Matching.ipynb',
             {'retrain':False}
            )

Executing:   0%|          | 0/24 [00:00<?, ?cell/s]

## Physician Entity Resolution
1. Finding physician matches between the Sunshine Act (payments) data and the Medicare Part D (prescriptions) data

In [16]:
# Execute the physician deduplication notebook; the executed copy goes to Logs/.
# The date and the notebook name are separated by '_' so the log file follows
# the same '<date>_<notebook>' pattern as the OpenFDA_DrugLabel log.
run_notebook('Physicians_Deduplication.ipynb',
            'Logs/'+today+'_Physicians_Deduplication.ipynb',
             {}
            )

## Final Integration Procedure & Node/Edge Creation

In [17]:
# Execute the data integration notebook; the executed copy goes to Logs/.
# The date and the notebook name are separated by '_' so the log file follows
# the same '<date>_<notebook>' pattern as the OpenFDA_DrugLabel log.
run_notebook('Data_Integration.ipynb',
            'Logs/'+today+'_Data_Integration.ipynb',
             {}
            )

Executing:   0%|          | 0/26 [00:00<?, ?cell/s]