___Author: Mikael Koli___

In [1]:
import sys

# Add the directory two levels up to the module search path
# (presumably so the in-repo jubox package can be imported below).
sys.path.extend(["../.."])

In [2]:
import jubox
# Display the jubox version this demo runs against
jubox.__version__

'0.2.0'

# Recipe: Pipelining Jupyter
This short demo shows how to parametrize a notebook, add custom information
as Markdown, and then run the notebook and save the results using Jubox.

## Loading the notebook

In [3]:
from jubox import JupyterNotebook, CodeCell, MarkdownCell

In [4]:
# Path of the notebook that is parametrized and run as the task
task_name = "tasks/example_etl.ipynb"
nb = JupyterNotebook(task_name)

# Clear any outputs left over from a previous run
# so that this run starts from a clean notebook
nb.clear_outputs(inplace=True)

## Adding parameters

In [5]:
# Set parameters
import datetime

# Build a code cell that assigns the parameter values for this run
params = CodeCell.from_variable_dict(
    start_date=datetime.datetime(2020, 2, 16),
    end_date=datetime.datetime(2020, 2, 20),
    categories=["A", "B", "D"],
)
# The parameter values use datetime, so the generated cell
# has to import it before the assignments
params.insert(0, "import datetime\n")

# Overwrite the cell tagged "parameters" with the generated cell.
# The import travels with ``params``, so nothing needs to be
# inserted into the existing cell before it is overwritten.
param_cell = nb.get_cells(tags=["parameters"])[0]
param_cell.overwrite(params)

In [6]:
# Display the first three items of the notebook to inspect it
# after the parameters cell was overwritten
nb[:3]

## Adding run info

In [7]:
# Insert some relevant run info at the start

import datetime
import platform
import pandas as pd

# Read the clock only once so that the reported date and time always
# describe the same instant (two separate reads could straddle midnight)
run_started = datetime.datetime.now()

runinfo = f"""
***Run info (generated by scheduler)***
- Start date: `{run_started:%Y-%m-%d}`
- Start time: `{run_started:%H:%M}`
- Using Python `{platform.python_version()}`
    - Using Pandas `{pd.__version__}`
***
"""
# Index 0 puts the run info before every other cell
nb.insert(0, MarkdownCell(runinfo))

In [8]:
# Display the first four items of the notebook; the run info
# inserted at index 0 should now come first
nb[:4]

## Running the notebook

### Utils for running the notebook

In [9]:
# We make a dummy function to illustrate
# email sending.

def send_email(receiver, subject, body):
    """Stand-in for a real email sender.

    Nothing is sent: the arguments are accepted but ignored and a
    fixed notice is printed instead.

    Parameters
    ----------
    receiver : address the message would be sent to (unused)
    subject : subject line of the message (unused)
    body : content of the message (unused)
    """
    notice = "Sending email"
    print(notice)
    

### Execution

In [10]:
# Run the notebook and handle a possible execution error
from nbconvert.preprocessors import CellExecutionError

# Defaults for the report. They are set before the ``try`` so that the
# ``finally`` block can always run: without them, any failure other than
# a CellExecutionError (or a missing "summary" cell) would leave these
# names undefined and a NameError would hide the real error.
status = "failed"
html_summary = ""

try:
    nb(inplace=True)
except CellExecutionError:
    # An error occurred while executing a cell.
    # Use the output of the first failed cell, as HTML, for the report
    # and let the error propagate after the ``finally`` block has run.
    html_summary = nb.error_cells[0].get_output_as_html()
    raise
else:
    # Take the summary report from the cell
    # that is tagged with "summary"
    summary_cell = nb.get_cells(tags=["summary"])[0]
    html_summary = summary_cell.get_output_as_html()

    status = "succeeded"
finally:
    # Save the notebook as HTML
    nb.to_html(f"results/{datetime.datetime.now():%Y-%m-%d}_run_book.html")
    
    # Save the notebook with its outputs to another notebook
    nb.to_ipynb("results/run_task.ipynb")
    
    # Pretend to send the summary of the executed notebook
    # as an email
    send_email(
        receiver="maintainer@email.com", 
        subject=f"Job {task_name} {status}", 
        body=html_summary
    )

Sending email


## Display the results and the executed notebook

In [11]:
# Import from the public ``IPython.display`` module; importing ``display``
# from ``IPython.core.display`` is deprecated.
from IPython.display import display, HTML

# Render the HTML summary collected during the run
display(HTML(html_summary))

In [12]:
# Display the notebook object itself, now holding the outputs of the run
nb

Unnamed: 0,date,category,value
0,2020-02-16,A,0.781831
1,2020-02-28,C,0.285411
2,2020-02-26,B,0.245172
3,2020-03-01,B,0.352998
4,2020-02-18,D,0.338662

category,A,B,C,D,E
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-02-01,1.596612,0.265367,0.818775,1.405924,2.174068
2020-02-02,2.050263,1.77299,0.79119,0.576936,0.011162
2020-02-03,1.615161,1.828015,1.637706,0.802924,1.380067
2020-02-04,0.975403,2.843379,1.627929,1.439339,0.437286
2020-02-05,0.206974,,1.224652,2.116026,0.260409

category,A,B,D
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-02-16,1.048978,0.855667,2.07825
2020-02-17,1.868935,1.59823,1.343502
2020-02-18,0.131998,1.469203,2.604135
2020-02-19,1.491585,0.341256,
2020-02-20,0.925323,1.16393,
