In [11]:
import papermill as pm


def run_notebook(input_notebook, output_notebook=None, parameters=None):
    """
    Execute a notebook with papermill, printing progress to stdout.

    Args:
        input_notebook (str): Path to the notebook to execute.
        output_notebook (str, optional): Path where the executed notebook
            should be saved. Defaults to None, which is handed to papermill
            unchanged (papermill documents None as "do not save a file").
        parameters (dict, optional): Parameters to pass into the notebook.
            None (or any empty value) is sent as an empty dict.

    Raises:
        Exception: Whatever papermill raises is reported on stdout and then
            re-raised unchanged, so callers can still stop the flow.
    """
    print(f"Running notebook: {input_notebook}")
    try:
        # Only the papermill call is guarded, so a failure of the progress
        # prints is never misreported as a notebook execution error.
        pm.execute_notebook(input_notebook, output_notebook, parameters=parameters or {})
    except Exception as e:
        # Name the failing notebook: when several notebooks run (possibly
        # concurrently) an anonymous error line cannot be attributed.
        print(f"Error occurred during notebook execution ({input_notebook}): {e}")
        # Bare raise re-raises the active exception with its traceback intact.
        raise
    else:
        print(f"✅ Done: {input_notebook}")


def main():
    """Run the pipeline notebooks one after another, in listed order.

    Each notebook is executed through run_notebook with only its input path,
    so run_notebook's defaults apply to the output path and parameters. To
    keep an executed copy, pass an output path (for example
    'executed/<notebook name>') as the second argument.
    """
    pipeline = (
        "00.pre-process.ipynb",
        "03.post-processing.ipynb",
    )

    print("Starting the flow")

    for notebook in pipeline:
        run_notebook(notebook)

    # Reached only when every notebook above ran without raising.
    print("Flow has completed successfully!")


# Entry point: start the flow only when this code runs as the top-level
# module; merely importing it does not trigger a run.
if __name__ == "__main__":
    main()

Starting the flow
Running notebook: 00.pre-process.ipynb


Executing:   0%|          | 0/3 [00:00<?, ?cell/s]

✅ Done: 00.pre-process.ipynb
Running notebook: 03.post-processing.ipynb


Executing:   0%|          | 0/3 [00:00<?, ?cell/s]

✅ Done: 03.post-processing.ipynb
Flow has completed successfully!


In [13]:
## Parallel execution (currently disabled: the cell below is entirely commented out)

In [12]:
# import papermill as pm
# from concurrent.futures import ThreadPoolExecutor, as_completed

# def run_notebook(input_notebook, parameters=None):
#     """
#     Executes a notebook in place: the executed copy is written back to
#     the input path, overwriting the original file.

#     Args:
#         input_notebook (str): Path to the input notebook.
#         parameters (dict, optional): Parameters to pass into the notebook.
#     """
#     try:
#         print(f"Running notebook: {input_notebook}")
#         pm.execute_notebook(input_notebook, input_notebook, parameters=parameters or {})
#         print(f"✅ Done: {input_notebook}")
#     except Exception as e:
#         print(f"Error occurred during notebook execution: {str(e)}")
#         raise e

# def main():
#     # List of notebooks to run
#     notebooks = [
#         '00.pre-process.ipynb',
#         # '01.feature-engineering.ipynb',
#         # '02.model-training.ipynb',
#         '03.post-processing.ipynb'
#     ]

#     # Using ThreadPoolExecutor to run notebooks in parallel
#     with ThreadPoolExecutor(max_workers=4) as executor:
#         futures = [executor.submit(run_notebook, notebook) for notebook in notebooks]

#         # Wait for all futures to complete
#         for future in as_completed(futures):
#             future.result()  # Re-raises here any exception raised while running that notebook

#     print("Flow has completed successfully!")

# if __name__ == "__main__":
#     main()

Running notebook: 00.pre-process.ipynbRunning notebook: 03.post-processing.ipynb



Executing:   0%|          | 0/3 [00:00<?, ?cell/s]

Executing:   0%|          | 0/3 [00:00<?, ?cell/s]

✅ Done: 03.post-processing.ipynb
✅ Done: 00.pre-process.ipynb
Flow has completed successfully!
