In [1]:
# Numeric prefixes of the dataset markdown files that the conversion loop
# below should skip (it tests `int(file_number) in done_are`).
done_are = [
    1, 10, 11, 12, 13, 14, 15, 16, 17,
    2, 21, 22,
    3, 4, 5,
]

In [2]:
import os
import nbformat as nbf
from nbconvert.preprocessors import ExecutePreprocessor
from tqdm.notebook import tqdm

# Folder holding the "<number>-<dataset>.md" sources. The generated notebooks
# are written next to them and executed with this folder as working directory.
datasets_folder = "datasets"


def _split_md_name(md_file):
    """Split '<number>-<dataset>.md' into (number_text, dataset_name).

    Returns None when the name has no '-' separator, has an empty dataset
    part, or has a prefix that int() cannot parse.
    """
    # partition() splits on the first '-' only, so a dataset name that itself
    # contains hyphens stays intact instead of breaking a two-value unpack.
    file_number, separator, dataset_name = md_file[:-3].partition("-")
    if not separator or not dataset_name:
        return None
    try:
        int(file_number)
    except ValueError:
        return None
    return file_number, dataset_name


def _build_dataset_notebook(md_content, file_number, dataset_name):
    """Assemble the (unexecuted) notebook for one dataset markdown page."""
    notebook = nbf.v4.new_notebook()
    notebook.cells = [
        # Raw front-matter cell carrying the output file name and page title.
        nbf.v4.new_raw_cell(
            f"---\noutput-file: {file_number}-{dataset_name}.html\ntitle: {dataset_name}\n\n---"
        ),
        # The markdown page itself, unchanged.
        nbf.v4.new_markdown_cell(md_content),
        nbf.v4.new_code_cell("#| hide\nimport pandas as pd\npd.set_option('display.max_rows', 500)"),
        nbf.v4.new_code_cell("from pheno_utils import PhenoLoader"),
        # !r emits a correctly quoted Python string literal even if the
        # dataset name contains quote characters.
        nbf.v4.new_code_cell(f"dl = PhenoLoader({dataset_name!r})\ndl"),
        nbf.v4.new_code_cell("dl.dict"),
    ]
    return notebook


# Sorted so the processing order does not depend on the file system.
for md_file in tqdm(sorted(os.listdir(datasets_folder)), desc="Processing files"):
    if not md_file.endswith(".md"):
        continue

    name_parts = _split_md_name(md_file)
    if name_parts is None:
        # A stray markdown file must not abort the whole batch.
        print(f"Skipping {md_file} (expected '<number>-<dataset>.md')...")
        continue
    file_number, dataset_name = name_parts

    if int(file_number) in done_are:
        print(f"Skipping {md_file}...")
        continue

    print(f"Processing {md_file}...")
    ipynb_file = f"{file_number}-{dataset_name}.ipynb"
    ipynb_path = os.path.join(datasets_folder, ipynb_file)

    # Read the markdown source; explicit UTF-8 avoids depending on the
    # platform's default encoding.
    with open(os.path.join(datasets_folder, md_file), "r", encoding="utf-8") as f:
        md_content = f.read()

    # Write the unexecuted notebook first, so a file exists on disk even if
    # execution below raises.
    nb = _build_dataset_notebook(md_content, file_number, dataset_name)
    with open(ipynb_path, "w", encoding="utf-8") as f:
        nbf.write(nb, f)

    print(f"Created {ipynb_file} successfully.")

    # Re-load the file that was just written and execute it, using the
    # datasets folder as the working directory for the run.
    with open(ipynb_path, "r", encoding="utf-8") as f:
        notebook = nbf.read(f, as_version=4)

    ep = ExecutePreprocessor(timeout=600)
    ep.preprocess(notebook, {"metadata": {"path": datasets_folder}})

    # Overwrite the notebook with the executed version (outputs included).
    with open(ipynb_path, "w", encoding="utf-8") as f:
        nbf.write(notebook, f)
    print(f"Executed {ipynb_file} successfully.\n\n")

print("Jupyter Notebooks created and executed successfully.")


Processing files:   0%|          | 0/38 [00:00<?, ?it/s]

Skipping 1-population.md...
Skipping 10-vascular.md...
Skipping 11-body_composition.md...
Skipping 12-carotid_ultrasound.md...
Skipping 13-gut_microbiome.md...
Skipping 14-human_genetics.md...
Skipping 15-ecg.md...
Skipping 16-blood_tests.md...
Skipping 17-cgm.md...
Skipping 2-anthropometrics.md...
Skipping 21-medical_conditions.md...
Skipping 22-bone_density.md...
Skipping 3-fundus.md...
Skipping 4-liver_ultrasound.md...
Skipping 5-diet_logging.md...
Processing 6-hand_grip.md...
Created 6-hand_grip.ipynb successfully.
Executed 6-hand_grip.ipynb successfully.


Processing 7-blood_pressure.md...
Created 7-blood_pressure.ipynb successfully.
Executed 7-blood_pressure.ipynb successfully.


Processing 8-serum_lipidomics.md...
Created 8-serum_lipidomics.ipynb successfully.
Executed 8-serum_lipidomics.ipynb successfully.


Processing 9-sleep.md...
Created 9-sleep.ipynb successfully.
Executed 9-sleep.ipynb successfully.


Jupyter Notebooks created and executed successfully.
