## Notebook runner for the FOUNDIN-PD SCRN eQTL analysis, executed per cell-type via papermill

In [1]:
!date

Thu Aug  5 15:39:29 UTC 2021


#### import libraries

In [2]:
import pandas as pd
import os
import papermill as pm
import shutil

  from pyarrow import HadoopFileSystem


In [3]:
# template notebook that papermill executes once per parameter tuple
base_notebook = '/home/jupyter/notebooks/cell_cis_qtl_tensorqtl.ipynb'

# where papermill writes each executed notebook, and the sub-directory
# the executed notebooks are relocated to after all runs have finished
out_nb_dir = '/home/jupyter/notebooks'
final_out_nb_dir = f'{out_nb_dir}/pm_gend_nbs'

# values that make up the parameter tuples; only cell_types holds more
# than one value, so it is the one the runner iterates over
cohort = 'foundin'
version = 'amppdv1'
cell_types = ['iDA', 'DA', 'PFPP', 'eNP', 'ElC', 'lNP', 'NlC']
day = 'da65'
quant_type = 'scrn'

#### iterate over the tuples running the notebook per tuple

In [4]:
%%time
# papermill needs the destination directory to exist before it writes into it
os.makedirs(out_nb_dir, exist_ok=True)

# execute the base notebook once per cell type; every other parameter is
# held constant across runs
for cell_type in cell_types:
    run_params = dict(cohort=cohort, version=version, cell_type=cell_type,
                      day=day, quant_type=quant_type)
    executed_nb_path = f'{out_nb_dir}/{cohort}.{day}.{cell_type}.cis_qtl.ipynb'
    # echo the parameters and target path so each run is identifiable in the output
    print(run_params)
    print(executed_nb_path)
    pm.execute_notebook(base_notebook, executed_nb_path, parameters=run_params)

{'cohort': 'foundin', 'version': 'amppdv1', 'cell_type': 'iDA', 'day': 'da65', 'quant_type': 'scrn'}
/home/jupyter/notebooks/foundin.da65.iDA.cis_qtl.ipynb


Executing:   0%|          | 0/43 [00:00<?, ?cell/s]

{'cohort': 'foundin', 'version': 'amppdv1', 'cell_type': 'DA', 'day': 'da65', 'quant_type': 'scrn'}
/home/jupyter/notebooks/foundin.da65.DA.cis_qtl.ipynb


Executing:   0%|          | 0/43 [00:00<?, ?cell/s]

{'cohort': 'foundin', 'version': 'amppdv1', 'cell_type': 'PFPP', 'day': 'da65', 'quant_type': 'scrn'}
/home/jupyter/notebooks/foundin.da65.PFPP.cis_qtl.ipynb


Executing:   0%|          | 0/43 [00:00<?, ?cell/s]

{'cohort': 'foundin', 'version': 'amppdv1', 'cell_type': 'eNP', 'day': 'da65', 'quant_type': 'scrn'}
/home/jupyter/notebooks/foundin.da65.eNP.cis_qtl.ipynb


Executing:   0%|          | 0/43 [00:00<?, ?cell/s]

{'cohort': 'foundin', 'version': 'amppdv1', 'cell_type': 'ElC', 'day': 'da65', 'quant_type': 'scrn'}
/home/jupyter/notebooks/foundin.da65.ElC.cis_qtl.ipynb


Executing:   0%|          | 0/43 [00:00<?, ?cell/s]

{'cohort': 'foundin', 'version': 'amppdv1', 'cell_type': 'lNP', 'day': 'da65', 'quant_type': 'scrn'}
/home/jupyter/notebooks/foundin.da65.lNP.cis_qtl.ipynb


Executing:   0%|          | 0/43 [00:00<?, ?cell/s]

{'cohort': 'foundin', 'version': 'amppdv1', 'cell_type': 'NlC', 'day': 'da65', 'quant_type': 'scrn'}
/home/jupyter/notebooks/foundin.da65.NlC.cis_qtl.ipynb


Executing:   0%|          | 0/43 [00:00<?, ?cell/s]

CPU times: user 33.3 s, sys: 3.7 s, total: 37 s
Wall time: 5h 30min 31s


#### since I had to reference a locally modified tensorQTL, the import uses a relative path, so the papermill notebooks have to be generated in a specific path; once done, move them to the pm_gen'd path

In [5]:
# The final directory is not created anywhere else in this notebook; without
# it shutil.move raises FileNotFoundError after the long-running executions
# have already finished, so create it up front.
os.makedirs(final_out_nb_dir, exist_ok=True)

# relocate each executed notebook from the directory it had to be generated
# in to the final directory, keeping the same file name
for cell_type in cell_types:
    gend_out_notebook = f'{out_nb_dir}/{cohort}.{day}.{cell_type}.cis_qtl.ipynb'
    gend_final_notebook = f'{final_out_nb_dir}/{cohort}.{day}.{cell_type}.cis_qtl.ipynb'
    shutil.move(gend_out_notebook, gend_final_notebook)