In [7]:
# Debug aid: look up a kernel connection file through jupyter_client's default
# search and print its path.
# NOTE(review): presumably meant to identify the kernel backing this session so
# an external client can attach -- confirm; no later cell visible here uses it.
from jupyter_client import find_connection_file
connection_file = find_connection_file()
print(connection_file)

/home/yonatan/.local/share/jupyter/runtime/kernel-v2-185309nADPHJABIRzS.json


In [8]:
import os
import subprocess

import pandas as pd
import papermill as pm

In [9]:
# Column-name constants, defined once so later cells can index the vaccine
# DataFrame without repeating string literals.
dataset_col, uid_col = "Dataset", "uid"
age_col, immage_col = "Age", "IMMAGE"
day_col, response_col, strain_col = "Day", "Response", "Strain"

In [10]:
# Read in data and drop rows that are missing any of the fields used below
# (IMMAGE score, dataset name, day, response).
df = pd.read_csv("../data/all_vaccines.csv").dropna(
    subset=[immage_col, dataset_col, day_col, response_col]
)

# Count the distinct subjects (N) in each dataset: one row per dataset, with the
# dataset name column and an "N" column.
N_vals = (
    df.groupby(dataset_col, as_index=False)[uid_col]
    .nunique()
    .rename(columns={uid_col: "N"})
)

# Narrow to large datasets only: keep datasets with strictly more than
# MIN_SUBJECTS distinct subjects.
MIN_SUBJECTS = 70
N_vals = N_vals.loc[N_vals["N"] > MIN_SUBJECTS]
datasets = df.loc[df[dataset_col].isin(N_vals[dataset_col])]
dataset_names = datasets[dataset_col].unique()

In [11]:
# Collect day info from papers here.
# Maps each dataset name to the value of the day column that is selected for
# that dataset when the per-strain exports are generated.
dataset_day_dict = {
    "GSE41080.SDY212": "HAI.D28",
    "GSE48018.SDY1276": "nAb.D28",
    "GSE48023.SDY1276": "nAb.D28",
    "SDY67": "nAb.D28",
}

In [12]:
# Run the analysis notebook once per (dataset, strain) combination and export
# each executed notebook to HTML.
export_dir = "export"
# papermill expects the output folder to already exist, so create it up front.
os.makedirs(export_dir, exist_ok=True)

# Fail fast, before any long notebook run starts, if a selected dataset has no
# measurement day configured.
missing_days = [name for name in dataset_names if name not in dataset_day_dict]
if missing_days:
    raise KeyError(f"No entry in dataset_day_dict for: {missing_days}")

for dataset_name in dataset_names:
    day_mask = datasets[day_col] == dataset_day_dict[dataset_name]
    name_mask = datasets[dataset_col] == dataset_name
    strains = datasets.loc[name_mask & day_mask, strain_col].unique()

    for strain_index, strain in enumerate(strains):
        # "/" would be read as a path separator in the output file names.
        strain_name = strain.replace("/", "_")
        print(f'exporting {dataset_name}, strain no. {strain_index}: {strain_name}')

        # Define parameters for dataset and strain.
        # NOTE(review): the strain is passed by position, so the target notebook
        # presumably rebuilds the same strain ordering -- confirm.
        parameters = {"dataset_name": dataset_name, "strain_index": strain_index}

        # Execute the notebook with specific parameters
        output_notebook = f"{export_dir}/{dataset_name}_{strain_name}_analysis.ipynb"
        pm.execute_notebook(
            input_path="vaccines-3.ipynb",
            output_path=output_notebook,
            parameters=parameters,
        )

        # Export the executed notebook to HTML. papermill has already run every
        # cell, so nbconvert only renders the stored outputs (no --execute).
        # Passing an argument list avoids shell quoting problems with unusual
        # names, and check=True raises if the conversion fails.
        output_html = f"{dataset_name}_{strain_name}_analysis.html"
        subprocess.run(
            [
                "jupyter", "nbconvert",
                "--no-input",
                "--to", "html",
                output_notebook,
                "--output", output_html,
            ],
            check=True,
        )

exporting GSE41080.SDY212, strain no. 0: A_Brisbane_10_2007


Executing:   0%|          | 0/41 [00:00<?, ?cell/s]

[NbConvertApp] Converting notebook export/GSE41080.SDY212_A_Brisbane_10_2007_analysis.ipynb to html


exporting GSE41080.SDY212, strain no. 1: B_Florida_4_2006


[NbConvertApp] Writing 1297305 bytes to export/GSE41080.SDY212_A_Brisbane_10_2007_analysis.html


Executing:   0%|          | 0/41 [00:00<?, ?cell/s]

[NbConvertApp] Converting notebook export/GSE41080.SDY212_B_Florida_4_2006_analysis.ipynb to html


exporting GSE41080.SDY212, strain no. 2: A_Brisbane_59_2007


[NbConvertApp] Writing 1257715 bytes to export/GSE41080.SDY212_B_Florida_4_2006_analysis.html


Executing:   0%|          | 0/41 [00:00<?, ?cell/s]

[NbConvertApp] Converting notebook export/GSE41080.SDY212_A_Brisbane_59_2007_analysis.ipynb to html


exporting GSE48018.SDY1276, strain no. 0: A_Brisbane_10_2007


[NbConvertApp] Writing 1294905 bytes to export/GSE41080.SDY212_A_Brisbane_59_2007_analysis.html


Executing:   0%|          | 0/41 [00:00<?, ?cell/s]

[NbConvertApp] Converting notebook export/GSE48018.SDY1276_A_Brisbane_10_2007_analysis.ipynb to html


exporting GSE48018.SDY1276, strain no. 1: A_Brisbane_59_2007


[NbConvertApp] Writing 1357941 bytes to export/GSE48018.SDY1276_A_Brisbane_10_2007_analysis.html


Executing:   0%|          | 0/41 [00:00<?, ?cell/s]

[NbConvertApp] Converting notebook export/GSE48018.SDY1276_A_Brisbane_59_2007_analysis.ipynb to html


exporting GSE48018.SDY1276, strain no. 2: B_Florida_4_2006


[NbConvertApp] Writing 1345675 bytes to export/GSE48018.SDY1276_A_Brisbane_59_2007_analysis.html


Executing:   0%|          | 0/41 [00:00<?, ?cell/s]

[NbConvertApp] Converting notebook export/GSE48018.SDY1276_B_Florida_4_2006_analysis.ipynb to html


exporting GSE48023.SDY1276, strain no. 0: A_Brisbane_10_2007


[NbConvertApp] Writing 1326432 bytes to export/GSE48018.SDY1276_B_Florida_4_2006_analysis.html


Executing:   0%|          | 0/41 [00:00<?, ?cell/s]

[NbConvertApp] Converting notebook export/GSE48023.SDY1276_A_Brisbane_10_2007_analysis.ipynb to html


exporting GSE48023.SDY1276, strain no. 1: A_Brisbane_59_2007


[NbConvertApp] Writing 1361488 bytes to export/GSE48023.SDY1276_A_Brisbane_10_2007_analysis.html


Executing:   0%|          | 0/41 [00:00<?, ?cell/s]

[NbConvertApp] Converting notebook export/GSE48023.SDY1276_A_Brisbane_59_2007_analysis.ipynb to html


exporting GSE48023.SDY1276, strain no. 2: B_Florida_4_2006


[NbConvertApp] Writing 1333168 bytes to export/GSE48023.SDY1276_A_Brisbane_59_2007_analysis.html


Executing:   0%|          | 0/41 [00:00<?, ?cell/s]

[NbConvertApp] Converting notebook export/GSE48023.SDY1276_B_Florida_4_2006_analysis.ipynb to html


exporting SDY67, strain no. 0: A_California_7_2009


[NbConvertApp] Writing 1308580 bytes to export/GSE48023.SDY1276_B_Florida_4_2006_analysis.html


Executing:   0%|          | 0/41 [00:00<?, ?cell/s]

[NbConvertApp] Converting notebook export/SDY67_A_California_7_2009_analysis.ipynb to html
[NbConvertApp] Writing 1343885 bytes to export/SDY67_A_California_7_2009_analysis.html
