In [None]:
from jupyter_client import find_connection_file
# Ask jupyter_client for a kernel connection file and print the path it returns.
# NOTE(review): called with no arguments, so this may not be the file of the
# kernel running this notebook if several kernels are alive — confirm.
connection_file = find_connection_file()
print(connection_file)

In [None]:
import os
import subprocess

import pandas as pd
import papermill as pm

In [None]:
# Column-name constants for the vaccine data frame, grouped by role.

# Identifiers: which dataset a row belongs to and the row's uid.
dataset_col, uid_col = "Dataset", "uid"

# Covariates and measurements.
age_col, immage_col = "Age", "IMMAGE"
day_col, response_col = "Day", "Response"

# Strain label; the export loop iterates over its unique values.
strain_col = "Strain"

In [None]:
# Read in the data and drop rows that are missing any of the required fields.
# Reassigning (rather than dropna(inplace=True)) keeps the cell a plain
# expression chain on a freshly loaded frame.
df = pd.read_csv("../data/all_vaccines.csv")
df = df.dropna(subset=[immage_col, dataset_col, day_col, response_col])

# Count the distinct uid values in each dataset; the count is stored as "N".
N_vals = (
    df[[dataset_col, uid_col]]
    .groupby(dataset_col, as_index=False)[uid_col]
    .nunique()
    .rename(columns={uid_col: "N"})
)

# Narrow to large datasets only: keep datasets whose N is strictly greater
# than the threshold.
N_THRESHOLD = 70
N_vals = N_vals.loc[N_vals["N"] > N_THRESHOLD]

# Rows belonging to the retained datasets, and the names of those datasets.
# The column is addressed through dataset_col everywhere so the name is
# defined in exactly one place.
datasets = df.loc[df[dataset_col].isin(N_vals[dataset_col])]
dataset_names = datasets[dataset_col].unique()

In [None]:
# Collect day info from papers here.
# Keys are dataset names; each value is the day-column label that the export
# loop selects for that dataset.
dataset_day_dict = {
    "GSE41080.SDY212": "HAI.D28",
    "GSE48018.SDY1276": "nAb.D28",
    "GSE48023.SDY1276": "nAb.D28",
    "SDY67": "nAb.D28",
}

In [None]:
# Loop through each combination of dataset and strain, running the analysis
# notebook once per combination and rendering the result to HTML.

# Fail before any notebook is run if a selected dataset has no day entry,
# instead of hitting a bare KeyError part-way through the export.
missing_days = [name for name in dataset_names if name not in dataset_day_dict]
if missing_days:
    raise KeyError(f"No entry in dataset_day_dict for dataset(s): {missing_days}")

# Make sure the output folder exists before papermill writes into it.
export_dir = "export"
os.makedirs(export_dir, exist_ok=True)

for dataset_name in dataset_names:
    # Rows of this dataset at the day recorded for it in dataset_day_dict.
    day_mask = datasets[day_col] == dataset_day_dict[dataset_name]
    name_mask = datasets[dataset_col] == dataset_name
    data = datasets.loc[name_mask & day_mask].reset_index()

    # strain_index is the position in this unique() array; it is passed to the
    # analysis notebook as a parameter, so the order must not be altered here.
    strains = data[strain_col].unique()
    for strain_index, strain in enumerate(strains):
        # "/" would otherwise act as a path separator in the output file names.
        strain_name = strain.replace("/", "_")
        print(f'exporting {dataset_name}, strain no. {strain_index}: {strain_name}')

        # Define parameters for dataset and strain
        parameters = {"dataset_name": dataset_name, "strain_index": strain_index}

        # Execute the notebook with specific parameters
        output_notebook = f"{export_dir}/{dataset_name}_{strain_name}_analysis.ipynb"
        pm.execute_notebook(
            input_path="vaccines-2.ipynb",
            output_path=output_notebook,
            parameters=parameters,
        )

        # Export the executed notebook to HTML.
        # - Arguments are passed as a list (no shell), so spaces or shell
        #   metacharacters in dataset/strain names cannot split or alter the
        #   command.
        # - check=True raises CalledProcessError if nbconvert fails, rather than
        #   silently continuing without an HTML file.
        # - No --execute: papermill has already run the notebook and saved its
        #   outputs, so nbconvert only needs to render them.
        output_html = f"{dataset_name}_{strain_name}_analysis.html"
        subprocess.run(
            [
                "jupyter", "nbconvert",
                "--no-input",
                "--to", "html",
                output_notebook,
                "--output", output_html,
            ],
            check=True,
        )