In [None]:
from jupyter_client import find_connection_file
# Look up a Jupyter kernel connection file and print its path.
# NOTE(review): find_connection_file() is called without arguments, so it relies on
# jupyter_client's default filename pattern; presumably that resolves to the most
# recently modified kernel file in the runtime directory, which is not guaranteed
# to be the kernel running this notebook — confirm this is the intended file.
connection_file = find_connection_file()
print(connection_file)

In [None]:
import os
import shutil
import subprocess
import sys

import pandas as pd
import papermill as pm

def exception_handler(exception_type, exception, traceback):
    """Report an uncaught exception as a single ``TypeName, message`` line.

    The signature matches what ``sys.excepthook`` is called with. The
    ``traceback`` argument is accepted but never used, so no stack trace is
    shown. The line is written with ``print``, i.e. to standard output.
    """
    print(f"{exception_type.__name__}, {exception}")


# Keep error output terse: request zero traceback frames from the interpreter and
# route uncaught exceptions through the one-line handler defined above.
# NOTE(review): inside an IPython kernel, errors raised by cells may be rendered by
# IPython's own traceback machinery rather than sys.excepthook — confirm this has
# the intended effect when the notebook is run interactively.
sys.tracebacklimit = 0
sys.excepthook = exception_handler

from constants import *
# Keep a copy of the constants used for this run next to the exported notebooks.
# NOTE(review): presumably the exported notebooks import `constants` when they are
# executed from export/ — confirm.
# The directory is created first because shutil.copy does not create missing
# directories; copying into a non-existent 'export/' raises.
os.makedirs('export', exist_ok=True)
shutil.copy('constants.py', 'export/')

In [None]:
# Load the combined vaccine table and keep only the rows in which every key column
# (immage, dataset, day, response) is populated.
df = pd.read_csv("../data/all_vaccines.csv").dropna(
    subset=[immage_col, dataset_col, day_col, response_col]
)
dataset_names = df["Dataset"].unique().astype(str)

# Count the distinct uid_col values per dataset ("N") and keep only the datasets
# with N above 70. Note that this threshold is applied whether or not bNarrow is set.
N_vals = (
    df[[dataset_col, uid_col]]
    .groupby(dataset_col, as_index=False)[uid_col]
    .nunique()
    .rename(columns={uid_col: "N"})
)
N_vals = N_vals.loc[N_vals["N"] > 70]

# Optional narrowing: when switched on, restrict the working frame and the list of
# dataset names to the large datasets selected above.
bNarrow = False
if bNarrow:
    filtered_df = df.loc[df["Dataset"].isin(N_vals["Dataset"])]
    dataset_names = filtered_df["Dataset"].unique().astype(str)

In [None]:
# Build the table of configured datasets from dataset_day_dicts and restrict the
# working frame to the datasets it lists. These assignments are unconditional, so
# they replace any dataset_names / filtered_df computed earlier in the notebook.
datasets = pd.DataFrame(dataset_day_dicts)
dataset_names = datasets[dataset_col].unique().astype(str)
filtered_df = df[df["Dataset"].isin(dataset_names)]

In [None]:
# Run configuration: datasets to leave out of the influenza run, followed by the
# boolean switches that select which variant of the report is exported.
exclude_datasets = [
    "GSE45735.SDY224",
    "GSE47353.SDY80",
    # "GSE48023.SDY1276",
    # "SDY296",
]

bInfluenza = True
bAdjustMFC = False
bDiscardSeroprotected = True
bOlderOnly = True

# File-name suffixes that record which restrictions were active for the export.
age_restrict_str = "_older-only" if bOlderOnly else ""
seroprotected_str = "_discard_seroprotected" if bDiscardSeroprotected else ""

In [None]:
if bInfluenza:
    # Switch to the influenza dataset table (influenza_dicts is not defined in this
    # notebook; presumably it comes from the constants star-import) and drop the
    # explicitly excluded datasets.
    datasets = pd.DataFrame(influenza_dicts)
    excluded = set(exclude_datasets)
    # Filter instead of taking a set difference: a set has no stable order, so the
    # export loop would visit the datasets in a different order from run to run.
    dataset_names = [
        name
        for name in datasets["Dataset"].unique().astype(str)
        if name not in excluded
    ]

    datasets = datasets.loc[datasets["Dataset"].isin(dataset_names)]
    filtered_df = df.loc[df["Dataset"].isin(dataset_names)]

# Base parameters handed to every exported notebook; the export loops add the
# per-run values on top of these.
parameters = {
    "bAdjustMFC": bAdjustMFC,
    "bDiscardSeroprotected": bDiscardSeroprotected,
    "bInfluenza": bInfluenza,
    "bOlderOnly": bOlderOnly,
}

In [None]:
# Debugging aid: uncomment the lines below to limit the export to the first dataset only.
# dataset_names = dataset_names[:1]
# dataset_names

In [None]:
# Export one HTML report per (dataset, day, strain) combination.
#
# For each combination, papermill writes a parameterized copy of vaccines-4.ipynb
# into export/ (prepare_only=True injects the parameters without executing it), and
# nbconvert then executes that copy and renders it to HTML with code inputs hidden.
if not bAdjustMFC:
    for dataset_name in dataset_names:
        dataset = datasets.loc[datasets[dataset_col] == dataset_name]
        dataset_df = filtered_df.loc[filtered_df[dataset_col] == dataset_name]
        print(dataset_name)
        # The "Days" entry of the dataset's row is iterated as the collection of days to export.
        days = dataset["Days"].iloc[0]
        for day in days:
            print(day)
            # dataset_df already holds only this dataset's rows, so filtering on the
            # day alone is sufficient.
            data = dataset_df.loc[dataset_df[day_col] == day].reset_index()
            strains = data[strain_col].unique()
            print(strains)
            for strain_index, strain in enumerate(strains):
                # Make the strain label usable as part of a file name.
                strain_name = strain.replace("/", "_").replace(" ", "_")
                print(f'exporting {dataset_name}, strain no. {strain_index}: {strain_name}, day: {day}')
                # The report notebook is told which strain to use by position, not by name.
                parameters.update({
                    "dataset_name": dataset_name,
                    "strain_index": strain_index,
                    "day": day
                })

                output_notebook_name = f"{dataset_name}_{strain_name}_{day}{seroprotected_str}{age_restrict_str}"
                output_notebook = f"export/{output_notebook_name}.ipynb"
                try:
                    pm.execute_notebook(
                        input_path="vaccines-4.ipynb",
                        output_path=output_notebook,
                        parameters=parameters,
                        prepare_only=True
                    )
                except Exception as e:
                    print (f"******\nCaught exception when runnnig {output_notebook}\n******\n")
                    print(e)
                    # Without a freshly prepared notebook there is nothing valid to
                    # convert (a stale file from an earlier run could be picked up),
                    # so move on to the next strain.
                    continue

                # Execute the prepared notebook and export it to HTML. The command is
                # passed as an argument list so that dataset or strain names containing
                # shell-special characters (e.g. parentheses) cannot break it.
                output_html = f"{output_notebook_name}.html"
                nbconvert_cmd = [
                    "jupyter", "nbconvert", "--execute", "--no-input",
                    "--to", "html", output_notebook, "--output", output_html,
                ]
                try:
                    status = subprocess.run(nbconvert_cmd, check=False).returncode
                except OSError as e:
                    # The jupyter executable could not be launched; keep the run best-effort.
                    print(f"Could not launch nbconvert for {output_notebook}: {e}")
                    continue
                if status != 0:
                    print(f"nbconvert exited with status {status} for {output_notebook}")

In [None]:
# Export one adjusted-MFC HTML report per dataset.
#
# For each dataset, papermill writes a parameterized copy of vaccines-4.ipynb into
# export/ (prepare_only=True injects the parameters without executing it), and
# nbconvert then executes that copy and renders it to HTML with code inputs hidden.
if bAdjustMFC:
    for dataset_name in dataset_names:
        dataset = datasets.loc[datasets[dataset_col] == dataset_name]
        # The shared filtered_df frame is deliberately left untouched here: narrowing
        # it to one dataset inside the loop would leave it empty from the second
        # dataset onwards, and this loop does not read it.
        print(dataset_name)
        strain_name = "Influenza"
        print(f'exporting {dataset_name}, using adjusted MFC')
        # Per-dataset parameters for the report notebook.
        # NOTE(review): dataset_name is passed explicitly so each report targets its own
        # dataset, mirroring the per-strain export — confirm vaccines-4.ipynb reads it
        # in adjusted-MFC mode.
        # tolist() yields native Python scalars; numpy scalars such as int64 can fail
        # JSON serialization when the parameters are written into the notebook.
        parameters.update({
            "dataset_name": dataset_name,
            "day0": dataset["Day0"].tolist()[0],
            "dayMFC": dataset["DayMFC"].tolist()[0]
        })

        day = 'Adjusted_MFC'
        output_notebook_name = f"{dataset_name}_{strain_name}_{day}{seroprotected_str}{age_restrict_str}"
        output_notebook = f"export/{output_notebook_name}.ipynb"
        try:
            pm.execute_notebook(
                input_path="vaccines-4.ipynb",
                output_path=output_notebook,
                parameters=parameters,
                prepare_only=True
            )
        except Exception as e:
            print (f"******\nCaught exception when runnnig {output_notebook}\n******\n")
            print(e)
            # Without a freshly prepared notebook there is nothing valid to convert
            # (a stale file from an earlier run could be picked up), so skip this dataset.
            continue

        # Execute the prepared notebook and export it to HTML. The command is passed
        # as an argument list so that names containing shell-special characters
        # cannot break it.
        output_html = f"{output_notebook_name}.html"
        nbconvert_cmd = [
            "jupyter", "nbconvert", "--execute", "--no-input",
            "--to", "html", output_notebook, "--output", output_html,
        ]
        try:
            status = subprocess.run(nbconvert_cmd, check=False).returncode
        except OSError as e:
            # The jupyter executable could not be launched; keep the run best-effort.
            print(f"Could not launch nbconvert for {output_notebook}: {e}")
            continue
        if status != 0:
            print(f"nbconvert exited with status {status} for {output_notebook}")