In [69]:
# Debug aid: look up a Jupyter kernel connection file and print its path.
# NOTE(review): presumably this resolves to the running kernel's file - confirm.
# `connection_file` is not used by any other cell visible in this notebook.
from jupyter_client import find_connection_file
connection_file = find_connection_file()
print(connection_file)

/home/yonatan/.local/share/jupyter/runtime/kernel-v2-16790ZPmNUueXNcqf.json


In [70]:
import os
import subprocess
import sys

import pandas as pd
import papermill as pm
# Limit traceback output to zero frames so failures are reported tersely.
# NOTE(review): this is the plain-Python behaviour; IPython formats tracebacks
# itself, so confirm it has the intended effect inside the kernel.
sys.tracebacklimit = 0


def exception_handler(exception_type, exception, traceback):
    """Print an exception as a single ``<TypeName>, <message>`` line.

    The three parameters mirror the ``sys.excepthook`` signature; the
    ``traceback`` argument is accepted but deliberately ignored.
    """
    summary = f"{exception_type.__name__}, {exception}"
    print(summary)


# The handler is currently NOT installed; uncomment to route uncaught
# exceptions through it.
#sys.excepthook = exception_handler

In [71]:
# Column names used to address the vaccine data frame throughout the notebook.

# Identifier columns
dataset_col = "Dataset"
uid_col = "uid"
strain_col = "Strain"

# Covariate / measurement columns
age_col = "Age"
day_col = "Day"
response_col = "Response"
immage_col = "IMMAGE"

In [72]:
# Load the vaccine data and drop rows that are missing any of the fields the
# analysis relies on (IMMAGE score, dataset, day, response).
df = pd.read_csv("../data/all_vaccines.csv").dropna(
    subset=[immage_col, dataset_col, day_col, response_col]
)
dataset_names = df[dataset_col].unique().astype(str)

# Count distinct subjects (uid) per dataset; the count column is named "N".
N_vals = (
    df[[dataset_col, uid_col]]
    .groupby(dataset_col, as_index=False)[uid_col]
    .nunique()
    .rename(columns={uid_col: "N"})
)

# Keep only datasets with more than MIN_SUBJECTS distinct subjects.
# N_vals is always narrowed; the working frame and dataset list are narrowed
# only when bNarrow is switched on.
MIN_SUBJECTS = 70
bNarrow = False
N_vals = N_vals.loc[N_vals["N"] > MIN_SUBJECTS]
if bNarrow:
    filtered_df = df.loc[df[dataset_col].isin(N_vals[dataset_col])]
    dataset_names = filtered_df[dataset_col].unique().astype(str)

In [73]:
# All papers: each dataset paired with the day labels analysed for it.
_DATASET_DAYS = (
    ("GSE125921.SDY1529", ["FC", "D84"]),
    ("GSE13485.SDY1264", ["D60"]),
    ("GSE13699.SDY1289", ["D28"]),
    ("GSE169159", ["FC.D42", "D42"]),
    ("GSE41080.SDY212", ["HAI.D28"]),
    ("GSE45735.SDY224", ["HAI.D21"]),
    ("GSE47353.SDY80", ["D70.nAb", "FC.D70.nAb"]),
    ("GSE48018.SDY1276", ["nAb.D28", "nAb.FC"]),
    ("GSE48023.SDY1276", ["nAb.FC", "nAb.D14"]),
    ("GSE59635.SDY63", ["HAI.D28"]),
    ("GSE59654.SDY180", ["FC.HAI", "HAI.D28"]),
    ("GSE59654.SDY404", ["FC.HAI", "HAI.D28"]),
    ("GSE59654.SDY520", ["FC.HAI", "HAI.D28"]),
    ("GSE59743.SDY400", ["FC.HAI", "HAI.D28"]),
    ("GSE65834.SDY1328", ["D7", "FC"]),
    ("GSE79396.SDY984", ["D28", "FC.D28"]),
    ("GSE82152.SDY1294", ["D28", "FC"]),
    ("SDY1325", ["FC.D28", "D28"]),
    ("SDY296", ["D28.nAb", "FC.nAb"]),
    ("SDY67", ["nAb.D28", "FC.D28.nAb"]),
    ("SDY89", ["D28"]),
)

# One record per dataset, in the order listed above.
dataset_day_dicts = [
    {"Dataset": name, "Days": day_labels} for name, day_labels in _DATASET_DAYS
]

# Build the dataset table and restrict the working frame to the listed datasets.
# This replaces any `dataset_names` / `filtered_df` assigned by an earlier cell.
datasets = pd.DataFrame(dataset_day_dicts)
dataset_names = datasets[dataset_col].unique().astype(str)
_listed_mask = df["Dataset"].isin(dataset_names)
filtered_df = df.loc[_listed_mask]

In [74]:
# Keep only Influenza datasets for now
# influenza_sets = ['GSE41080.SDY212', 'GSE48018.SDY1276', 'GSE48023.SDY1276', 'SDY67', 'GSE125921.SDY1529', 'GSE45735.SDY224', 'GSE47353.SDY80', 'GSE48023.SDY1276', 'GSE59635.SDY63', 'GSE59654.SDY404', 'GSE59743.SDY400', 'SDY296']

# Every active entry shares the same baseline-day and MFC-day labels.
_HAI_DAY_LABELS = {"Day0": "HAI.D0", "DayMFC": "HAI.MFC"}

# (dataset name, day labels) for each influenza dataset currently in use.
_INFLUENZA_DAYS = (
    ("GSE41080.SDY212", ["FC.HAI", "HAI.D28"]),
    ("GSE48018.SDY1276", ["HAI.D28", "HAI.FC"]),
    ("GSE59654.SDY404", ["HAI.D28", "FC.HAI"]),
    ("GSE59743.SDY400", ["FC.HAI", "HAI.D28"]),
    ("SDY67", ["FC.D28.HAI", "HAI.D28"]),
    ("GSE59635.SDY63", ["FC", "HAI.D28"]),
)

influenza_dicts = [
    {"Dataset": name, "Days": day_labels, **_HAI_DAY_LABELS}
    for name, day_labels in _INFLUENZA_DAYS
]

# Excluded for now:
#   Five subjects only
#     {"Dataset": "GSE45735.SDY224", "Days": ["FC.HAI", "HAI.D21"], "Day0": "HAI.D0", "DayMFC": "HAI.MFC"}
#   Doesn't have a HAI measurement
#     {"Dataset": "GSE47353.SDY80", "Days": ["D70.nAb", "FC.D70.nAb"], "Day0": "D0.nAb"}
#   Need to calculate MFC individually for these
#     {"Dataset": "GSE48023.SDY1276", "Days": ["HAI.FC", "HAI.D28"], "Day0": "HAI.D0", "DayMFC": "HAI.MFC"}
#     {"Dataset": "SDY296", "Days": ["D28.HAI", "FC.HAI"], "Day0": "D0.HAI"}

In [75]:
# Run configuration flags.
# bInfluenza: when True, the dataset table is rebuilt from `influenza_dicts`.
bInfluenza = True
# bAdjustMFC: selects which export loop runs (adjusted-MFC loop when True,
# per-day/per-strain loop when False).
bAdjustMFC = True
# bDiscardSeroprotected: only forwarded as a parameter to the analysis notebook.
bDiscardSeroprotected = False

In [76]:
# When the influenza switch is on, rebuild the dataset table from the
# influenza list and re-derive the working frame from the full data.
if bInfluenza:
    datasets = pd.DataFrame(influenza_dicts)
    dataset_names = datasets["Dataset"].unique().astype(str)
    _influenza_mask = df["Dataset"].isin(dataset_names)
    filtered_df = df.loc[_influenza_mask]

In [77]:
# Export one HTML report per (dataset, day, strain) combination.
# Runs only when the adjusted-MFC analysis is switched off.
if not bAdjustMFC:
    # Make sure the output folder exists before anything is written into it.
    os.makedirs("export", exist_ok=True)

    # NOTE: only the first dataset is processed ([:1]); widen the slice to export more.
    for dataset_name in dataset_names[:1]:
        dataset = datasets.loc[datasets[dataset_col] == dataset_name]
        # Narrow into a loop-local frame. Re-assigning the shared `filtered_df`
        # here would leave it holding only the first dataset, so any further
        # iteration (or later cell) would see an empty or truncated frame.
        dataset_df = filtered_df.loc[filtered_df[dataset_col] == dataset_name]
        print(dataset_name)
        days = dataset["Days"].iloc[0]
        for day in days:
            print(day)
            data = dataset_df.loc[dataset_df[day_col] == day].reset_index()
            strains = data[strain_col].unique()
            print(strains)
            for strain_index, strain in enumerate(strains):
                # Make the strain usable as part of a file name.
                strain_name = strain.replace("/", "_").replace(" ", "_")
                print(f'exporting {dataset_name}, strain no. {strain_index}: {strain_name}, day: {day}')
                # Parameters injected into the analysis notebook for this combination.
                parameters = {
                    "bAdjustMFC": bAdjustMFC,
                    "bDiscardSeroprotected": bDiscardSeroprotected,
                    "bInfluenza": bInfluenza,
                    "dataset_name": dataset_name,
                    "strain_index": strain_index,
                    "day": day,
                    "influenza_dicts": influenza_dicts,
                }

                # Write a parameterised copy of the notebook. prepare_only=True
                # skips execution here; nbconvert runs it below (--execute).
                output_notebook = f"export/{dataset_name}_{strain_name}_{day}_analysis.ipynb"
                try:
                    pm.execute_notebook(
                        input_path="vaccines-4.ipynb",
                        output_path=output_notebook,
                        parameters=parameters,
                        prepare_only=True,
                    )
                except Exception as e:
                    # Report the failure and move on; without a prepared
                    # notebook there is nothing for nbconvert to convert.
                    print (f"******\nCaught exception when runnnig {output_notebook}\n******\n")
                    print(e)
                    continue

                # Execute the prepared notebook and export it to HTML.
                # An argument list (no shell) keeps strain names containing
                # shell metacharacters such as parentheses from breaking the command.
                output_html = f"{dataset_name}_{strain_name}_{day}.html"
                completed = subprocess.run(
                    [
                        "jupyter", "nbconvert",
                        "--execute", "--no-input",
                        "--to", "html",
                        output_notebook,
                        "--output", output_html,
                    ]
                )
                if completed.returncode != 0:
                    print(f"nbconvert exited with status {completed.returncode} for {output_notebook}")

In [78]:
# Export one adjusted-MFC HTML report per dataset.
# Runs only when the adjusted-MFC analysis is switched on.
# NOTE: requires the "Day0" and "DayMFC" columns, which only the influenza
# dataset table provides (the all-papers list has "Dataset" and "Days" only).
if bAdjustMFC:
    # Make sure the output folder exists before anything is written into it.
    os.makedirs("export", exist_ok=True)

    # Fixed labels used in the output file names.
    strain_name = "Influenza"
    day_string = 'Adjusted_MFC'

    for dataset_name in dataset_names:
        dataset = datasets.loc[datasets[dataset_col] == dataset_name]
        # The shared `filtered_df` is intentionally left untouched: this loop
        # never reads it, and narrowing it here would leave it truncated to the
        # first dataset (then empty) for every later cell.
        print(dataset_name)
        print(f'exporting {dataset_name}, using adjusted MFC')
        # Parameters injected into the analysis notebook for this dataset.
        parameters = {
            "bInfluenza": bInfluenza,
            "bAdjustMFC": bAdjustMFC,
            "bDiscardSeroprotected": bDiscardSeroprotected,
            "dataset_name": dataset_name,
            "day0": dataset["Day0"].iloc[0],
            "dayMFC": dataset["DayMFC"].iloc[0],
            "influenza_dicts": influenza_dicts,
        }

        # Write a parameterised copy of the notebook. prepare_only=True skips
        # execution here; nbconvert runs it below (--execute).
        output_notebook = f"export/{dataset_name}_{strain_name}_{day_string}_analysis.ipynb"
        try:
            pm.execute_notebook(
                input_path="vaccines-4.ipynb",
                output_path=output_notebook,
                parameters=parameters,
                prepare_only=True,
            )
        except Exception as e:
            # Report the failure and move on; without a prepared notebook
            # there is nothing for nbconvert to convert.
            print (f"******\nCaught exception when runnnig {output_notebook}\n******\n")
            print(e)
            continue

        # Execute the prepared notebook and export it to HTML, using an
        # argument list (no shell) and checking the exit status.
        output_html = f"{dataset_name}_{strain_name}_{day_string}.html"
        completed = subprocess.run(
            [
                "jupyter", "nbconvert",
                "--execute", "--no-input",
                "--to", "html",
                output_notebook,
                "--output", output_html,
            ]
        )
        if completed.returncode != 0:
            print(f"nbconvert exited with status {completed.returncode} for {output_notebook}")

GSE41080.SDY212
exporting GSE41080.SDY212, using adjusted MFC


[NbConvertApp] Converting notebook export/GSE41080.SDY212_Influenza_Adjusted_MFC_analysis.ipynb to html
[NbConvertApp] Writing 817788 bytes to export/GSE41080.SDY212_Influenza_Adjusted_MFC.html


GSE48018.SDY1276
exporting GSE48018.SDY1276, using adjusted MFC


[NbConvertApp] Converting notebook export/GSE48018.SDY1276_Influenza_Adjusted_MFC_analysis.ipynb to html
[NbConvertApp] Writing 846226 bytes to export/GSE48018.SDY1276_Influenza_Adjusted_MFC.html


GSE59654.SDY404
exporting GSE59654.SDY404, using adjusted MFC


[NbConvertApp] Converting notebook export/GSE59654.SDY404_Influenza_Adjusted_MFC_analysis.ipynb to html
[NbConvertApp] Writing 786416 bytes to export/GSE59654.SDY404_Influenza_Adjusted_MFC.html


GSE59743.SDY400
exporting GSE59743.SDY400, using adjusted MFC


[NbConvertApp] Converting notebook export/GSE59743.SDY400_Influenza_Adjusted_MFC_analysis.ipynb to html
[NbConvertApp] Writing 748900 bytes to export/GSE59743.SDY400_Influenza_Adjusted_MFC.html


SDY67
exporting SDY67, using adjusted MFC


[NbConvertApp] Converting notebook export/SDY67_Influenza_Adjusted_MFC_analysis.ipynb to html
[NbConvertApp] Writing 845059 bytes to export/SDY67_Influenza_Adjusted_MFC.html


GSE59635.SDY63
exporting GSE59635.SDY63, using adjusted MFC


[NbConvertApp] Converting notebook export/GSE59635.SDY63_Influenza_Adjusted_MFC_analysis.ipynb to html
[NbConvertApp] Writing 740059 bytes to export/GSE59635.SDY63_Influenza_Adjusted_MFC.html
