In [None]:
# Look up the kernel connection file via jupyter_client and print what was found
# (handy when attaching an external console to this kernel).
from jupyter_client import find_connection_file
connection_file = find_connection_file()
print(connection_file)

In [None]:
# Constants for this project
import constants
from constants import *

import auxFunctions as af

import importlib

import os
import numpy as np
import pandas as pd

import papermill as pm
import shutil
import subprocess
# Snapshot the configuration and helper module next to the exported results, so
# every export can be traced back to the exact code that produced it.
# shutil.copy() fails when the destination directory is missing, so create it first.
os.makedirs('export', exist_ok=True)
shutil.copy('constants.py', 'export/')
shutil.copy('auxFunctions.py', 'export/')

# Change the current working directory
# NOTE(review): hard-coded, machine-specific absolute path; the copies above are
# resolved against the directory the kernel started in, everything below against
# this one. Consider deriving the path from a configurable project root.
os.chdir("/home/yonatan/Documents/projects/vaccines/code")

# Later cells write notebooks and a CSV into export/ relative to the new working
# directory, so make sure it exists there as well (no-op if it already does).
os.makedirs('export', exist_ok=True)

In [None]:
# Show the current value of dataset_day_dicts_for_adjFC. The name is not defined in
# this notebook; presumably it arrives through `from constants import *` (confirm).
print(dataset_day_dicts_for_adjFC)

In [None]:
# Re-import auxFunctions so edits made to the module since kernel start are picked up
importlib.reload(af)

datasets, metadata = af.load_data()

# Report the filtering options in effect. bOlderOnly, age_threshlod and
# bDiscardSeroprotected are not defined in this notebook; presumably they come
# from `from constants import *` (confirm).
if bOlderOnly:
    age_str = f", subjects over the age of {age_threshlod}"
else:
    age_str = ""
print(f"""Analysis parameters: discarding serprotected subjects: {bDiscardSeroprotected}{age_str}""")

# Turn on debug here for running a single dataset through analyze_dataset()
bDebug = False
# Pick the entry point once, then call it with the loaded data
run_analysis = af.debug_single_dataset if bDebug else af.analyze_all_datasets
results = run_analysis(datasets, metadata)

In [None]:
# Display the table returned by the analysis step
results

In [None]:
# Rank every analysis by how much the best of the IMMAGE / Multivariate composite
# scores exceeds the Age composite score (composite = F1 and over-threshold rate,
# per the original note), so the promising ones float to the top.

# Every column whose first element is 'Composite' (i.e. all Composite sub-columns)
composite_columns = [col for col in results.columns if col[0] == 'Composite']

# Identifying columns + the ranking metric + all Composite sub-columns
cols_to_access = [
    dataset_col,
    strain_col,
    strain_index_col,
    day_col,
    "max_difference",
    *composite_columns,
]


def _largest_gain_over_age(row):
    """Return the larger of (IMMAGE - Age) and (Multivariate - Age) composite scores for one row."""
    immage_gain = row["Composite", "IMMAGE"] - row["Composite", "Age"]
    multivariate_gain = row["Composite", "Multivariate"] - row["Composite", "Age"]
    return max(immage_gain, multivariate_gain)


results["max_difference"] = results.apply(_largest_gain_over_age, axis=1)

# Largest gain first
results = results.sort_values(by="max_difference", ascending=False)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Re-import auxFunctions so the latest save_and_show_plot() is used
importlib.reload(af)

# Keep only analyses whose gain over the Age score exceeds 0.1, ordered by dataset
above_threshold = results.loc[results["max_difference"] > 0.1]
sorted_data = above_threshold.sort_values(dataset_col, ignore_index=True).reset_index()

# One point per remaining analysis: dataset on x, gain over Age on y
sns.scatterplot(x=dataset_col, y="max_difference", data=sorted_data)
plt.xticks(rotation=45)  # tilt the dataset labels; adjust the angle as needed
plt.title("max(immage - age, immage & age - age)")
af.save_and_show_plot("figures/all-datasets-max-dif.png")

In [None]:
# Keep only the analyses where IMMAGE or Multivariate beats Age on the composite score
immage_beats_age = results["Composite", "IMMAGE"] > results["Composite", "Age"]
multivariate_beats_age = results["Composite", "Multivariate"] > results["Composite", "Age"]
score_mask = immage_beats_age | multivariate_beats_age
results = results.loc[score_mask]

In [None]:
# Display the rows that remain in `results` at this point
results

In [None]:
# Generate figures as html for promising datasets.
# For every row of `results`:
#   1. papermill writes a parameterized copy of vaccines-4.ipynb into export/
#      (prepare_only=True, i.e. papermill is asked to prepare the notebook only),
#   2. `jupyter nbconvert --execute` runs that copy and renders it to HTML
#      with the input cells hidden.
for _, row in results.iterrows():
    parameters = {
        "bAdjustMFC": row["bAdjustMFC"],
        "dataset_name": row[dataset_col],
        "strain_index": row[strain_index_col],
        "day": row[day_col],
    }
    # Kept under this exact notebook-level name because code outside the loop may read it
    adjFC_str = "_adjFC" if row["bAdjustMFC"] else ""
    print(f'exporting {row[dataset_col]}, strain no. {row[strain_index_col]}: {row[strain_col]}, day: {row[day_col]}')
    output_notebook_name = f"{row[dataset_col]}_{row[strain_col]}_{row[day_col]}{seroprotected_str}{age_restrict_str}{adjFC_str}"
    output_notebook = f"export/{output_notebook_name}.ipynb"
    try:
        pm.execute_notebook(
            input_path="vaccines-4.ipynb",
            output_path=output_notebook,
            parameters=parameters,
            prepare_only=True,
        )
    except Exception:
        print (f"******\nCaught exception when runnnig {output_notebook}\n******\n")
        # Bare raise re-raises the active exception with its original traceback
        raise

    # Export the executed notebook to HTML.
    # The command is passed as an argument list (no shell), so dataset/strain names
    # containing spaces, parentheses or other shell metacharacters cannot break or
    # alter the command line the way an interpolated os.system() string could.
    output_html = f"{output_notebook_name}.html"
    nbconvert_cmd = [
        "jupyter", "nbconvert",
        "--execute",
        "--no-input",
        "--to", "html",
        output_notebook,
        "--output", output_html,
    ]
    completed = subprocess.run(nbconvert_cmd, check=False)
    # Keep going with the remaining rows (as before), but do not hide the failure
    if completed.returncode != 0:
        print(f"******\nnbconvert exited with code {completed.returncode} for {output_notebook}\n******\n")

In [None]:
# Save promising results to a spreadsheet
def generate_html_path(row):
    """Build a spreadsheet HYPERLINK formula pointing at the HTML report of one row.

    The file name follows the pattern used by the export loop for the HTML it
    writes: dataset, strain and day values, the seroprotected / age-restriction
    suffixes and, when the row's "bAdjustMFC" flag is truthy, "_adjFC".

    Parameters:
        row: one row of the results table; must still contain the dataset, strain,
            day and "bAdjustMFC" entries.

    Returns:
        str: '=HYPERLINK("file://<code dir>/export/<name>.html", "Link")'
    """
    # Per-row suffix; without it the links of adjusted-MFC rows point at a file
    # name the export loop never writes.
    adjFC_suffix = "_adjFC" if row["bAdjustMFC"] else ""
    output_html = f"{row[dataset_col]}_{row[strain_col]}_{row[day_col]}{seroprotected_str}{age_restrict_str}{adjFC_suffix}.html"
    output_html = os.path.join(af.get_dir_by_name('code'), "export", output_html)
    return '=HYPERLINK("file://' + output_html + '", "Link")'


# Build the links while the "bAdjustMFC" column is still available (it is not part of
# cols_to_access). NOTE: because `results` is narrowed below, this cell has to be
# re-run from the filtering step, not on its own output.
html_links = results.apply(generate_html_path, axis=1)

# Suffix for the CSV name, derived from the table itself instead of a variable left
# over from another cell's loop: "_adjFC" when any exported row used the adjusted MFC.
table_adjFC_str = "_adjFC" if bool(results["bAdjustMFC"].to_numpy().any()) else ""

# .copy() so the new column is added to an independent frame, not to a slice of `results`
results = results[cols_to_access].copy()
results['html_path'] = html_links
results.to_csv(f"export/results_{seroprotected_str}{age_restrict_str}{table_adjFC_str}.csv", index=False, float_format='%.2f')

In [None]:
# Display `results` as it stands after the spreadsheet export
results