In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import sys
from pathlib import Path
from datetime import date

import pandas as pd
import numpy as np
from sklearn import metrics

import matplotlib.pyplot as plt
import plotly.express as px
import matplotlib as mpl
# Global matplotlib setting: figures created after this point use 600 DPI.
mpl.rcParams['figure.dpi'] = 600

In [None]:
# With 'none' the SVG backend writes text as text (assuming the fonts are installed on the
# viewing machine) instead of converting glyphs to paths, so labels stay editable.
plt.rcParams['svg.fonttype'] = 'none'

In [None]:
## general configuration

In [None]:
import os

# Location of the CAR_T repository checkout. The cluster path stays the default;
# set the CAR_T_REPO_DIR environment variable to run the notebook from another
# checkout without editing this cell.
repo_dir = os.environ.get("CAR_T_REPO_DIR", '/home/labs/amit/noamsh/repos/CAR_T')

# Make the project packages importable. The membership check keeps sys.path from
# accumulating duplicate entries every time this cell is re-run.
if repo_dir not in sys.path:
    sys.path.append(repo_dir)

# Experiment results are loaded from, and figures are written under, this directory.
data_dir_path = Path(repo_dir, "data")

In [None]:
# Figures are grouped by run date: <data_dir_path>/figures/<ISO date of today>.
figures_dir = data_dir_path / "figures" / date.today().isoformat()
figures_dir.mkdir(exist_ok=True, parents=True)

In [None]:
figures_dir

In [None]:
## exp results loading

In [None]:
from evaluation.experiment_managment import load_results_of_exp_name, generate_experiment_name

In [None]:
# Keyword flags handed to generate_experiment_name for each experiment, keyed by the
# label the rest of the notebook uses to look the experiment up.
experiment_feature_flags = {
    "Cellular frequencies": dict(use_cell_frequencies=True),
    "TNFa-CD16": dict(use_TNFA_SIGNALING_VIA_NFKB_CD16_Mono=True),
    "Combined_TNFa": dict(use_cell_frequencies=True, use_TNFA_SIGNALING_VIA_NFKB_CD16_Mono=True),
}

# Load the stored results of every experiment from data_dir_path.
exps_dict = {
    label: load_results_of_exp_name(generate_experiment_name(**flags), from_dir=data_dir_path)
    for label, flags in experiment_feature_flags.items()
}

In [None]:
## plot per experiment visualization

In [None]:
from evaluation.visualizations import plot_ROC_PRauc_CM_stem

In [None]:
def print_metrics(y_true, y_score):
    """Print R precision, NR precision and accuracy, one metric per line.

    Args:
        y_true: ground-truth labels.
        y_score: predictions to compare against ``y_true``. They are passed straight to
            ``metrics.classification_report``, so despite the name they must be class
            labels (the notebook passes ``y_pred``).

    The report entry ``'1'`` is printed as "R" and ``'0'`` as "NR"; every value is
    rounded to 3 decimals. Nothing is returned.
    """
    full_report = metrics.classification_report(y_true, y_score, output_dict=True)
    rounded_metrics = {
        "R precision": round(full_report['1']['precision'], 3),
        "NR precision": round(full_report['0']['precision'], 3),
        'accuracy': round(full_report['accuracy'], 3),
    }
    for metric_name, metric_value in rounded_metrics.items():
        print(f"{metric_name}: {metric_value}")

In [None]:
# Lookup from the "patient" column to the "sample_name" column of the patient map stored
# with the "Cellular frequencies" results; used later to relabel row indices.
patient_name_map = (
    exps_dict["Cellular frequencies"]["patient_map"]
    .set_index("patient")
    .loc[:, "sample_name"]
)

In [None]:
# For every experiment: list the training features, then print the metrics and save the
# plot_ROC_PRauc_CM_stem figure for each evaluated split.
for exp_name, results in exps_dict.items():
    print(f"features: {results['test']['X_train'].columns}")
    for eval_key in ['loocv']: #, 'test'
        split_results = results[eval_key]
        # Ground-truth index is relabelled through patient_name_map.
        y_true = split_results["y"].rename(index=patient_name_map)
        y_pred, y_proba = (np.array(split_results[field]) for field in ("y_pred", "y_proba"))

        exp_full_name = f"{exp_name}: {eval_key}"
        print(exp_full_name)
        print(f"model: {results['model']}")
        print_metrics(y_true, y_pred)

        # File name: the experiment label with ':' removed and spaces turned into '_'.
        file_stem = exp_full_name.translate(str.maketrans({':': None, ' ': '_'}))
        image_path = figures_dir / f"{file_stem}.svg"
        plot_ROC_PRauc_CM_stem(
            y_true,
            y_pred,
            y_proba,
            flip_stem=True,
            save_figure_to_path=image_path,
            use_all_score_range=True,
        )

In [None]:
## plot SHAP values

In [None]:
import shap

def print_shap_plots(model, X, save_figure_to_path=None):
    """Draw a SHAP bar plot (at most 5 features displayed) for ``model`` on ``X``.

    Explainers are tried in this order, falling back when construction raises:
    ``shap.TreeExplainer(model)``, ``shap.Explainer(model, X)``, and finally
    ``shap.KernelExplainer(model.predict, X)``.

    Args:
        model: model to explain.
        X: feature table to explain; also handed to the non-tree explainers.
        save_figure_to_path: optional output path; when given, the bar plot is also
            written there in PDF format.

    Returns:
        The SHAP values that were plotted. When the explainer output has more than two
        dimensions, only index 1 of the last axis is kept.
    """
    # Catch Exception instead of using a bare except, so KeyboardInterrupt / SystemExit
    # stop the notebook rather than silently triggering the next fallback.
    try: # tree
        explainer = shap.TreeExplainer(model)
    except Exception:
        try: # kernel
            explainer = shap.Explainer(model, X)
        except Exception:
            explainer = shap.KernelExplainer(model.predict, X)
    shap_values = explainer(X)
    if len(shap_values.shape) > 2:
        # presumably the last axis enumerates classes and index 1 is the positive class — TODO confirm
        shap_values = shap_values[:, :, 1]
    # shap.plots.beeswarm(shap_values)
    # show=False keeps the figure open: with the default show=True the plot is displayed
    # (and, with the inline backend, closed) before savefig runs, so the saved file is blank.
    shap.plots.bar(shap_values, max_display=5, show=False)
    if save_figure_to_path is not None:
        plt.savefig(save_figure_to_path, format="pdf")
    # Display the figure, as the original show=True call did.
    plt.show()
    return shap_values


In [None]:
# Refit each experiment's model on its training split, then show SHAP bar plots for the
# training features and for the test features.
for exp_name, results in exps_dict.items():
    test_split = results['test']
    model = results["model"]
    X_train = test_split["X_train"]
    # Labels restricted to the rows present in the training split.
    y_train = results['y'].loc[X_train.index]
    # NOTE: fit() is called on the estimator object stored inside exps_dict.
    model.fit(X_train, y_train)

    print(f"{exp_name}: train shap values")
    _ = print_shap_plots(model, X_train)
    print(f"{exp_name}: test shap values")
    image_path = figures_dir / f"{exp_name}_test_shap.pdf"
    _ = print_shap_plots(model, test_split["X_test"]) #, save_figure_to_path=image_path)

In [None]:
## plot combined ROC curve

In [None]:
from evaluation.visualizations import plot_loocv_roc_curves

In [None]:
# Experiment key in exps_dict -> display name (handed to plot_loocv_roc_curves and written
# to the "experiment name" column of the metrics table).
exp_names_to_plot = dict([
    ("Cellular frequencies", "Cellular frequencies"),
    ("TNFa-CD16", "Myeloid pathway"),
    ("Combined_TNFa", "Combined"),
])

In [None]:
### figures

In [None]:
# Third argument is a path under figures_dir (presumably where the figure is written).
roc_figure_path = figures_dir / "ROC_curve_no_mon_ratio.svg"
plot_loocv_roc_curves(exps_dict, exp_names_to_plot, roc_figure_path)

In [None]:
## external evaluation

In [None]:
# Plot the external-evaluation ("haradvala") figure for every experiment that has
# external-evaluation training features.
for exp_name, results in exps_dict.items():
    # NOTE(review): neither name below is a key of exps_dict as built in this notebook,
    # so this filter currently skips nothing — confirm the intended names.
    if exp_name in ["Combined", "Myeloid pathways"]:
        continue
    print(f"haradvala: {exp_name}")
    external_results = results['external_eval']
    X_train = external_results['X_train']
    if X_train is None:
        # Nothing to evaluate externally for this experiment.
        continue
    print(f"features: {X_train.columns}")
    y_true = external_results["y"]
    y_pred, y_proba = (np.array(external_results[field]) for field in ("y_pred", "y_proba"))

    image_path = figures_dir / f"haradvala_{exp_name}.svg"
    plot_ROC_PRauc_CM_stem(
        y_true,
        y_pred,
        y_proba,
        flip_stem=True,
        save_figure_to_path=image_path,
        use_all_score_range=True,
    )

In [None]:
### shap

In [None]:
# Refit each experiment's model on the external-evaluation training features and show
# SHAP bar plots for the internal (training) and external ("haradvala") feature tables.
for exp_name, results in exps_dict.items():
    if exp_name in ["Combined",  "Combined Mono ratio", "Myeloid pathways"]:
        continue
    external_results = results['external_eval']
    X_train = external_results["X_train"]
    if X_train is None:
        # The external-evaluation plotting loop treats X_train=None as "no external data";
        # without the same guard here model.fit(None, ...) would raise.
        print(f"{exp_name}: no external evaluation features, skipping shap")
        continue
    model = results["model"]
    # NOTE(review): labels are not re-indexed to X_train.index here (the internal SHAP
    # loop does so) — confirm X_train and results['y'] share the same row order.
    y_train = results['y']
    model.fit(X_train, y_train)

    print(f"{exp_name}: internal shap values")
    _ = print_shap_plots(model, X_train)
    print(f"{exp_name}: haradvala shap values")
    # image_path = Path(figures_dir, f"{exp_name}_test_shap.pdf")
    _ = print_shap_plots(model, external_results["X_test"]) #, save_figure_to_path=image_path)

In [None]:
## final metrics table

In [None]:
from sklearn.metrics import roc_auc_score
# Collect one record per (experiment, dataset) with precision for both classes,
# accuracy and ROC AUC, all rounded to 2 decimals.
all_metrics = []
for exp_name, new_exp_name in exp_names_to_plot.items():
    # Looked up once per experiment; it does not depend on the dataset.
    results = exps_dict[exp_name]
    for dataset in ['loocv', 'external_eval']:
        dataset_results = results[dataset]
        y_true = dataset_results["y"]
        y_pred = np.array(dataset_results["y_pred"])
        y_score = np.array(dataset_results["y_proba"])

        # Precision / accuracy come from the hard predictions, ROC AUC from the scores.
        report = metrics.classification_report(y_true, y_pred, output_dict=True)
        report_metrics = {
            "R precision": round(report['1']['precision'], 2),
            "NR precision": round(report['0']['precision'], 2),
            'accuracy': round(report['accuracy'], 2),
            "ROC AUC": round(roc_auc_score(y_true, y_score), 2),
            "experiment name": new_exp_name,
            "dataset": 'external evaluation' if dataset == 'external_eval' else 'internal evaluation',
        }
        all_metrics.append(report_metrics)
    

In [None]:
# Index the collected records by (experiment name, dataset); groupby sorts the keys and
# max() aggregates the rows of each group.
metrics_records_df = pd.DataFrame(all_metrics)
summary_df = metrics_records_df.groupby(by=['experiment name', 'dataset']).max()
summary_df

In [None]:
# Store the metrics table next to the figures of this run.
summary_df.to_csv(figures_dir / "models_metrics_table.csv")

In [None]:
## organize dataset for export

In [None]:
# Result bundles the export table is assembled from: the combined experiment and the
# cellular-frequencies-only experiment.
comp_pathways_results, comp_results = (
    exps_dict[exp_key] for exp_key in ('Combined_TNFa', 'Cellular frequencies')
)

In [None]:
# Place features, labels and LOOCV probabilities side by side, aligned on the row index.
export_parts = [
    comp_pathways_results["X"].drop(columns="NK"),
    comp_results["X"],
    comp_pathways_results["y"],
    comp_pathways_results["loocv"]["y_proba"],
]
full_df = pd.concat(export_parts, axis=1)

# Columns present in more than one part are kept once (first occurrence wins).
is_repeated_column = full_df.columns.duplicated()
full_df = full_df.loc[:, ~is_repeated_column].copy()

# NOTE(review): the column labelled 0 (presumably the unnamed LOOCV y_proba values) is
# exported under the name "y_pred" — confirm that the name is intended.
full_df = full_df.rename(
    columns={"response_3m": "label {1:R, 0:NR}", 0: "y_pred"},
    index=patient_name_map,
)
# Flip the sign of B_category for the export.
full_df['B_category'] = -1 * full_df['B_category']
full_df.columns

In [None]:
# Curated subset of the export table: label, prediction column and selected features.
wanted_columns = [
    'label {1:R, 0:NR}',
    'y_pred',
    "HALLMARK_TNFA_SIGNALING_VIA_NFKB-CD16 Mono",
    'B_category',
    'B',
    'CD4 T',
    'CD8 T',
    'Myeloid',
    'NK',
    'Tregs',
    'CD14 Mono',
    'CD16 Mono',
]
partial_df = full_df.loc[:, wanted_columns]
partial_df

In [None]:
# Write the full table and the curated subset next to the figures of this run.
for export_df, file_name in (
    (full_df, "all_fetures_prediction_labels.csv"),
    (partial_df, "partial_fetures_prediction_labels.csv"),
):
    export_df.to_csv(figures_dir / file_name)