In [1]:
import pathlib
import sys

import numpy as np
import pandas as pd
import pyarrow.parquet as pq
import toml
import torch
from sklearn import preprocessing

sys.path.append("../..")
from MLP_utils.parameters import Parameters
from MLP_utils.utils import (
    Dataset_formatter,
    optimized_model_create,
    output_stats,
    parameter_set,
    results_output,
    test_optimized_model,
    un_nest,
)
from sklearn.metrics import (
    accuracy_score,
    auc,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
    roc_auc_score,
    roc_curve,
)

sys.path.append("../../..")

In [2]:
# Parameters
# Default run parameters for this notebook. A later cell copies them onto
# mlp_params and notes that they can be overwritten via papermill.
# SHUFFLE is forwarded to the test DataLoader and to the model testing/stat helpers.
SHUFFLE = False
# CELL_TYPE selects the input parquet file and the results sub-directory.
CELL_TYPE = "PBMC"
# CONTROL_NAME / TREATMENT_NAME are stored on mlp_params; they are not read
# again in the visible cells.
CONTROL_NAME = "DMSO_0.100_%_DMSO_0.025_%"
TREATMENT_NAME = "Thapsigargin_1.000_uM_DMSO_0.025_%"
# MODEL_NAME identifies the saved model that is rebuilt and tested below.
MODEL_NAME = "DMSO_0.025_vs_Thapsigargin_1"
# NOTE(review): SHUFFLE_DATA is not read anywhere in the visible cells
# (SHUFFLE is used instead) — confirm whether it is still needed.
SHUFFLE_DATA = False

In [3]:
# Load the binary-classification MLP configuration. resolve(strict=True)
# raises immediately if the TOML file does not exist.
ml_configs_file = pathlib.Path("../../MLP_utils/binary_config.toml").resolve(
    strict=True
)
ml_configs = toml.load(ml_configs_file)
params = Parameters()
mlp_params = parameter_set(params, ml_configs)

# overwrite mlp_params via command line arguments from papermill
# (each attribute is set exactly once)
mlp_params.CELL_TYPE = CELL_TYPE
mlp_params.MODEL_NAME = MODEL_NAME
mlp_params.CONTROL_NAME = CONTROL_NAME
mlp_params.TREATMENT_NAME = TREATMENT_NAME
mlp_params.SHUFFLE = SHUFFLE

In [4]:
# Import Data
# The dataset location is derived from the configured cell type and wrapped in
# pathlib for multi-system use; resolve(strict=True) raises if the file is missing.
relative_data_path = (
    f"../../../data/{mlp_params.CELL_TYPE}_preprocessed_sc_norm.parquet"
)
file_path = pathlib.Path(relative_data_path).resolve(strict=True)

# Read the parquet file with pyarrow and convert it to a pandas DataFrame.
df = pq.read_table(file_path).to_pandas()

In [5]:
def test_loop(df, output_name, title):
    """Test the saved model on one subset of the data and return its metrics.

    The treatment label column is label-encoded, the remaining non-metadata
    columns are used as features, the model named ``mlp_params.MODEL_NAME`` is
    rebuilt with ``optimized_model_create`` and evaluated with
    ``test_optimized_model``, and the predictions are scored by ``output_stats``.

    This function reads and mutates the notebook-level ``mlp_params``
    (``IN_FEATURES``, ``OUT_FEATURES`` and ``MODEL_TYPE``).

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to test on. Columns whose names start with "Metadata" are
        dropped; the ``oneb_``/``twob_``/``threeb_``/``fourb_``
        ``Metadata_Treatment_Dose_Inhibitor_Dose`` columns must be present.
    output_name : str
        Prefix of the ``test_name`` handed to ``output_stats``
        (``f"{output_name}_all_testing"``).
    title : str
        Passed through to ``output_stats`` as ``title``.

    Returns
    -------
    tuple
        ``(stats, recall, precision, f1, precision_, recall_, threshold_,
        dict_of_treatments)`` where the first seven items are the values
        returned by ``output_stats`` and ``dict_of_treatments`` maps each
        integer label code to its original treatment name.

    Raises
    ------
    ValueError
        If ``mlp_params.MODEL_TYPE`` is not one of the handled model types.
    """
    label_column = "oneb_Metadata_Treatment_Dose_Inhibitor_Dose"

    # Code snippet for metadata extraction by Jenna Tomkinson
    df_metadata = list(df.columns[df.columns.str.startswith("Metadata")])

    # keep only the non-metadata columns; drop() returns a new frame, so the
    # label encoding below does not write into the caller's dataframe
    df_values = df.drop(columns=df_metadata)

    # Converting strings into numbers
    le = preprocessing.LabelEncoder()
    lst_of_treatments = df_values[label_column].unique().tolist()
    print(lst_of_treatments)
    df_values[label_column] = le.fit_transform(df_values[label_column])
    lst_of_coded_treatments = df_values[label_column].unique().tolist()
    print(lst_of_coded_treatments)

    # make a dictionary of the treatments and their corresponding codes to decode later
    # (both lists are in order of first appearance, so positions line up)
    dict_of_treatments = dict(zip(lst_of_coded_treatments, lst_of_treatments))

    # split into X and Y where Y are the predictive column and x are the observable data
    df_values_X = df_values.drop(
        [
            "oneb_Metadata_Treatment_Dose_Inhibitor_Dose",
            "twob_Metadata_Treatment_Dose_Inhibitor_Dose",
            "threeb_Metadata_Treatment_Dose_Inhibitor_Dose",
            "fourb_Metadata_Treatment_Dose_Inhibitor_Dose",
        ],
        axis=1,
    )
    df_values_Y = df_values[label_column]
    test_data = Dataset_formatter(
        torch.FloatTensor(df_values_X.values), torch.FloatTensor(df_values_Y.values)
    )

    mlp_params.IN_FEATURES = df_values_X.shape[1]
    print("Number of in features: ", mlp_params.IN_FEATURES)
    # NOTE(review): MODEL_TYPE persists on the shared mlp_params between calls;
    # once it becomes "Regression" every later call is forced to OUT_FEATURES = 1.
    if mlp_params.MODEL_TYPE == "Regression":
        mlp_params.OUT_FEATURES = 1
    else:
        mlp_params.OUT_FEATURES = len(lst_of_coded_treatments)

    print("Number of out features: ", mlp_params.OUT_FEATURES)

    if mlp_params.OUT_FEATURES > 2:
        mlp_params.MODEL_TYPE = "Multi_Class"
    elif mlp_params.OUT_FEATURES == 2:
        # two classes are represented by a single output feature
        mlp_params.OUT_FEATURES = mlp_params.OUT_FEATURES - 1
        mlp_params.MODEL_TYPE = "Binary_Classification"
    elif mlp_params.OUT_FEATURES == 1:
        mlp_params.MODEL_TYPE = "Regression"

    # convert data class into a dataloader to be compatible with pytorch
    test_loader = torch.utils.data.DataLoader(
        dataset=test_data, batch_size=1, shuffle=mlp_params.SHUFFLE
    )
    model, _ = optimized_model_create(mlp_params, mlp_params.MODEL_NAME)

    # Only the binary branch yields probabilities. Bind the name up front so
    # the other model types no longer fail with UnboundLocalError further down.
    y_pred_prob_list = None

    # calling the testing function and outputting list values of tested model
    if mlp_params.MODEL_TYPE in ("Multi_Class", "Regression"):
        y_pred_list = test_optimized_model(
            model,
            test_loader,
            mlp_params,
            model_name=mlp_params.MODEL_NAME,
            shuffle=mlp_params.SHUFFLE,
        )
    elif mlp_params.MODEL_TYPE == "Binary_Classification":
        y_pred_list, y_pred_prob_list = test_optimized_model(
            model,
            test_loader,
            mlp_params,
            model_name=mlp_params.MODEL_NAME,
            shuffle=mlp_params.SHUFFLE,
        )
    else:
        raise ValueError("Model type must be specified for proper model testing")

    # un-nest list if nested i.e. length of input data does not match length of output data
    if len(y_pred_list) != len(df_values_Y):
        y_pred_list = un_nest(y_pred_list)
        if y_pred_prob_list is not None:
            y_pred_prob_list = un_nest(y_pred_prob_list)

    # NOTE(review): for non-binary model types y_pred_prob_list is None here —
    # confirm that output_stats accepts None for its probability argument.
    stats, recall, precision, f1, precision_, recall_, threshold_ = output_stats(
        y_pred_list,
        df_values_Y,
        mlp_params,
        y_pred_prob_list,
        test_name=f"{output_name}_all_testing",
        model_name=mlp_params.MODEL_NAME,
        title=title,
        shuffle=mlp_params.SHUFFLE,
    )
    return (
        stats,
        recall,
        precision,
        f1,
        precision_,
        recall_,
        threshold_,
        dict_of_treatments,
    )

In [6]:
# Show every treatment label present in the loaded dataset.
print(df.loc[:, "oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].unique())

['LPS_0.010_ug_per_ml_DMSO_0.025_%' 'LPS_0.100_ug_per_ml_DMSO_0.025_%'
 'LPS_Nigericin_100.000_ug_per_ml_1.000_uM_DMSO_0.025_%'
 'LPS_Nigericin_100.000_ug_per_ml_3.000_uM_DMSO_0.025_%'
 'DMSO_0.100_%_DMSO_0.025_%' 'media_ctr_0.0_0_Media_ctr_0.0_0'
 'DMSO_0.100_%_DMSO_1.000_%' 'LPS_1.000_ug_per_ml_DMSO_0.025_%'
 'LPS_10.000_ug_per_ml_DMSO_0.025_%'
 'LPS_Nigericin_100.000_ug_per_ml_10.000_uM_DMSO_0.025_%'
 'Disulfiram_0.100_uM_DMSO_0.025_%' 'Thapsigargin_1.000_uM_DMSO_0.025_%'
 'Thapsigargin_10.000_uM_DMSO_0.025_%' 'DMSO_0.100_%_Z-VAD-FMK_100.000_uM'
 'DMSO_0.100_%_Z-VAD-FMK_30.000_uM'
 'LPS_10.000_ug_per_ml_Disulfiram_0.100_uM'
 'LPS_10.000_ug_per_ml_Disulfiram_1.000_uM'
 'Disulfiram_1.000_uM_DMSO_0.025_%' 'Disulfiram_2.500_uM_DMSO_0.025_%'
 'Topotecan_5.000_nM_DMSO_0.025_%' 'Topotecan_10.000_nM_DMSO_0.025_%'
 'LPS_10.000_ug_per_ml_Disulfiram_2.500_uM'
 'LPS_10.000_ug_per_ml_Z-VAD-FMK_100.000_uM'
 'H2O2_100.000_nM_DMSO_0.025_%' 'H2O2_100.000_uM_DMSO_0.025_%'
 'Topotecan_20.000_nM_DMSO_0

In [7]:
# list of treatments to test with controls varying
# Every entry of paired_treatment_list is a two-item list of treatment names.

# DMSO control: paired with each of the treatments below, in this order
dmso_control = "DMSO_0.100_%_DMSO_0.025_%"
dmso_paired_treatments = [
    "LPS_100.000_ug_per_ml_DMSO_0.025_%",
    "Thapsigargin_1.000_uM_DMSO_0.025_%",
    "Thapsigargin_10.000_uM_DMSO_0.025_%",
    "LPS_0.010_ug_per_ml_DMSO_0.025_%",
    "LPS_0.100_ug_per_ml_DMSO_0.025_%",
    "LPS_1.000_ug_per_ml_DMSO_0.025_%",
    "LPS_10.000_ug_per_ml_DMSO_0.025_%",
    "Flagellin_0.100_ug_per_ml_DMSO_0.025_%",
    "Flagellin_1.000_ug_per_ml_DMSO_0.025_%",
    "Flagellin_1.000_ug_per_ml_Disulfiram_1.000_uM",
    "LPS_Nigericin_100.000_ug_per_ml_1.000_uM_DMSO_0.025_%",
    "LPS_Nigericin_100.000_ug_per_ml_3.000_uM_DMSO_0.025_%",
    "LPS_Nigericin_100.000_ug_per_ml_10.000_uM_DMSO_0.025_%",
    "LPS_Nigericin_1.000_ug_per_ml_1.000_uM_DMSO_0.025_%",
    "LPS_Nigericin_1.000_ug_per_ml_3.000_uM_DMSO_0.025_%",
    "LPS_Nigericin_1.000_ug_per_ml_10.000_uM_DMSO_0.025_%",
    "H2O2_100.000_uM_Z-VAD-FMK_100.000_uM",
    "H2O2_100.000_uM_DMSO_0.025_%",
    "H2O2_100.000_nM_DMSO_0.025_%",
]

# LPS (pyroptosis) control: every LPS dose is paired with both Thapsigargin doses
lps_controls = [
    "LPS_100.000_ug_per_ml_DMSO_0.025_%",
    "LPS_10.000_ug_per_ml_DMSO_0.025_%",
    "LPS_1.000_ug_per_ml_DMSO_0.025_%",
    "LPS_0.100_ug_per_ml_DMSO_0.025_%",
    "LPS_0.010_ug_per_ml_DMSO_0.025_%",
]
thapsigargin_doses = [
    "Thapsigargin_1.000_uM_DMSO_0.025_%",
    "Thapsigargin_10.000_uM_DMSO_0.025_%",
]

paired_treatment_list = [
    [dmso_control, treatment] for treatment in dmso_paired_treatments
] + [
    [lps_control, thapsigargin]
    for lps_control in lps_controls
    for thapsigargin in thapsigargin_doses
]

In [8]:
# create a dataframe to store the model stats
# It starts empty; its column order defines the layout of the results that
# are concatenated onto it later.
stats_columns = [
    "treatments_tested",
    "model",
    "group",
    "shuffled_data",
    "PR_Threshold",
    "Precision",
    "Recall",
]
model_stats_df = pd.DataFrame(columns=stats_columns)
model_stats_df

Unnamed: 0,treatments_tested,model,group,shuffled_data,PR_Threshold,Precision,Recall


In [9]:
# Test the model on every treatment pair and collect the precision-recall
# curve points of each pair. Frames are gathered in a list and concatenated
# once after the loop instead of growing model_stats_df on every iteration.
pair_stats_frames = []
for treatment_pair in paired_treatment_list:
    # filter df to only include the two treatments to test
    test_df = df[
        df["oneb_Metadata_Treatment_Dose_Inhibitor_Dose"].isin(treatment_pair)
    ]
    present_treatments = test_df[
        "oneb_Metadata_Treatment_Dose_Inhibitor_Dose"
    ].unique()
    # fail with a clear message instead of an IndexError while building the
    # title when one of the requested treatments is absent from the data
    if len(present_treatments) != 2:
        raise ValueError(
            f"Expected both treatments of {treatment_pair} in the data, "
            f"found: {list(present_treatments)}"
        )

    # the name follows the order in which the treatments appear in the data,
    # which is not necessarily the order given in the pair
    output_name = ("__").join(present_treatments)
    print(output_name)

    name_parts = output_name.split("__")
    # NOTE(review): the second half of the title is joined with "__"
    # (e.g. "DMSO vs LPS__100.000") — kept as-is; confirm this is intended.
    title = f'{name_parts[0].split("_")[0]} vs {("__").join(name_parts[1].split("_")[:2])}'
    print(title)
    (
        stats,
        recall,
        precision,
        f1,
        precision_,
        recall_,
        threshold_,
        dict_of_treatments,
    ) = test_loop(test_df, output_name, title)
    print(recall, precision, f1)

    # presumably threshold_ is one element shorter than precision_/recall_
    # (as with sklearn's precision_recall_curve) — pad it so the columns align
    threshold_ = np.append(threshold_, None)
    stats_df = pd.DataFrame(
        {
            "PR_Threshold": threshold_,
            "Precision": precision_,
            "Recall": recall_,
        }
    )

    # label the comparison as "<treatment coded 0> vs <treatment coded 1>";
    # the codes come from the label encoding done inside test_loop
    stats_df["treatments_tested"] = (
        f"{dict_of_treatments[0]} vs {dict_of_treatments[1]}"
    )
    stats_df["model"] = mlp_params.MODEL_NAME
    stats_df["group"] = "test"
    stats_df["shuffled_data"] = mlp_params.SHUFFLE
    pair_stats_frames.append(stats_df)

# Start from the empty schema of model_stats_df (keeps its column order) so
# that re-running this cell does not append the same results twice.
model_stats_df = pd.concat([model_stats_df.iloc[:0], *pair_stats_frames], axis=0)

DMSO_0.100_%_DMSO_0.025_%__LPS_100.000_ug_per_ml_DMSO_0.025_%
DMSO vs LPS__100.000


['DMSO_0.100_%_DMSO_0.025_%', 'LPS_100.000_ug_per_ml_DMSO_0.025_%']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.70      0.90      0.79    215130
           1       0.79      0.48      0.59    159435

    accuracy                           0.72    374565
   macro avg       0.74      0.69      0.69    374565
weighted avg       0.74      0.72      0.71    374565



0.47706588892024965 0.7856648521345715 0.5936560960951585


DMSO_0.100_%_DMSO_0.025_%__Thapsigargin_1.000_uM_DMSO_0.025_%
DMSO vs Thapsigargin__1.000


['DMSO_0.100_%_DMSO_0.025_%', 'Thapsigargin_1.000_uM_DMSO_0.025_%']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.86      0.90      0.88    215130
           1       0.85      0.79      0.82    148456

    accuracy                           0.86    363586
   macro avg       0.86      0.85      0.85    363586
weighted avg       0.86      0.86      0.86    363586



0.7926456323759228 0.850097165933407 0.820366774842355


DMSO_0.100_%_DMSO_0.025_%__Thapsigargin_10.000_uM_DMSO_0.025_%
DMSO vs Thapsigargin__10.000


['DMSO_0.100_%_DMSO_0.025_%', 'Thapsigargin_10.000_uM_DMSO_0.025_%']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.87      0.90      0.89    215130
           1       0.87      0.82      0.85    166984

    accuracy                           0.87    382114
   macro avg       0.87      0.86      0.87    382114
weighted avg       0.87      0.87      0.87    382114



0.8249233459493125 0.8690843475353157 0.8464282312747516


LPS_0.010_ug_per_ml_DMSO_0.025_%__DMSO_0.100_%_DMSO_0.025_%
LPS vs DMSO__0.100


['LPS_0.010_ug_per_ml_DMSO_0.025_%', 'DMSO_0.100_%_DMSO_0.025_%']
[1, 0]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.71      0.90      0.79    215130
           1       0.75      0.44      0.56    143080

    accuracy                           0.72    358210
   macro avg       0.73      0.67      0.67    358210
weighted avg       0.73      0.72      0.70    358210



0.4403690243220576 0.7522624704505838 0.5555330235674798


LPS_0.100_ug_per_ml_DMSO_0.025_%__DMSO_0.100_%_DMSO_0.025_%
LPS vs DMSO__0.100


['LPS_0.100_ug_per_ml_DMSO_0.025_%', 'DMSO_0.100_%_DMSO_0.025_%']
[1, 0]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.75      0.90      0.82    215130
           1       0.78      0.54      0.64    138597

    accuracy                           0.76    353727
   macro avg       0.77      0.72      0.73    353727
weighted avg       0.77      0.76      0.75    353727



0.5417938339213692 0.7834955812230674 0.6406043388870405


DMSO_0.100_%_DMSO_0.025_%__LPS_1.000_ug_per_ml_DMSO_0.025_%
DMSO vs LPS__1.000


['DMSO_0.100_%_DMSO_0.025_%', 'LPS_1.000_ug_per_ml_DMSO_0.025_%']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.79      0.90      0.84    215130
           1       0.81      0.62      0.70    139433

    accuracy                           0.79    354563
   macro avg       0.80      0.76      0.77    354563
weighted avg       0.79      0.79      0.79    354563



0.6204843903523556 0.806555665355285 0.7013891422340585


DMSO_0.100_%_DMSO_0.025_%__LPS_10.000_ug_per_ml_DMSO_0.025_%
DMSO vs LPS__10.000


['DMSO_0.100_%_DMSO_0.025_%', 'LPS_10.000_ug_per_ml_DMSO_0.025_%']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.80      0.90      0.85    215130
           1       0.84      0.69      0.76    156490

    accuracy                           0.81    371620
   macro avg       0.82      0.80      0.80    371620
weighted avg       0.82      0.81      0.81    371620



0.6893220014058407 0.8386745657819036 0.7566991217486462


DMSO_0.100_%_DMSO_0.025_%__Flagellin_0.100_ug_per_ml_DMSO_0.025_%
DMSO vs Flagellin__0.100


['DMSO_0.100_%_DMSO_0.025_%', 'Flagellin_0.100_ug_per_ml_DMSO_0.025_%']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.79      0.90      0.85    215130
           1       0.69      0.47      0.56     95915

    accuracy                           0.77    311045
   macro avg       0.74      0.69      0.70    311045
weighted avg       0.76      0.77      0.76    311045



0.4746285773862274 0.6869058756073272 0.5613697599713914


DMSO_0.100_%_DMSO_0.025_%__Flagellin_1.000_ug_per_ml_DMSO_0.025_%
DMSO vs Flagellin__1.000


['DMSO_0.100_%_DMSO_0.025_%', 'Flagellin_1.000_ug_per_ml_DMSO_0.025_%']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.84      0.90      0.87    215130
           1       0.60      0.47      0.53     67931

    accuracy                           0.80    283061
   macro avg       0.72      0.69      0.70    283061
weighted avg       0.79      0.80      0.79    283061



0.4676804404469241 0.604912414318355 0.5275174137201021


DMSO_0.100_%_DMSO_0.025_%__Flagellin_1.000_ug_per_ml_Disulfiram_1.000_uM
DMSO vs Flagellin__1.000


['DMSO_0.100_%_DMSO_0.025_%', 'Flagellin_1.000_ug_per_ml_Disulfiram_1.000_uM']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.91      0.90      0.90    215130
           1       0.81      0.81      0.81    107300

    accuracy                           0.87    322430
   macro avg       0.86      0.86      0.86    322430
weighted avg       0.87      0.87      0.87    322430



0.8112861136999068 0.8075157002254153 0.8093965160552483


LPS_Nigericin_100.000_ug_per_ml_1.000_uM_DMSO_0.025_%__DMSO_0.100_%_DMSO_0.025_%
LPS vs DMSO__0.100


['LPS_Nigericin_100.000_ug_per_ml_1.000_uM_DMSO_0.025_%', 'DMSO_0.100_%_DMSO_0.025_%']
[1, 0]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.74      0.90      0.81    215130
           1       0.83      0.60      0.70    170659

    accuracy                           0.77    385789
   macro avg       0.79      0.75      0.76    385789
weighted avg       0.78      0.77      0.76    385789



0.6036071932918862 0.8323381356000679 0.699755451395965


LPS_Nigericin_100.000_ug_per_ml_3.000_uM_DMSO_0.025_%__DMSO_0.100_%_DMSO_0.025_%
LPS vs DMSO__0.100


['LPS_Nigericin_100.000_ug_per_ml_3.000_uM_DMSO_0.025_%', 'DMSO_0.100_%_DMSO_0.025_%']
[1, 0]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.74      0.90      0.81    215130
           1       0.85      0.62      0.72    184854

    accuracy                           0.77    399984
   macro avg       0.79      0.76      0.76    399984
weighted avg       0.79      0.77      0.77    399984



0.6226102762179883 0.8472490098791243 0.7177638635966771


DMSO_0.100_%_DMSO_0.025_%__LPS_Nigericin_100.000_ug_per_ml_10.000_uM_DMSO_0.025_%
DMSO vs LPS__Nigericin


['DMSO_0.100_%_DMSO_0.025_%', 'LPS_Nigericin_100.000_ug_per_ml_10.000_uM_DMSO_0.025_%']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.68      0.90      0.78    215130
           1       0.80      0.48      0.60    176090

    accuracy                           0.71    391220
   macro avg       0.74      0.69      0.69    391220
weighted avg       0.73      0.71      0.70    391220



0.47922085297291156 0.8026365849946736 0.6001294332671944


DMSO_0.100_%_DMSO_0.025_%__LPS_Nigericin_1.000_ug_per_ml_1.000_uM_DMSO_0.025_%
DMSO vs LPS__Nigericin


['DMSO_0.100_%_DMSO_0.025_%', 'LPS_Nigericin_1.000_ug_per_ml_1.000_uM_DMSO_0.025_%']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.64      0.90      0.75    215130
           1       0.73      0.34      0.47    166013

    accuracy                           0.66    381143
   macro avg       0.69      0.62      0.61    381143
weighted avg       0.68      0.66      0.63    381143



0.34234066006878977 0.7325445007282523 0.466616857419662


DMSO_0.100_%_DMSO_0.025_%__LPS_Nigericin_1.000_ug_per_ml_3.000_uM_DMSO_0.025_%
DMSO vs LPS__Nigericin


['DMSO_0.100_%_DMSO_0.025_%', 'LPS_Nigericin_1.000_ug_per_ml_3.000_uM_DMSO_0.025_%']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.62      0.90      0.74    215130
           1       0.73      0.32      0.44    174261

    accuracy                           0.64    389391
   macro avg       0.67      0.61      0.59    389391
weighted avg       0.67      0.64      0.60    389391



0.3165252121817274 0.726642778099805 0.4409659070468363


DMSO_0.100_%_DMSO_0.025_%__LPS_Nigericin_1.000_ug_per_ml_10.000_uM_DMSO_0.025_%
DMSO vs LPS__Nigericin


['DMSO_0.100_%_DMSO_0.025_%', 'LPS_Nigericin_1.000_ug_per_ml_10.000_uM_DMSO_0.025_%']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.59      0.90      0.71    215130
           1       0.68      0.24      0.36    181166

    accuracy                           0.60    396296
   macro avg       0.63      0.57      0.54    396296
weighted avg       0.63      0.60      0.55    396296



0.24433392579181523 0.6808428824117512 0.3596134551407298


DMSO_0.100_%_DMSO_0.025_%__H2O2_100.000_uM_Z-VAD-FMK_100.000_uM
DMSO vs H2O2__100.000


['DMSO_0.100_%_DMSO_0.025_%', 'H2O2_100.000_uM_Z-VAD-FMK_100.000_uM']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.88      0.90      0.89    215130
           1       0.88      0.84      0.86    173437

    accuracy                           0.88    388567
   macro avg       0.88      0.87      0.88    388567
weighted avg       0.88      0.88      0.88    388567



0.8444334253936588 0.8759015824790977 0.8598796981003572


DMSO_0.100_%_DMSO_0.025_%__H2O2_100.000_uM_DMSO_0.025_%
DMSO vs H2O2__100.000


['DMSO_0.100_%_DMSO_0.025_%', 'H2O2_100.000_uM_DMSO_0.025_%']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.66      0.90      0.76    215130
           1       0.50      0.17      0.26    120247

    accuracy                           0.64    335377
   macro avg       0.58      0.54      0.51    335377
weighted avg       0.60      0.64      0.58    335377



0.1717797533410397 0.4988648988069362 0.25555974834986056


DMSO_0.100_%_DMSO_0.025_%__H2O2_100.000_nM_DMSO_0.025_%
DMSO vs H2O2__100.000


['DMSO_0.100_%_DMSO_0.025_%', 'H2O2_100.000_nM_DMSO_0.025_%']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.66      0.90      0.76    215130
           1       0.50      0.17      0.25    120289

    accuracy                           0.64    335419
   macro avg       0.58      0.54      0.51    335419
weighted avg       0.60      0.64      0.58    335419



0.1703979582505466 0.49693311028680875 0.25377624801901744


Thapsigargin_1.000_uM_DMSO_0.025_%__LPS_100.000_ug_per_ml_DMSO_0.025_%
Thapsigargin vs LPS__100.000


['Thapsigargin_1.000_uM_DMSO_0.025_%', 'LPS_100.000_ug_per_ml_DMSO_0.025_%']
[1, 0]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.73      0.52      0.61    159435
           1       0.61      0.79      0.69    148456

    accuracy                           0.65    307891
   macro avg       0.67      0.66      0.65    307891
weighted avg       0.67      0.65      0.65    307891



0.7926456323759228 0.6073946751731756 0.6877641076594874


Thapsigargin_10.000_uM_DMSO_0.025_%__LPS_100.000_ug_per_ml_DMSO_0.025_%
Thapsigargin vs LPS__100.000


['Thapsigargin_10.000_uM_DMSO_0.025_%', 'LPS_100.000_ug_per_ml_DMSO_0.025_%']
[1, 0]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.74      0.52      0.61    159435
           1       0.64      0.82      0.72    166984

    accuracy                           0.68    326419
   macro avg       0.69      0.67      0.67    326419
weighted avg       0.69      0.68      0.67    326419



0.8249233459493125 0.6442589214723352 0.7234830380730788


LPS_10.000_ug_per_ml_DMSO_0.025_%__Thapsigargin_1.000_uM_DMSO_0.025_%
LPS vs Thapsigargin__1.000


['LPS_10.000_ug_per_ml_DMSO_0.025_%', 'Thapsigargin_1.000_uM_DMSO_0.025_%']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.61      0.31      0.41    156490
           1       0.52      0.79      0.63    148456

    accuracy                           0.55    304946
   macro avg       0.57      0.55      0.52    304946
weighted avg       0.57      0.55      0.52    304946



0.7926456323759228 0.5217273714779755 0.6292656971505424


LPS_10.000_ug_per_ml_DMSO_0.025_%__Thapsigargin_10.000_uM_DMSO_0.025_%
LPS vs Thapsigargin__10.000


['LPS_10.000_ug_per_ml_DMSO_0.025_%', 'Thapsigargin_10.000_uM_DMSO_0.025_%']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.62      0.31      0.41    156490
           1       0.56      0.82      0.67    166984

    accuracy                           0.58    323474
   macro avg       0.59      0.57      0.54    323474
weighted avg       0.59      0.58      0.55    323474



0.8249233459493125 0.5608193110523937 0.6677039783812605


LPS_1.000_ug_per_ml_DMSO_0.025_%__Thapsigargin_1.000_uM_DMSO_0.025_%
LPS vs Thapsigargin__1.000


['LPS_1.000_ug_per_ml_DMSO_0.025_%', 'Thapsigargin_1.000_uM_DMSO_0.025_%']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.63      0.38      0.47    139433
           1       0.58      0.79      0.67    148456

    accuracy                           0.59    287889
   macro avg       0.60      0.59      0.57    287889
weighted avg       0.60      0.59      0.57    287889



0.7926456323759228 0.5762945114575222 0.6673737044336372


LPS_1.000_ug_per_ml_DMSO_0.025_%__Thapsigargin_10.000_uM_DMSO_0.025_%
LPS vs Thapsigargin__10.000


['LPS_1.000_ug_per_ml_DMSO_0.025_%', 'Thapsigargin_10.000_uM_DMSO_0.025_%']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.64      0.38      0.48    139433
           1       0.61      0.82      0.70    166984

    accuracy                           0.62    306417
   macro avg       0.63      0.60      0.59    306417
weighted avg       0.63      0.62      0.60    306417



0.8249233459493125 0.6142242436403362 0.7041500425560193


LPS_0.100_ug_per_ml_DMSO_0.025_%__Thapsigargin_1.000_uM_DMSO_0.025_%
LPS vs Thapsigargin__1.000


['LPS_0.100_ug_per_ml_DMSO_0.025_%', 'Thapsigargin_1.000_uM_DMSO_0.025_%']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.67      0.46      0.55    138597
           1       0.61      0.79      0.69    148456

    accuracy                           0.63    287053
   macro avg       0.64      0.63      0.62    287053
weighted avg       0.64      0.63      0.62    287053



0.7926456323759228 0.6104511215787181 0.6897192427173086


LPS_0.100_ug_per_ml_DMSO_0.025_%__Thapsigargin_10.000_uM_DMSO_0.025_%
LPS vs Thapsigargin__10.000


['LPS_0.100_ug_per_ml_DMSO_0.025_%', 'Thapsigargin_10.000_uM_DMSO_0.025_%']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.68      0.46      0.55    138597
           1       0.65      0.82      0.73    166984

    accuracy                           0.66    305581
   macro avg       0.67      0.64      0.64    305581
weighted avg       0.66      0.66      0.65    305581



0.8249233459493125 0.6471950761135125 0.7253306794725978


LPS_0.010_ug_per_ml_DMSO_0.025_%__Thapsigargin_1.000_uM_DMSO_0.025_%
LPS vs Thapsigargin__1.000


['LPS_0.010_ug_per_ml_DMSO_0.025_%', 'Thapsigargin_1.000_uM_DMSO_0.025_%']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.72      0.56      0.63    143080
           1       0.65      0.79      0.72    148456

    accuracy                           0.68    291536
   macro avg       0.69      0.68      0.67    291536
weighted avg       0.69      0.68      0.67    291536



0.7926456323759228 0.6512748988548879 0.7150396339518194


LPS_0.010_ug_per_ml_DMSO_0.025_%__Thapsigargin_10.000_uM_DMSO_0.025_%
LPS vs Thapsigargin__10.000


['LPS_0.010_ug_per_ml_DMSO_0.025_%', 'Thapsigargin_10.000_uM_DMSO_0.025_%']
[0, 1]


Number of in features:  1245
Number of out features:  2
DMSO_0.025_vs_Thapsigargin_1


              precision    recall  f1-score   support

           0       0.73      0.56      0.63    143080
           1       0.69      0.82      0.75    166984

    accuracy                           0.70    310064
   macro avg       0.71      0.69      0.69    310064
weighted avg       0.71      0.70      0.70    310064



0.8249233459493125 0.6861479300846297 0.7491631338360423


In [10]:
# Display the accumulated precision/recall rows for all tested treatment pairs.
model_stats_df

Unnamed: 0,treatments_tested,model,group,shuffled_data,PR_Threshold,Precision,Recall
0,DMSO_0.100_%_DMSO_0.025_% vs LPS_100.000_ug_pe...,DMSO_0.025_vs_Thapsigargin_1,test,False,0.0,0.425654,1.000000
1,DMSO_0.100_%_DMSO_0.025_% vs LPS_100.000_ug_pe...,DMSO_0.025_vs_Thapsigargin_1,test,False,0.0,0.425657,0.999912
2,DMSO_0.100_%_DMSO_0.025_% vs LPS_100.000_ug_pe...,DMSO_0.025_vs_Thapsigargin_1,test,False,0.0,0.425656,0.999906
3,DMSO_0.100_%_DMSO_0.025_% vs LPS_100.000_ug_pe...,DMSO_0.025_vs_Thapsigargin_1,test,False,0.0,0.425657,0.999906
4,DMSO_0.100_%_DMSO_0.025_% vs LPS_100.000_ug_pe...,DMSO_0.025_vs_Thapsigargin_1,test,False,0.0,0.425658,0.999906
...,...,...,...,...,...,...,...
269669,LPS_0.010_ug_per_ml_DMSO_0.025_% vs Thapsigarg...,DMSO_0.025_vs_Thapsigargin_1,test,False,0.999999,0.700000,0.000084
269670,LPS_0.010_ug_per_ml_DMSO_0.025_% vs Thapsigarg...,DMSO_0.025_vs_Thapsigargin_1,test,False,1.0,0.777778,0.000084
269671,LPS_0.010_ug_per_ml_DMSO_0.025_% vs Thapsigarg...,DMSO_0.025_vs_Thapsigargin_1,test,False,1.0,0.764706,0.000078
269672,LPS_0.010_ug_per_ml_DMSO_0.025_% vs Thapsigarg...,DMSO_0.025_vs_Thapsigargin_1,test,False,1.0,0.769231,0.000060


In [11]:
# set path for the model training metrics
metrics_path = pathlib.Path(
    f"../../results/{mlp_params.MODEL_TYPE}/{mlp_params.MODEL_NAME}/{mlp_params.CELL_TYPE}"
)
metrics_path.mkdir(parents=True, exist_ok=True)
metrics_file = metrics_path / "testing_metrics.csv"

# check if the model training metrics file exists
if metrics_file.exists():
    metrics_df = pd.read_csv(metrics_file)
    recorded_shuffle_states = metrics_df["shuffled_data"].unique().tolist()
    # The file is left untouched when it already holds more than one shuffle
    # state or already holds results for the current one. Working on a plain
    # list keeps the check well-defined even when the file has no rows.
    already_recorded = (
        len(recorded_shuffle_states) > 1
        or mlp_params.SHUFFLE in recorded_shuffle_states
    )
    if not already_recorded:
        metrics_df = pd.concat([metrics_df, model_stats_df], axis=0)
        metrics_df.to_csv(metrics_file, index=False)
else:
    # first run for this model/cell type: write the results as a new file
    model_stats_df.to_csv(metrics_file, index=False)