In [1]:
import sys
# Add ../code/ to the module search path (presumably where cohort_analysis_utils
# lives -- confirm). The path is relative, so it is resolved against the
# kernel's working directory, not against this notebook's location.
sys.path.append('../code/')

In [2]:
import pandas as pd
import cohort_analysis_utils as utils

In [3]:
# Silence every Python warning issued from this point on.
# NOTE(review): no category or module filter is given, so deprecation and
# numerical warnings raised by later cells are hidden as well.
import warnings
warnings.filterwarnings('ignore')

In [4]:
# Load the tab-separated table: the first row supplies the column names and
# the first column becomes the index.
df_120 = pd.read_csv(
    '../data/ruo_vs_mmk_20240723.csv',
    sep='\t',
    index_col=0,
    header=0,
)


In [5]:
# Run the column headers through the project helper; the exact renaming rules
# live in cohort_analysis_utils. The frame is rebound to the helper's result.
df_120 = utils.normalize_column_names(df_120)

In [6]:
# Columns handed to utils.cols_as_numbers, grouped by what they hold.
cols_120_to_num = [
    # sample-level measurements
    'Collected_volume_mL',
    'pH',
    # *_RUO readings
    'MMP9_RUO',
    'HSPB1_RUO',
    'PERM_RUO',
    # *_MMK readings
    'MMP9_MMK',
    'HSPB1_MMK',
    'AGRIN_MMK',
    'KPYM_MMK',
    'PERM_MMK',
    # total protein columns
    'Total_protein_UCFP',
    'Total_protein_BCA',
]
df_120 = utils.cols_as_numbers(df_120, cols_120_to_num)

In [7]:
# Per-column label -> integer code mapping handed to utils.cols_as_category:
# 'Benign' is paired with 0 and 'Endometrial cancer' with 1.
pathology_codes = {'Pathology': {'Benign': 0, 'Endometrial cancer': 1}}
df_120 = utils.cols_as_category(df_120, pathology_codes)

In [8]:
# Run configuration consumed by the modelling and plotting cells below.

# Output root: passed as folder_name to the model run and used as the prefix
# of the scatter-plot folders.
RESULTS_PATH = '../data/results/60x60'

# Values forwarded as `methods` to utils.compute_all_models_and_save.
METHODS = [
    'normalized',
    'kronmal',
    'direct',
    'undo_dilution',
]

# Forwarded as `max_biomarker_count`.
MAX_BIOMARKER_COUNT = 3

# Forwarded as `plot_rocs`.
PLOT_ROCS = True


In [9]:
# Biomarker columns analysed in this notebook, *_RUO columns first, then *_MMK.
BIOMARKERS_120 = [
    'MMP9_RUO', 'HSPB1_RUO', 'PERM_RUO',
    'MMP9_MMK', 'HSPB1_MMK', 'AGRIN_MMK', 'KPYM_MMK', 'PERM_MMK',
]

# Column passed as `normalizing_col` to the model run and the pairwise scatters.
NORMALIZING_COL_120 = 'Total_protein_BCA'

# Column holding the collected sample volume.
VOLUME_COL = 'Collected_volume_mL'

In [10]:
# Run the project's model sweep over the configured biomarkers and methods;
# output goes under RESULTS_PATH (passed as folder_name).
models_120 = utils.compute_all_models_and_save(
    # data and columns
    df=df_120,
    biomarkers=BIOMARKERS_120,
    normalizing_col=NORMALIZING_COL_120,
    volume_col=VOLUME_COL,
    # NOTE(review): 0.5 matches the amount added to Collected_volume_mL in the
    # undo-dilution cell further down -- presumably mL; confirm.
    volume_added=0.5,
    # modelling options
    apply_log=True,
    avoid_same_biomarker=True,
    methods=METHODS,
    max_biomarker_count=MAX_BIOMARKER_COUNT,
    # output
    folder_name=RESULTS_PATH,
    plot_rocs=PLOT_ROCS,
)

In [11]:
# One scatter per unordered pair of biomarkers: each marker is paired only with
# the markers that come after it in BIOMARKERS_120, so no pair is drawn twice.
for position, marker_x in enumerate(BIOMARKERS_120):
    for marker_y in BIOMARKERS_120[position + 1:]:
        utils.plot_scatter_to_file(
            df_120,
            marker_x,
            marker_y,
            normalizing_col=NORMALIZING_COL_120,
            apply_log_x=True,
            apply_log_y=True,
            hue='Pathology',
            folder=RESULTS_PATH+'/scatters/',
        )

In [12]:
# Scatter every biomarker, plus Total_protein_BCA, against the collected volume
# using the values as loaded (no normalising column; log applied to x only).
diluted_folder = RESULTS_PATH+'/volume_scatters/diluted/'
for measured_col in [*BIOMARKERS_120, 'Total_protein_BCA']:
    utils.plot_scatter_to_file(
        df_120,
        measured_col,
        VOLUME_COL,
        normalizing_col=None,
        apply_log_x=True,
        apply_log_y=False,
        hue='Pathology',
        folder=diluted_folder,
    )

In [13]:
# Build a copy of the cohort table in which every concentration column is
# scaled by (collected volume + added volume) / collected volume.
#
# The original cell repeated the same expression for ten columns and rebuilt
# the ratio on every line; here the ratio is computed once and applied in a
# loop over the same columns, in the same order.

# Amount added to the collected volume; the same 0.5 is passed as volume_added
# to utils.compute_all_models_and_save. Presumably mL, since it is added to
# Collected_volume_mL -- confirm.
VOLUME_ADDED = 0.5

# Columns that get rescaled: all biomarkers plus both total-protein columns.
COLS_TO_UNDILUTE = BIOMARKERS_120 + ['Total_protein_UCFP', 'Total_protein_BCA']

df_120_undiluted = df_120.copy()
df_120_undiluted['Final_volume'] = df_120_undiluted[VOLUME_COL] + VOLUME_ADDED

# Row-wise correction factor. NOTE(review): rows with a collected volume of 0
# give inf here, exactly as in the original expression.
dilution_factor = df_120_undiluted['Final_volume'].div(df_120_undiluted[VOLUME_COL])

for col in COLS_TO_UNDILUTE:
    df_120_undiluted[col] = df_120_undiluted[col].multiply(dilution_factor)


In [14]:
# Same volume scatters as for the diluted data, but drawn from
# df_120_undiluted and written to the 'undiluted' folder.
undiluted_folder = RESULTS_PATH+'/volume_scatters/undiluted/'
for measured_col in [*BIOMARKERS_120, 'Total_protein_BCA']:
    utils.plot_scatter_to_file(
        df_120_undiluted,
        measured_col,
        VOLUME_COL,
        normalizing_col=None,
        apply_log_x=True,
        apply_log_y=False,
        hue='Pathology',
        folder=undiluted_folder,
    )