In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
#from sklearn.metrics import accuracy_score,precision_score,\
#recall_score,confusion_matrix,roc_auc_score,average_precision_score

from sklearn.metrics import confusion_matrix,accuracy_score,roc_auc_score,precision_score,\
recall_score,f1_score,roc_curve,cohen_kappa_score,matthews_corrcoef,classification_report,auc

from utils_multiclass_classifier import *
import seaborn as sns

from sklearn.preprocessing import LabelBinarizer

In [2]:
# Cohort tag; it determines which results folder this notebook writes into.
project = "NCI"

# Output folder for this cohort. The trailing slash is kept so that later
# cells can append a file name directly to the string.
result_dir = f"results_{project}/"
if not os.path.isdir(result_dir):
    os.makedirs(result_dir, exist_ok=True)

In [3]:
# Class dictionary: the "DBTA_class" column holds the class label names, and
# the number of rows defines the number of classes.
df_dict = pd.read_csv("../10methylation_meta/NCI_dict_10class.csv")

# to_numpy() always returns a plain ndarray, which later cells rely on for
# element-wise comparison (class_names == x) and integer-array indexing
# (class_names[topk_idxs]). `.values` may instead hand back a pandas
# ExtensionArray when the column has a string extension dtype.
class_names = df_dict["DBTA_class"].to_numpy()
n_class = len(class_names)
print("n_class:", n_class)

n_class: 10


In [4]:
model_names = ["LR", "KNN", "SVM", "RF"]
pred_scores = []
reference_names = None  # true labels from the first file, used to verify row alignment
for model_name in model_names:

    df1 = pd.read_csv(f"results_{model_name}/{model_name}_scores.csv")

    actual_names = df1["DBTA_class"].to_numpy()

    # The per-model score matrices are later averaged row by row and scored
    # against the labels of the LAST file read (actual_idxs leaks out of this
    # loop), so every file must list the same samples in the same order.
    if reference_names is None:
        reference_names = actual_names
    elif not np.array_equal(reference_names, actual_names):
        raise ValueError(
            f"true labels in results_{model_name}/{model_name}_scores.csv do not match "
            f"those of {model_names[0]}; the score files are not row-aligned"
        )

    # Map every true label to its position in class_names with one broadcast
    # comparison; argmax picks the first matching class, as the original
    # argwhere(...)[0][0] lookup did.
    is_class = actual_names[:, np.newaxis] == np.asarray(class_names)[np.newaxis, :]
    has_class = is_class.any(axis=1)
    if not has_class.all():
        unknown = sorted(map(str, set(actual_names[~has_class])))
        raise ValueError(f"labels not present in class_names for {model_name}: {unknown}")
    actual_idxs = is_class.argmax(axis=1)

    # Rescale each row so the class scores sum to 1, making the models
    # comparable before they are averaged.
    pred_scores1 = df1[class_names].to_numpy()
    pred_scores1_sum = pred_scores1.sum(axis=1, keepdims=True)
    pred_scores1 = pred_scores1 / pred_scores1_sum

    pred_scores.append(pred_scores1)

    # Stand-alone accuracy of this model (top-1 class vs. true class).
    pred_idx1 = np.argmax(pred_scores1, axis=1)
    acc1 = np.mean(actual_idxs == pred_idx1)
    print(f"model_name: {model_name}, acc1: {acc1}")

model_name: LR, acc1: 0.6525612472160356
model_name: KNN, acc1: 0.6620267260579065
model_name: SVM, acc1: 0.6631403118040089
model_name: RF, acc1: 0.6280623608017817


In [5]:
# The loading cell leaves `pred_scores` as a list of per-model score matrices.
# Keep the stacked (n_models, n_samples, n_class) array under its own name so
# this cell can be re-run: the original rebinding of `pred_scores` turned a
# second run into an average over samples, which then failed at argmax(axis=1).
if isinstance(pred_scores, list):
    pred_scores_per_model = np.stack(pred_scores)

# Ensemble score = unweighted mean of the normalised per-model scores.
pred_scores = pred_scores_per_model.mean(axis=0)

pred_idxs = np.argmax(pred_scores, axis=1)
acc = np.mean(actual_idxs == pred_idxs)
print(f"combined, acc: {acc}")

combined, acc: 0.6592427616926503


In [6]:
def find_topk_idxs(pred_scores, topk=5):
    """Return, for each row, the column indices of the ``topk`` largest scores.

    Parameters
    ----------
    pred_scores : array-like of shape (n_samples, n_classes)
        One row of numeric scores per sample.
    topk : int, default 5
        Number of indices to keep per row. Values larger than the number of
        columns simply return every column.

    Returns
    -------
    numpy.ndarray of shape (n_samples, min(topk, n_classes))
        Column indices ordered from highest to lowest score. Ties are broken
        in favour of the lower index, so for NaN-free scores column 0 equals
        ``np.argmax(pred_scores, axis=1)``. NaN scores are ranked last.

    Raises
    ------
    ValueError
        If ``topk`` is negative (a negative slice bound would silently drop
        columns from the end instead of selecting the best ones).
    """
    if topk < 0:
        raise ValueError(f"topk must be non-negative, got {topk}")

    scores = np.asarray(pred_scores, dtype=float)

    # Sorting the negated scores with a stable sort gives descending order while
    # keeping equal scores in their original (lowest-index-first) order. The
    # previous ascending-sort-then-flip ranked ties highest-index-first, which
    # disagrees with np.argmax.
    order = np.argsort(-scores, axis=1, kind="stable")

    return order[:, :topk]

In [7]:
# Rank the ensemble scores and keep the three best class indices per sample.
topk_idxs = find_topk_idxs(pred_scores, topk=3)
print(np.shape(topk_idxs))

# Translate the class indices into class names; the result has the same
# shape as topk_idxs.
all_topk_pred_names = np.take(class_names, topk_idxs)
print(np.shape(all_topk_pred_names))

(1796, 3)
(1796, 3)


In [8]:
#df_meta = pd.read_csv(f"../10methylation_meta/{project}_slide_selected.csv")
# df1 is the last per-model score table read by the loading loop.
# NOTE(review): the arithmetic below assumes its columns are laid out as
# [metadata..., n_class score columns, 3 further columns] — confirm against the CSV.
n_trailing_cols = 3
n_cols_meta = df1.shape[1] - (n_class + n_trailing_cols)

# Take an explicit copy: later cells add columns to df_meta, and writing into
# an iloc slice of df1 triggers SettingWithCopyWarning and leaves it unclear
# whether df1 is modified too.
df_meta = df1.iloc[:, :n_cols_meta].copy()
df_meta

Unnamed: 0,sample,idat_filename,slide_file_name,slide_name,family_class,family_compound,DBTA_compound,DBTA_name,class_idx,count,NCI_class,age,location,sex,DBTA_class,location_idx,sex_idx,DBTA_class_idx,NCI_class_idx
0,AA03,204920840035_R08C01,AA03_ST-21-3098_A1.ndpi,HGG_00002_AA03_ST-21-3098_A1.ndpi,Grade 4 glioma,DMG_K27+GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_...,GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_RTK2,"Glioblastoma, IDH-wildtype",20,53,GBM_MES_ATYP,41.0,cerebral hemisphere,M,GBM,0,1,0,18
1,AA28,204920840047_R06C01,AA28_ST-21-2997_A1_2.ndpi,HGG_00006_AA28_ST-21-2997_A1_2.ndpi,Grade 4 glioma,DMG_K27+GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_...,GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_RTK2,"Glioblastoma, IDH-wildtype",16,64,GBM_RTK1,68.0,cerebral hemisphere,F,GBM,0,0,0,12
2,AA42,205003700054_R04C01,AA42_ST-20-6587.ndpi,HGG_00010_AA42_ST-20-6587.ndpi,Grade 4 glioma,DMG_K27+GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_...,GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_RTK2,"Glioblastoma, IDH-wildtype",3,102,GBM_RTK2,65.0,cerebral hemisphere,F,GBM,0,0,0,2
3,AA74,204957750034_R03C01,AA74_ST-21-3231_A1.ndpi,HGG_00014_AA74_ST-21-3231_A1.ndpi,Medulloblastoma,MB_G3+MB_G4+MB_SHH+MB_WNT,MB_G3+MB_G4+MB_SHH+MB_WNT,Medulloblastoma,4,97,MB_G4,7.0,posterior fossa,M,MB,1,1,1,3
4,AA93,204957750034_R05C01,AA93_ST-21-3221_A1.ndpi,HGG_00016_AA93_ST-21-3221_A1.ndpi,Meningioma,MNG_BEN+MNG_INT+MNG_MAL,MNG_BEN+MNG_INT+MNG_MAL,Meningioma,6,92,MNG_BEN,38.0,dural based,F,MEN,2,0,4,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1791,Z930,204920840137_R02C01,Z930_SI-21-2751_A10.ndpi,HGG_03076_Z930_SI-21-2751_A10.ndpi,Grade 4 glioma,DMG_K27+GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_...,GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_RTK2,"Glioblastoma, IDH-wildtype",16,64,GBM_RTK1,39.0,cerebral hemisphere,M,GBM,0,1,0,12
1792,Z931,204920840137_R03C01,Z931_SI-21-2836_D-1.ndpi,HGG_03077_Z931_SI-21-2836_D-1.ndpi,IDH-mutant glioma,A_IDH_HG+A_IDH_LG+O_IDH,A_IDH_HG+A_IDH_LG,"Astrocytoma, IDH-mutant",14,66,A_IDH_HG,33.0,cerebral hemisphere,M,A-IDH,0,1,5,11
1793,Z949,204920840132_R01C01,Z949_ST-21-2923_A1.ndpi,HGG_03079_Z949_ST-21-2923_A1.ndpi,Choroid plexus,CPC_PED_AD+CPP_AD+CPP_PED,CPC_PED_AD+CPP_AD+CPP_PED,Choroid plexus,27,38,CPC_PED_AD,2.0,ventricle,M,CP,3,1,6,22
1794,Z952,204920840132_R04C01,Z952_ST-21-2929_A1.ndpi,HGG_03082_Z952_ST-21-2929_A1.ndpi,Medulloblastoma,MB_G3+MB_G4+MB_SHH+MB_WNT,MB_G3+MB_G4+MB_SHH+MB_WNT,Medulloblastoma,2,116,MB_SHH,38.0,posterior fossa,M,MB,1,1,1,1


In [9]:
# Column names for the ranked predictions; the count follows the top-k array
# rather than a hard-coded 3.
topk_cols = [f"top{rank + 1}_pred_name" for rank in range(all_topk_pred_names.shape[1])]

# Start from the metadata columns only. Dropping any prediction columns left by
# a previous run makes this cell re-runnable (the original in-place insert
# raised on the second run because the columns already existed), and drop()
# returns a new frame, so nothing is written into a slice of df1.
meta_only = df_meta.drop(columns=[*topk_cols, *class_names], errors="ignore")
n_cols_meta = meta_only.shape[1]

# Final layout: metadata | top-k predicted names | ensemble score per class.
df_meta = pd.concat(
    [
        meta_only,
        pd.DataFrame(all_topk_pred_names, columns=topk_cols, index=meta_only.index),
        pd.DataFrame(pred_scores, columns=class_names, index=meta_only.index),
    ],
    axis=1,
)

df_meta

Unnamed: 0,sample,idat_filename,slide_file_name,slide_name,family_class,family_compound,DBTA_compound,DBTA_name,class_idx,count,...,GBM,MB,EPEN,PA,MEN,A-IDH,CP,SE,MPE,O-IDH
0,AA03,204920840035_R08C01,AA03_ST-21-3098_A1.ndpi,HGG_00002_AA03_ST-21-3098_A1.ndpi,Grade 4 glioma,DMG_K27+GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_...,GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_RTK2,"Glioblastoma, IDH-wildtype",20,53,...,0.357466,0.004015,0.085713,0.085232,0.004456,0.279215,0.003650,0.009812,0.005465,0.164976
1,AA28,204920840047_R06C01,AA28_ST-21-2997_A1_2.ndpi,HGG_00006_AA28_ST-21-2997_A1_2.ndpi,Grade 4 glioma,DMG_K27+GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_...,GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_RTK2,"Glioblastoma, IDH-wildtype",16,64,...,0.773123,0.000248,0.015979,0.006098,0.000689,0.067243,0.000461,0.001057,0.000554,0.134548
2,AA42,205003700054_R04C01,AA42_ST-20-6587.ndpi,HGG_00010_AA42_ST-20-6587.ndpi,Grade 4 glioma,DMG_K27+GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_...,GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_RTK2,"Glioblastoma, IDH-wildtype",3,102,...,0.770966,0.000642,0.022964,0.016336,0.002339,0.067752,0.001230,0.003314,0.001934,0.112523
3,AA74,204957750034_R03C01,AA74_ST-21-3231_A1.ndpi,HGG_00014_AA74_ST-21-3231_A1.ndpi,Medulloblastoma,MB_G3+MB_G4+MB_SHH+MB_WNT,MB_G3+MB_G4+MB_SHH+MB_WNT,Medulloblastoma,4,97,...,0.000725,0.624643,0.168432,0.174735,0.002090,0.002003,0.004549,0.017650,0.004143,0.001030
4,AA93,204957750034_R05C01,AA93_ST-21-3221_A1.ndpi,HGG_00016_AA93_ST-21-3221_A1.ndpi,Meningioma,MNG_BEN+MNG_INT+MNG_MAL,MNG_BEN+MNG_INT+MNG_MAL,Meningioma,6,92,...,0.002592,0.004249,0.008019,0.003851,0.960251,0.003047,0.005836,0.003593,0.004807,0.003755
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1791,Z930,204920840137_R02C01,Z930_SI-21-2751_A10.ndpi,HGG_03076_Z930_SI-21-2751_A10.ndpi,Grade 4 glioma,DMG_K27+GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_...,GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_RTK2,"Glioblastoma, IDH-wildtype",16,64,...,0.338815,0.004045,0.063641,0.083225,0.004463,0.321454,0.003666,0.009800,0.005475,0.165416
1792,Z931,204920840137_R03C01,Z931_SI-21-2836_D-1.ndpi,HGG_03077_Z931_SI-21-2836_D-1.ndpi,IDH-mutant glioma,A_IDH_HG+A_IDH_LG+O_IDH,A_IDH_HG+A_IDH_LG,"Astrocytoma, IDH-mutant",14,66,...,0.163039,0.000720,0.077487,0.110886,0.000491,0.478065,0.000571,0.000700,0.000927,0.167115
1793,Z949,204920840132_R01C01,Z949_ST-21-2923_A1.ndpi,HGG_03079_Z949_ST-21-2923_A1.ndpi,Choroid plexus,CPC_PED_AD+CPP_AD+CPP_PED,CPC_PED_AD+CPP_AD+CPP_PED,Choroid plexus,27,38,...,0.003307,0.010549,0.012461,0.012913,0.005390,0.004956,0.934254,0.005875,0.006765,0.003529
1794,Z952,204920840132_R04C01,Z952_ST-21-2929_A1.ndpi,HGG_03082_Z952_ST-21-2929_A1.ndpi,Medulloblastoma,MB_G3+MB_G4+MB_SHH+MB_WNT,MB_G3+MB_G4+MB_SHH+MB_WNT,Medulloblastoma,2,116,...,0.005180,0.463768,0.242793,0.045318,0.003482,0.004040,0.003698,0.224889,0.004062,0.002771


In [10]:
# Write the combined table without the row index. `index` is documented as a
# bool, so pass False explicitly instead of relying on None being falsy.
df_meta.to_csv(f"{result_dir}demo_scores.csv", index=False)