In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
#from sklearn.metrics import accuracy_score,precision_score,\
#recall_score,confusion_matrix,roc_auc_score,average_precision_score

from sklearn.metrics import confusion_matrix,accuracy_score,roc_auc_score,precision_score,\
recall_score,f1_score,roc_curve,cohen_kappa_score,matthews_corrcoef,classification_report,auc

from utils_multiclass_classifier import *
import seaborn as sns

from sklearn.preprocessing import LabelBinarizer

In [2]:
# Project tag; it is embedded in the name of the output directory below.
project = "NCI"

# Output directory for this notebook (relative to the working directory).
# exist_ok=True makes re-running the cell a no-op when it already exists.
result_dir = f"results_{project}/"
os.makedirs(name=result_dir, exist_ok=True)

In [3]:
# Class dictionary: the "DBTA_name" column supplies the class labels, and the
# order of its rows defines the class index used throughout the notebook.
df_dict = pd.read_csv("../10methylation_meta/NCI-demo_dict_10class.csv")

class_names = df_dict.loc[:, "DBTA_name"].values
n_class = class_names.shape[0]
print("n_class:", n_class)

n_class: 10


In [4]:
model_names = ["LR", "KNN", "SVM", "RF"]

# Row-normalised score matrix of every model, in `model_names` order.
pred_scores = []

# Ground-truth class indices. Set by the first model and verified against every
# later one, because the following cell averages the score matrices row by row
# and evaluates the result against this single vector.
actual_idxs = None

for model_name in model_names:

    df1 = pd.read_csv(f"results_{model_name}/{model_name}_score.csv")

    # Position of each sample's true label within `class_names`.
    actual_names = df1["DBTA_name"].values
    model_actual_idxs = np.array([np.argwhere(class_names == x)[0][0] for x in actual_names])

    # Fail loudly if this file's labels do not line up with the earlier files:
    # averaging misaligned rows would silently produce a meaningless ensemble.
    if actual_idxs is not None and not np.array_equal(actual_idxs, model_actual_idxs):
        raise ValueError(
            f"{model_name}: ground-truth labels differ from those of the previously "
            "loaded models; the per-model score files are not row-aligned"
        )
    actual_idxs = model_actual_idxs

    # Rescale each row of class scores so that it sums to 1.
    pred_scores1 = df1[class_names].values
    pred_scores1_sum = np.sum(pred_scores1,axis=1)[:,np.newaxis]
    pred_scores1 = pred_scores1/pred_scores1_sum

    pred_scores.append(pred_scores1)

    # Accuracy of this model alone (arg-max class vs. true class).
    pred_idx1 = np.argmax(pred_scores1, axis=1)
    acc1 = np.mean(actual_idxs == pred_idx1)
    print(f"model_name: {model_name}, acc1: {acc1}")

model_name: LR, acc1: 0.6664810690423163
model_name: KNN, acc1: 0.6698218262806236
model_name: SVM, acc1: 0.670935412026726
model_name: RF, acc1: 0.6241648106904232


In [5]:
# Ensemble by averaging: stack the per-model score matrices along a new leading
# axis and take the mean across models. `pred_scores` is rebound from a list of
# matrices to a single (samples x classes) matrix.
pred_scores = np.mean(np.stack(pred_scores), axis=0)

# Accuracy of the averaged scores (arg-max class vs. true class).
pred_idxs = pred_scores.argmax(axis=1)
acc = np.mean(actual_idxs == pred_idxs)
print(f"combined, acc: {acc}")

combined, acc: 0.6648106904231625


In [6]:
def find_topk_idxs(pred_scores, topk=5):
    """Return, for every row, the column indices of the highest scores.

    Parameters
    ----------
    pred_scores : 2-D array-like
        Scores; each row is ranked independently along axis 1.
    topk : int, default 5
        Number of leading (highest-scoring) columns to keep per row.

    Returns
    -------
    numpy.ndarray
        Integer array with one row per input row; columns are ordered from the
        highest score downwards and truncated to the first `topk` entries.
    """
    # Ascending argsort, reversed along axis 1 to obtain descending order.
    descending = np.argsort(pred_scores, axis=1)[:, ::-1]
    return descending[:, :topk]

In [7]:
# Indices of the five highest-scoring classes per sample, best first.
topk_idxs = find_topk_idxs(pred_scores, topk=5)
print(topk_idxs.shape)

# Translate the indices into class names; the result has the shape of `topk_idxs`.
all_topk_pred_names = np.take(class_names, topk_idxs)
print(all_topk_pred_names.shape)

(1796, 5)
(1796, 5)


In [8]:
#df_meta = pd.read_csv(f"../10methylation_meta/{project}_slide_selected.csv")

# Metadata is taken from the leading columns of `df1`, i.e. the score table of
# the last model read in the loading loop. Everything except the trailing
# `n_class + 1` columns is kept.
# NOTE(review): assumes the trailing columns are the n_class score columns plus
# one extra column — confirm against the layout of the score CSV.
n_cols_meta = df1.shape[1] - (n_class + 1)

# Take an explicit copy: the next cell adds columns to `df_meta`, and writing
# into a bare `iloc` slice of `df1` is the chained-assignment pattern that
# triggers pandas' SettingWithCopyWarning.
df_meta = df1.iloc[:, :n_cols_meta].copy()
df_meta

Unnamed: 0,sample,idat_filename,slide_file_name,slide_name,family_class,family_compound,DBTA_compound,DBTA_class,class_idx,count,NCI_class,age,location,sex,DBTA_name,location_idx,sex_idx
0,AA03,204920840035_R08C01,AA03_ST-21-3098_A1.ndpi,HGG_00002_AA03_ST-21-3098_A1.ndpi,Grade 4 glioma,DMG_K27+GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_...,GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_RTK2,"Glioblastoma, IDH-wildtype",20,53,GBM_MES_ATYP,41.0,cerebral hemisphere,M,GBM,0,1
1,AA28,204920840047_R06C01,AA28_ST-21-2997_A1_2.ndpi,HGG_00006_AA28_ST-21-2997_A1_2.ndpi,Grade 4 glioma,DMG_K27+GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_...,GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_RTK2,"Glioblastoma, IDH-wildtype",16,64,GBM_RTK1,68.0,cerebral hemisphere,F,GBM,0,0
2,AA42,205003700054_R04C01,AA42_ST-20-6587.ndpi,HGG_00010_AA42_ST-20-6587.ndpi,Grade 4 glioma,DMG_K27+GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_...,GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_RTK2,"Glioblastoma, IDH-wildtype",3,102,GBM_RTK2,65.0,cerebral hemisphere,F,GBM,0,0
3,AA74,204957750034_R03C01,AA74_ST-21-3231_A1.ndpi,HGG_00014_AA74_ST-21-3231_A1.ndpi,Medulloblastoma,MB_G3+MB_G4+MB_SHH+MB_WNT,MB_G3+MB_G4+MB_SHH+MB_WNT,Medulloblastoma,4,97,MB_G4,7.0,posterior fossa,M,MB,1,1
4,AA93,204957750034_R05C01,AA93_ST-21-3221_A1.ndpi,HGG_00016_AA93_ST-21-3221_A1.ndpi,Meningioma,MNG_BEN+MNG_INT+MNG_MAL,MNG_BEN+MNG_INT+MNG_MAL,Meningioma,6,92,MNG_BEN,38.0,dural based,F,MEN,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1791,Z930,204920840137_R02C01,Z930_SI-21-2751_A10.ndpi,HGG_03076_Z930_SI-21-2751_A10.ndpi,Grade 4 glioma,DMG_K27+GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_...,GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_RTK2,"Glioblastoma, IDH-wildtype",16,64,GBM_RTK1,39.0,cerebral hemisphere,M,GBM,0,1
1792,Z931,204920840137_R03C01,Z931_SI-21-2836_D-1.ndpi,HGG_03077_Z931_SI-21-2836_D-1.ndpi,IDH-mutant glioma,A_IDH_HG+A_IDH_LG+O_IDH,A_IDH_HG+A_IDH_LG,"Astrocytoma, IDH-mutant",14,66,A_IDH_HG,33.0,cerebral hemisphere,M,A-IDH,0,1
1793,Z949,204920840132_R01C01,Z949_ST-21-2923_A1.ndpi,HGG_03079_Z949_ST-21-2923_A1.ndpi,Choroid plexus,CPC_PED_AD+CPP_AD+CPP_PED,CPC_PED_AD+CPP_AD+CPP_PED,Choroid plexus,27,38,CPC_PED_AD,2.0,ventricle,M,CP,3,1
1794,Z952,204920840132_R04C01,Z952_ST-21-2929_A1.ndpi,HGG_03082_Z952_ST-21-2929_A1.ndpi,Medulloblastoma,MB_G3+MB_G4+MB_SHH+MB_WNT,MB_G3+MB_G4+MB_SHH+MB_WNT,Medulloblastoma,2,116,MB_SHH,38.0,posterior fossa,M,MB,1,1


In [9]:
# Column count before anything is appended. The top-k name columns are inserted
# starting at this position, so they end up between the metadata columns and
# the per-class score columns added on the next line.
n_cols_meta = df_meta.shape[1]

# One score column per class, named after the class.
df_meta[class_names] = pred_scores

# One "top{rank}_pred_name" column per available rank. The number of ranks is
# read from the array itself instead of being hard-coded, so it cannot drift
# from the `topk` value used when the array was built.
# NOTE: `DataFrame.insert` raises ValueError if the column already exists, so
# this cell cannot be re-run on an already extended `df_meta`.
n_topk = all_topk_pred_names.shape[1]
for i in range(n_topk):
    df_meta.insert(n_cols_meta+i, f"top{i+1}_pred_name", all_topk_pred_names[:,i])

df_meta

Unnamed: 0,sample,idat_filename,slide_file_name,slide_name,family_class,family_compound,DBTA_compound,DBTA_class,class_idx,count,...,GBM,MB,EPEN,PA,MEN,A-IDH,CP,SE,MPE,O-IDH
0,AA03,204920840035_R08C01,AA03_ST-21-3098_A1.ndpi,HGG_00002_AA03_ST-21-3098_A1.ndpi,Grade 4 glioma,DMG_K27+GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_...,GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_RTK2,"Glioblastoma, IDH-wildtype",20,53,...,0.283984,0.000525,0.138749,0.071717,0.000316,0.313164,0.000414,0.000528,0.000258,0.190345
1,AA28,204920840047_R06C01,AA28_ST-21-2997_A1_2.ndpi,HGG_00006_AA28_ST-21-2997_A1_2.ndpi,Grade 4 glioma,DMG_K27+GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_...,GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_RTK2,"Glioblastoma, IDH-wildtype",16,64,...,0.748406,0.000305,0.024928,0.010660,0.000745,0.082538,0.000415,0.001067,0.000394,0.130542
2,AA42,205003700054_R04C01,AA42_ST-20-6587.ndpi,HGG_00010_AA42_ST-20-6587.ndpi,Grade 4 glioma,DMG_K27+GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_...,GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_RTK2,"Glioblastoma, IDH-wildtype",3,102,...,0.760712,0.000315,0.037241,0.010444,0.000732,0.105121,0.000417,0.001038,0.000381,0.083598
3,AA74,204957750034_R03C01,AA74_ST-21-3231_A1.ndpi,HGG_00014_AA74_ST-21-3231_A1.ndpi,Medulloblastoma,MB_G3+MB_G4+MB_SHH+MB_WNT,MB_G3+MB_G4+MB_SHH+MB_WNT,Medulloblastoma,4,97,...,0.000534,0.608760,0.134984,0.246514,0.001127,0.000766,0.001970,0.004022,0.000867,0.000455
4,AA93,204957750034_R05C01,AA93_ST-21-3221_A1.ndpi,HGG_00016_AA93_ST-21-3221_A1.ndpi,Meningioma,MNG_BEN+MNG_INT+MNG_MAL,MNG_BEN+MNG_INT+MNG_MAL,Meningioma,6,92,...,0.000472,0.003975,0.006214,0.002060,0.976320,0.000858,0.005072,0.001121,0.003136,0.000772
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1791,Z930,204920840137_R02C01,Z930_SI-21-2751_A10.ndpi,HGG_03076_Z930_SI-21-2751_A10.ndpi,Grade 4 glioma,DMG_K27+GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_...,GBM_MES_ATYP+GBM_MES_TYP+GBM_RTK1+GBM_RTK2,"Glioblastoma, IDH-wildtype",16,64,...,0.333178,0.001961,0.070637,0.064381,0.001617,0.360071,0.001543,0.001280,0.000927,0.164405
1792,Z931,204920840137_R03C01,Z931_SI-21-2836_D-1.ndpi,HGG_03077_Z931_SI-21-2836_D-1.ndpi,IDH-mutant glioma,A_IDH_HG+A_IDH_LG+O_IDH,A_IDH_HG+A_IDH_LG,"Astrocytoma, IDH-mutant",14,66,...,0.174560,0.002360,0.093353,0.129015,0.001484,0.382393,0.001719,0.001143,0.000939,0.213033
1793,Z949,204920840132_R01C01,Z949_ST-21-2923_A1.ndpi,HGG_03079_Z949_ST-21-2923_A1.ndpi,Choroid plexus,CPC_PED_AD+CPP_AD+CPP_PED,CPC_PED_AD+CPP_AD+CPP_PED,Choroid plexus,27,38,...,0.000893,0.004104,0.006388,0.004027,0.003878,0.000845,0.975660,0.001118,0.002389,0.000698
1794,Z952,204920840132_R04C01,Z952_ST-21-2929_A1.ndpi,HGG_03082_Z952_ST-21-2929_A1.ndpi,Medulloblastoma,MB_G3+MB_G4+MB_SHH+MB_WNT,MB_G3+MB_G4+MB_SHH+MB_WNT,Medulloblastoma,2,116,...,0.001376,0.457316,0.281192,0.064090,0.003302,0.002135,0.002293,0.184673,0.001797,0.001827


In [10]:
# Write the combined table to the results directory. `index=False` (the
# documented boolean form) keeps the row index out of the CSV.
df_meta.to_csv(f"{result_dir}demo_score.csv", index=False)