### Analysis to find the best learning result of RCFR

In [1]:
# Parameters for post-hoc models; you must set those parameters for this task
ModelID = 'M03' # Model ID
NumGene_CL = 100 # The max number of genes to select for evaluation, denoted as Kn in the manuscript.
pCutoff = 0.005 # COX hazard model significance criteria to select learning results during priority-based model selection.
ExcRate = 0.2 # Percentage of results to be excluded during priority-based model selection.
NmodEahG = 1 # The number of best models to select for each independent learning during priority-based model selection.


# Path setting
FilePath = '../2.ModelTrain/ModelResults/'
SavePath = './EvalResults/'
ModelName = 'RCFR'


# Model Preset; the parameter values must be the same as in the model training step.
EmbedSize = 50
NCL_Feat = 5
NCL_Ind = 2


In [2]:
import os
import sys
sys.path.insert(0,'..')

import pickle
import pandas as pd
import numpy as np
import re

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model ,load_model


from lifelines import CoxPHFitter
from SRC.Models.RCFR import SetModel
from SRC.Module.DataProcessing import DataLoad
from SRC.Module.MetricsGroup import DoMetric, DoAggMetric, DoSimEval



# Pin the GPU used by this notebook: devices are enumerated in PCI bus order
# and only device "3" is made visible to the process.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "3",
})

### Function for priority-based model selection

In [3]:
def Aggregation(MetricTable,AggMetricList):
    """Rank the learning results on the given metrics and pick the top one.

    Reads the notebook-level settings pCutoff, ExcRate and NmodEahG.

    Parameters
    ----------
    MetricTable : pandas.DataFrame
        Per-model metric table; it is handed to DoSimEval unchanged.
    AggMetricList : list of str
        Names of the metric columns used for filtering and aggregation.

    Returns
    -------
    AggMetricRank : pandas.DataFrame
        Output of DoAggMetric sorted ascending by 'Metrics', with each
        model's 'MaxSurvpVal' joined on 'Model'.
    BestModel : pandas.Series
        The last row of AggMetricRank after an ascending sort on 'Metrics',
        i.e. the entry with the largest 'Metrics' value.
    """
    # Filtering step (implemented in DoSimEval) on the 'MaxSurvpVal' column.
    Filtered = DoSimEval(MetricTable, 'MaxSurvpVal',pCutoff, AggMetricList, ExcRate, NmodEahG)

    # Aggregate the selected metric columns and order the result by score.
    RankCols = ['Model'] + AggMetricList
    Ranked = DoAggMetric(AggMetricList, Filtered[RankCols])
    Ranked = Ranked.sort_values('Metrics')

    # Attach the survival p-value of every ranked model (left join keeps the ranking rows).
    SurvPvals = Filtered[['Model', 'MaxSurvpVal']]
    AggMetricRank = Ranked.merge(SurvPvals, on='Model', how='left')

    # Ascending order puts the highest aggregated score at the end.
    BestModel = AggMetricRank.sort_values('Metrics').iloc[-1]

    return AggMetricRank, BestModel

### Data load

In [4]:
# DataLoad() returns nine objects that the later cells read as notebook globals.
(
    StackedData,
    IntToGene,
    TTE,
    EVENT,
    TrIndEmbeddMask,
    ReferencePatIDLong,
    ReferencePatIDShort,
    NormDismInd,
    MergedData,
) = DataLoad()

# Columns 0, 1 and 2 of StackedData are taken as single-column slices:
# patient index, gene index (both cast to integers) and the expression value.
PatIDX = StackedData[:, :1].astype(int)
GeneIDX = StackedData[:, 1:2].astype(int)
GeneExp = StackedData[:, 2:3]

# Number of distinct patients and distinct genes.
IndN = np.unique(PatIDX).size
FeatN = np.unique(GeneIDX).size

## Setting for tasks

In [5]:
# Task set-up: keep only the files in FilePath whose name contains ModelID
ModelList = [FileName for FileName in os.listdir(FilePath) if ModelID in FileName]


# Model structure load; the weights of each file are loaded into this model later
RCFR, LayerList = SetModel(
    NormDismInd, TrIndEmbeddMask, IndN, FeatN, ReferencePatIDLong, ReferencePatIDShort
)

# Data for calculating metric (passed to DoMetric as a single list)
DataMetric = [MergedData, TTE, EVENT, NCL_Ind, NCL_Feat, NumGene_CL, IntToGene]

# Column names of the metric table: 'Model' followed by the values returned by DoMetric
ColList = [
    'Model',
    'AvgtPRate', 'AvgtAdjPRate', 'MintAdjPRate',
    'AvgABSGeCohD', 'MinABSGeCohD',
    'AvgABSSurvCoef', 'MinABSSurvCoef',
    'AvgSurvpVal', 'MaxSurvpVal',
    'NegExpAvgSurvpVal', 'NegExpMinSurvpVal',
    'AvgNegSigRate', 'MinNegSigRate',
    'AvgPosSigRate', 'MinPosSigRate',
    'IndCentRatio',
]


## Procedure for selecting the best model

In [6]:
## Procedure for model evaluation
# Each weight file in ModelList is loaded into the shared RCFR model and scored
# with DoMetric. The per-model metrics are collected into MetricTable and written
# to SavePath as a CSV file; InfoFeatGroupList keeps the second DoMetric output
# of every model, in ModelList order.

# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(SavePath, exist_ok=True)

# LayerList is never reassigned inside the loop, so it is unpacked once.
InpInd, InpFeat, IndEmbeddWeig, IndEmbeddReferenceLong, FeatEmbeddWeig, IndCentroid, FeatCentroid, ICosCLSim, FCosCLSim = LayerList
# DoMetric receives every layer except IndEmbeddReferenceLong.
MetricLayers = [InpInd, InpFeat, IndEmbeddWeig, FeatEmbeddWeig, IndCentroid, FeatCentroid, ICosCLSim, FCosCLSim]

MetricRows = []  # one row per model: [file name] + metric values
InfoFeatGroupList = []

for num, model in enumerate(ModelList):
    print(num)

    RCFR.load_weights(FilePath + model)  # Model weights load

    # Metric calculation: InfoFeatGroup will be used in UMAP analysis
    metrics, InfoFeatGroup = DoMetric (DataMetric, MetricLayers)
    InfoFeatGroupList.append(InfoFeatGroup)
    print('NegSigRate :',InfoFeatGroup[0],' , PosSigRate :',InfoFeatGroup[1],' , SurvpVal :',InfoFeatGroup[2])
    MetricRows.append([model] + metrics)

# Build the table once instead of concatenating a one-row frame per iteration.
MetricTable = pd.DataFrame(MetricRows, columns=ColList)

# Group and epoch identifiers are parsed from the file name:
#   GroupM - the second "<any char><digits>" match, without its leading character
#   EpNum  - the first "<any char><digits>." match, without the surrounding characters
# Raw strings keep the regex escapes (\d, \.) from being read as string escapes.
MetricTable['GroupM'] = [re.findall(r'.\d+', name)[1][1:] for name in MetricTable['Model']]
MetricTable['EpNum'] = [int(re.findall(r'.\d+\.', name)[0][1:-1]) for name in MetricTable['Model']]
MetricTable = MetricTable.sort_values(['GroupM','EpNum'])

# Saving the metric table
MetricTable.to_csv(SavePath+ModelName+'_MetricTable_Filt'+str(NumGene_CL)+'.csv',index=False)

0
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0. 0. 0. 0.]  , SurvpVal : [0.141, 0.717, 0.877, 0.76]
1
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0. 0. 0. 0.]  , SurvpVal : [0.097, 0.798, 0.939, 0.569]
2
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0.   0.64 0.   0.  ]  , SurvpVal : [0.091, 0.561, 0.555, 0.322]
3
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0.002 0.768 0.    0.014]  , SurvpVal : [0.019, 0.072, 0.17, 0.466]
4
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0.    0.858 0.    0.234]  , SurvpVal : [0.003, 0.067, 0.0, 0.016]
5
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0.002 0.6   0.    0.016]  , SurvpVal : [0.003, 0.004, 0.0, 0.0]
6
NegSigRate : [0.    0.002 0.    0.   ]  , PosSigRate : [0.   0.53 0.   0.01]  , SurvpVal : [0.048, 0.0, 0.0, 0.0]
7
NegSigRate : [0.    0.    0.    0.006]  , PosSigRate : [0.002 0.44  0.002 0.044]  , SurvpVal : [0.111, 0.0, 0.0, 0.0]
8
NegSigRate : [0.    0.    0.    0.002]  , PosSigRate : [0.    0.144 0.    0.022]  , SurvpVal : [0.303, 0.0, 0.0, 0




NegSigRate : [0.         0.         0.         0.03217158]  , PosSigRate : [0.00536193 0.62734584 0.24664879 0.31099196]  , SurvpVal : [0.995, 0.0, 0.0, 0.0]
25
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0. 0. 0. 0.]  , SurvpVal : [0.141, 0.717, 0.877, 0.76]
26
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0. 0. 0. 0.]  , SurvpVal : [0.097, 0.798, 0.939, 0.569]
27
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0.  0.6 0.  0. ]  , SurvpVal : [0.091, 0.56, 0.555, 0.322]
28
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0.002 0.772 0.    0.   ]  , SurvpVal : [0.023, 0.072, 0.24, 0.308]
29
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0.02  0.852 0.    0.216]  , SurvpVal : [0.002, 0.068, 0.001, 0.006]
30
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0.07  0.61  0.    0.016]  , SurvpVal : [0.002, 0.007, 0.0, 0.0]
31
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0.    0.608 0.    0.01 ]  , SurvpVal : [0.048, 0.0, 0.0, 0.0]
32
NegSigRate : [0.    0.    0.    0.002]  , PosSigRate : [0.002 0.424 0.    0.018




NegSigRate : [0.         0.         0.         0.04188482]  , PosSigRate : [0.03664921 0.21727749 0.08115183 0.21989529]  , SurvpVal : [0.995, 0.0, 0.0, 0.0]
45
NegSigRate : [0.         0.         0.         0.04427083]  , PosSigRate : [0.046875   0.359375   0.1953125  0.22395833]  , SurvpVal : [0.0, 0.0, 0.0, 0.0]
46
NegSigRate : [0.         0.         0.         0.03394256]  , PosSigRate : [0.04438642 0.54569191 0.1462141  0.21932115]  , SurvpVal : [0.0, 0.0, 0.0, 0.0]
47
NegSigRate : [0.         0.         0.         0.03141361]  , PosSigRate : [0.02094241 0.51570681 0.30628272 0.31151832]  , SurvpVal : [0.0, 0.0, 0.0, 0.0]
48
NegSigRate : [0.         0.         0.         0.03448276]  , PosSigRate : [0.06100796 0.56498674 0.29708223 0.31830239]  , SurvpVal : [0.0, 0.0, 0.0, 0.0]
49
NegSigRate : [0.         0.         0.         0.03191489]  , PosSigRate : [0.00531915 0.65691489 0.36968085 0.31117021]  , SurvpVal : [0.0, 0.0, 0.0, 0.0]


### Priority-based filtering by metrics

In [7]:
# Reload the metric table written by the evaluation cell.
MetricTable = pd.read_csv(SavePath+ModelName+'_MetricTable_Filt'+str(NumGene_CL)+'.csv')

## Procedure for priority-based model selection by metrics
# The two lists differ only in the significance-rate metrics (Neg vs. Pos).
# The logged output reports the filters in list order.
NegMetricList = [
    'IndCentRatio',
    'MinABSSurvCoef', 'AvgABSSurvCoef',
    'MinNegSigRate', 'AvgNegSigRate',
    'MinABSGeCohD', 'AvgABSGeCohD',
]
PosMetricList = [
    'IndCentRatio',
    'MinABSSurvCoef', 'AvgABSSurvCoef',
    'MinPosSigRate', 'AvgPosSigRate',
    'MinABSGeCohD', 'AvgABSGeCohD',
]

# Rank the models for both metric sets before anything is written to disk.
NegAggMetricRank, NegBestModel = Aggregation(MetricTable, NegMetricList)
PosAggMetricRank, PosBestModel = Aggregation(MetricTable, PosMetricList)

# Save each ranking table under its own file-name suffix.
RankOutputs = {
    '_Neg_AggMetricRank_Filt': NegAggMetricRank,
    '_Pos_AggMetricRank_Filt': PosAggMetricRank,
}
for RankSuffix, RankTable in RankOutputs.items():
    RankTable.to_csv(SavePath+ModelName+RankSuffix+str(NumGene_CL)+'.csv',index=False)

1
N obs with filter of IndCentRatio : 9
N obs with filter of MinABSSurvCoef : 8
N obs with filter of AvgABSSurvCoef : 7
N obs with filter of MinNegSigRate : 6
N obs with filter of AvgNegSigRate : 5
N obs with filter of MinABSGeCohD : 4
N obs with filter of AvgABSGeCohD : 4

2
N obs with filter of IndCentRatio : 8
N obs with filter of MinABSSurvCoef : 7
N obs with filter of AvgABSSurvCoef : 6
N obs with filter of MinNegSigRate : 5
N obs with filter of AvgNegSigRate : 4
N obs with filter of MinABSGeCohD : 4
N obs with filter of AvgABSGeCohD : 4

1
N obs with filter of IndCentRatio : 9
N obs with filter of MinABSSurvCoef : 8
N obs with filter of AvgABSSurvCoef : 7
N obs with filter of MinPosSigRate : 6
N obs with filter of AvgPosSigRate : 5
N obs with filter of MinABSGeCohD : 4
N obs with filter of AvgABSGeCohD : 4

2
N obs with filter of IndCentRatio : 8
N obs with filter of MinABSSurvCoef : 7
N obs with filter of AvgABSSurvCoef : 6
N obs with filter of MinPosSigRate : 5
N obs with filte

  AggMetricTable = AggMetricTable.append(SelMetric)
  AggMetricTable = AggMetricTable.append(SelMetric)
  AggMetricTable = AggMetricTable.append(SelMetric)
  AggMetricTable = AggMetricTable.append(SelMetric)
