In [10]:
# Parameters for post-hoc models; you must set these parameters for this task.
ModelID = 'M06' # Model ID; only files in FilePath whose name contains this string are evaluated.
WeightID = 'W1' # Weight ID for ACAM; also used as a tag in the names of the saved result files.
NumGene_CL = 100 # The maximum number of genes to select for evaluation, denoted as Kn in the manuscript.
pCutoff = 0.005 # Cox hazard model significance criterion used to select learning results during priority-based model selection.
ExcRate = 0.2 # Rate of results to be excluded during priority-based model selection (presumably a fraction, i.e. 0.2 = 20% - confirm against DoSimEval).
NmodEahG = 1 # The number of best models to select for each independent learning run during priority-based model selection.


# Path setting
FilePath = '../2.ModelTrain/ModelResults/' # Directory holding the trained model weight files.
SavePath = './EvalResults/' # Directory where the metric and ranking CSV files are written.
ModelName = 'RCFR_AC' # Prefix of the saved result file names.


# Model preset; the parameter values must be the same as in the model training step.
EmbedSize = 50
NCL_Feat = 5
NCL_Ind = 2
AdjCosWeight_ = 1. # This parameter only affects the training phase, so any float value can be used in the post-analysis phase.

In [11]:
import os
import sys
sys.path.insert(0,'..')

import pickle
import pandas as pd
import numpy as np
import re

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model ,load_model


from lifelines import CoxPHFitter
from SRC.Models.RCFR_AC import SetModel
from SRC.Module.DataProcessing import DataLoad
from SRC.Module.MetricsGroup import DoMetric, DoAggMetric, DoSimEval



# GPU selection: device IDs follow PCI bus order and only device "3" is exposed to this process.
# The device index is hard-coded; change it to match the machine the notebook runs on.
# NOTE(review): these variables are set after `import tensorflow`; they presumably still take
# effect because no GPU has been initialised yet at this point - confirm, or move them above the import.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="3"



### Function for priority-based model selection

In [12]:
def Aggregation(MetricTable,AggMetricList):
    """Rank candidate models by an aggregated metric and pick the best one.

    The metric table is first passed through ``DoSimEval`` together with the
    notebook-level settings ``pCutoff``, ``ExcRate`` and ``NmodEahG`` (using the
    'MaxSurvpVal' column), then ``DoAggMetric`` combines the columns named in
    ``AggMetricList`` into a single 'Metrics' score per model.

    Parameters
    ----------
    MetricTable : pandas.DataFrame
        Per-model metric table; must contain 'Model', 'MaxSurvpVal' and every
        column listed in ``AggMetricList``.
    AggMetricList : list of str
        Metric column names handed to ``DoSimEval`` and ``DoAggMetric``.

    Returns
    -------
    AggMetricRank : pandas.DataFrame
        Output of ``DoAggMetric`` sorted by 'Metrics' (ascending), with the
        'MaxSurvpVal' column merged in on 'Model'.
    BestModel : pandas.Series
        The last row of ``AggMetricRank`` after sorting by 'Metrics', i.e. the
        row with the largest 'Metrics' value.
    """
    selected = DoSimEval(MetricTable, 'MaxSurvpVal', pCutoff, AggMetricList, ExcRate, NmodEahG)

    # Aggregate only the model identifier plus the requested metric columns.
    rank_inputs = selected[['Model'] + AggMetricList]
    ranked = DoAggMetric(AggMetricList, rank_inputs)
    ranked = ranked.sort_values('Metrics')

    # Bring the survival p-value back next to the aggregated score.
    pvalues = selected[['Model', 'MaxSurvpVal']]
    AggMetricRank = ranked.merge(pvalues, on='Model', how='left')

    # Ascending sort, so the best-scoring model is the final row.
    by_score = AggMetricRank.sort_values('Metrics')
    BestModel = by_score.iloc[-1]

    return AggMetricRank, BestModel



### Data load

In [13]:
## Data load
# DataLoad() is the project helper imported from SRC.Module.DataProcessing; its nine return
# values are unpacked positionally, so the order of the names below must match its return order.
StackedData, IntToGene, TTE, EVENT, TrIndEmbeddMask, ReferencePatIDLong, ReferencePatIDShort, NormDismInd, MergedData= DataLoad()

# Split the first three columns of StackedData. The a:b slices (rather than a single index)
# keep each result as a single-column 2-D array.
# Going by the variable names, the columns are patient index, gene index and expression
# value - TODO confirm against DataLoad.
PatIDX = StackedData[:, 0:1].astype('int')
GeneIDX = StackedData[:, 1:2].astype('int')
GeneExp = StackedData[:, 2:3]

# Number of distinct values in the first and second column; both are passed to SetModel below.
IndN = len(np.unique(PatIDX))
FeatN = len(np.unique(GeneIDX))

### Setting for tasks

In [14]:
# Task set-up
# Collect the files in FilePath whose name contains ModelID.
# os.listdir() returns entries in arbitrary order, so the names are sorted to keep the
# evaluation order (and therefore the order of InfoFeatGroupList) reproducible between runs.
ModelList = sorted(i for i in os.listdir(FilePath) if ModelID in i)


# Model structure load
# The structure is built once here; the weights of each trained model are loaded into it later.
RCFR_AC, LayerList = SetModel(AdjCosWeight_, NormDismInd, TrIndEmbeddMask, IndN, FeatN, ReferencePatIDLong, ReferencePatIDShort)

# Data for calculating metric
DataMetric = [MergedData, TTE, EVENT, NCL_Ind, NCL_Feat, NumGene_CL, IntToGene]

# Column names of the metric table: the model file name followed by the values returned by DoMetric.
ColList = ['Model','AvgtPRate', 'AvgtAdjPRate', 'MintAdjPRate', 'AvgABSGeCohD', 'MinABSGeCohD', 'AvgABSSurvCoef', 'MinABSSurvCoef', 'AvgSurvpVal', 
           'MaxSurvpVal', 'NegExpAvgSurvpVal', 'NegExpMinSurvpVal', 'AvgNegSigRate',  'MinNegSigRate', 'AvgPosSigRate', 'MinPosSigRate','IndCentRatio']



### Procedure for selecting the best model

In [15]:
## Procedure for model evaluation
# Make sure the output directory exists before the expensive loop starts, so a missing
# folder does not make the final to_csv call fail after all models have been evaluated.
if SavePath:
    os.makedirs(SavePath, exist_ok=True)

# The layer handles come from the model structure and do not depend on which weight file
# is loaded, so they are unpacked once instead of on every iteration.
(InpInd, InpFeat, IndEmbeddWeig, IndEmbeddReferenceLong, FeatEmbeddWeig,
 IndCentroid, FeatCentroid, ICosCLSim, FCosCLSim) = LayerList

MetricRows = []  # one [model file name, metric values...] record per evaluated model
InfoFeatGroupList = []

for num, model in enumerate(ModelList):
    print(num)

    RCFR_AC.load_weights(FilePath + model)  # Model weights load

    # Metric calculation: InfoFeatGroup will be used in UMAP analysis
    metrics, InfoFeatGroup = DoMetric(DataMetric, [InpInd, InpFeat, IndEmbeddWeig, FeatEmbeddWeig, IndCentroid, FeatCentroid, ICosCLSim, FCosCLSim])
    InfoFeatGroupList.append(InfoFeatGroup)
    print('NegSigRate :',InfoFeatGroup[0],' , PosSigRate :',InfoFeatGroup[1],' , SurvpVal :',InfoFeatGroup[2])
    MetricRows.append([model] + metrics)

# Build the table once from the collected records instead of concatenating inside the loop.
MetricTable = pd.DataFrame(MetricRows, columns=ColList)

# Parse grouping keys out of the file names (raw strings so that \d is a regex token and
# not an invalid string escape):
#   GroupM - the third "<any char><digits>" match in the name, without its leading character
#   EpNum  - the first "<any char><digits>." match, without the surrounding characters
MetricTable['GroupM'] = np.array([re.findall(r'.\d+', i)[2][1:] for i in MetricTable['Model']])
MetricTable['EpNum'] = np.array([re.findall(r'.\d+\.', i)[0][1:-1] for i in MetricTable['Model']]).astype('int')
MetricTable = MetricTable.sort_values(['GroupM','EpNum'])

# Saving the metric table
MetricTable.to_csv(SavePath+ModelName+'_MetricTable_'+str(WeightID)+'_Filt'+str(NumGene_CL)+'.csv',index=False)

0
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0. 0. 0. 0.]  , SurvpVal : [0.871, 0.141, 0.843, 0.514]
1
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0. 0. 0. 0.]  , SurvpVal : [0.871, 0.141, 0.843, 0.514]
2
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0.002 0.    0.    0.   ]  , SurvpVal : [0.577, 0.14, 0.991, 0.552]
3
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0.002 0.    0.    0.   ]  , SurvpVal : [0.574, 0.139, 0.867, 0.617]
4
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0.002 0.    0.    0.   ]  , SurvpVal : [0.574, 0.139, 0.859, 0.587]
5
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0.002 0.    0.    0.   ]  , SurvpVal : [0.574, 0.138, 0.859, 0.587]
6
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0.002 0.    0.    0.   ]  , SurvpVal : [0.574, 0.138, 0.899, 0.776]
7
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0.004 0.    0.    0.   ]  , SurvpVal : [0.571, 0.138, 0.899, 0.776]
8
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0.01 0.   0.   0.  ]  , SurvpVal : [0.571, 0.138, 0.899, 0.772]
9
N

### Priority-based filtering by metrics

In [16]:
# Reload the metric table saved by the evaluation step, so this cell can be re-run
# without repeating the model evaluation.
MetricTable = pd.read_csv(
    ''.join([SavePath, ModelName, '_MetricTable_', str(WeightID), '_Filt', str(NumGene_CL), '.csv'])
)

## Procedure for priority-based model selection by metrics
# The two lists are identical except for the significance-rate columns (Neg vs. Pos).
NegMetricList = [
    'IndCentRatio',
    'MinABSSurvCoef',
    'AvgABSSurvCoef',
    'MinNegSigRate',
    'AvgNegSigRate',
    'MinABSGeCohD',
    'AvgABSGeCohD',
]
PosMetricList = [
    'IndCentRatio',
    'MinABSSurvCoef',
    'AvgABSSurvCoef',
    'MinPosSigRate',
    'AvgPosSigRate',
    'MinABSGeCohD',
    'AvgABSGeCohD',
]

# Rank the models once per metric list; each call returns the ranking table and its best row.
NegAggMetricRank, NegBestModel = Aggregation(MetricTable, NegMetricList)
PosAggMetricRank, PosBestModel = Aggregation(MetricTable, PosMetricList)

# Save both ranking tables next to the metric table.
NegAggMetricRank.to_csv(
    ''.join([SavePath, ModelName, '_Neg_AggMetricRank_', str(WeightID), '_Filt', str(NumGene_CL), '.csv']),
    index=False,
)
PosAggMetricRank.to_csv(
    ''.join([SavePath, ModelName, '_Pos_AggMetricRank_', str(WeightID), '_Filt', str(NumGene_CL), '.csv']),
    index=False,
)



1
N obs with filter of IndCentRatio : 16
N obs with filter of MinABSSurvCoef : 13
N obs with filter of AvgABSSurvCoef : 11
N obs with filter of MinNegSigRate : 9
N obs with filter of AvgNegSigRate : 8
N obs with filter of MinABSGeCohD : 7
N obs with filter of AvgABSGeCohD : 6

2
N obs with filter of IndCentRatio : 16
N obs with filter of MinABSSurvCoef : 13
N obs with filter of AvgABSSurvCoef : 11
N obs with filter of MinNegSigRate : 9
N obs with filter of AvgNegSigRate : 8
N obs with filter of MinABSGeCohD : 7
N obs with filter of AvgABSGeCohD : 6

1
N obs with filter of IndCentRatio : 16
N obs with filter of MinABSSurvCoef : 13
N obs with filter of AvgABSSurvCoef : 11
N obs with filter of MinPosSigRate : 9
N obs with filter of AvgPosSigRate : 8
N obs with filter of MinABSGeCohD : 7
N obs with filter of AvgABSGeCohD : 6

2
N obs with filter of IndCentRatio : 16
N obs with filter of MinABSSurvCoef : 13
N obs with filter of AvgABSSurvCoef : 11
N obs with filter of MinPosSigRate : 9
N ob

  AggMetricTable = AggMetricTable.append(SelMetric)
  AggMetricTable = AggMetricTable.append(SelMetric)
  AggMetricTable = AggMetricTable.append(SelMetric)
  AggMetricTable = AggMetricTable.append(SelMetric)
