### Analysis to find the best learning result of RCFR_AC$^{+1}$

In [1]:
# --- Post-hoc analysis parameters: these must be set by the user for this task ---
ModelID, WeightID = 'M06', 'W1'  # model ID, and weight ID for ACAM

# Denoted Kn in the manuscript: the maximum number of genes to select for evaluation.
NumGene_CL = 100

# Settings consumed by the priority-based model selection.
pCutoff = 5e-3   # COX hazard model significance criterion used to select learning results
ExcRate = 0.2    # share of results to be excluded (described by the author as a percentage)
NmodEahG = 1     # number of best models to keep for each independent learning


# --- Locations and naming ---
ModelName = 'RCFR_AC'
FilePath = '../2.ModelTrain/ModelResults/'
SavePath = './EvalResults/'


# --- Model preset: values must be the same as in the model training step ---
EmbedSize, NCL_Feat, NCL_Ind = 50, 5, 2
# Only affects the training phase, so any float may be used in the post-analysis phase.
AdjCosWeight_ = 1.0

In [2]:
import os
import sys
sys.path.insert(0,'..')

import pickle
import pandas as pd
import numpy as np
import re

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model ,load_model


from lifelines import CoxPHFitter
from SRC.Models.RCFR_AC import SetModel
from SRC.Module.DataProcessing import DataLoad
from SRC.Module.MetricsGroup import DoMetric, DoAggMetric, DoSimEval



# GPU selection through the CUDA environment variables: enumerate devices by
# PCI bus ID and expose only device "3" to this process.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "3",
})



### Function for priority-based model selection

In [3]:
def Aggregation(MetricTable,AggMetricList):
    """Run the priority-based model selection and rank the remaining models.

    The notebook-level settings ``pCutoff``, ``ExcRate`` and ``NmodEahG`` are
    read from the enclosing module and forwarded to ``DoSimEval``.

    Parameters
    ----------
    MetricTable : per-model metric table handed to ``DoSimEval``
        (presumably a pandas DataFrame with a 'Model' and a 'MaxSurvpVal'
        column -- confirm against ``DoSimEval``).
    AggMetricList : list of metric column names to aggregate.

    Returns
    -------
    tuple
        ``(AggMetricRank, BestModel)``: the table produced by ``DoAggMetric``,
        sorted by 'Metrics' and left-joined with 'MaxSurvpVal' on 'Model', and
        the last row of that table after an ascending sort on 'Metrics'.
    """
    # Selection step; 'MaxSurvpVal' is the column that pCutoff is applied to.
    selected = DoSimEval(MetricTable, 'MaxSurvpVal',pCutoff, AggMetricList, ExcRate, NmodEahG)

    # Aggregate only the requested metric columns, keyed by model name.
    score_columns = ['Model'] + AggMetricList
    ranking = DoAggMetric(AggMetricList, selected[score_columns])
    ranking = ranking.sort_values('Metrics')

    # Attach each model's MaxSurvpVal next to its aggregated score.
    pvalue_columns = selected[['Model', 'MaxSurvpVal']]
    ranking = pd.merge(ranking, pvalue_columns, on='Model', how='left')

    # Ascending sort, so the final row carries the largest 'Metrics' value.
    top_row = ranking.sort_values('Metrics').iloc[-1]

    return ranking, top_row



### Data load

In [4]:
## Data load
# DataLoad() hands back nine objects; they are unpacked in the order it returns them.
(StackedData, IntToGene, TTE, EVENT, TrIndEmbeddMask,
 ReferencePatIDLong, ReferencePatIDShort, NormDismInd, MergedData) = DataLoad()

# Columns 0 and 1 of the stacked matrix are cast to integers, column 2 is kept as is;
# every piece stays a single-column slice.
# (presumably patient index, gene index and expression value -- confirm against DataLoad)
PatIDX, GeneIDX = (StackedData[:, col:col + 1].astype('int') for col in (0, 1))
GeneExp = StackedData[:, 2:3]

# Number of distinct values in each index column.
IndN = np.unique(PatIDX).size
FeatN = np.unique(GeneIDX).size

### Setting for tasks

In [5]:
# Task set-up
# Keep only the entries of FilePath whose name contains ModelID.
# os.listdir() returns names in arbitrary order, so the list is sorted to make the
# evaluation order (and therefore the positions in InfoFeatGroupList) reproducible.
ModelList = sorted(fname for fname in os.listdir(FilePath) if ModelID in fname)


# Model structure load: builds the model object and the list of layer handles
# that the evaluation step passes to DoMetric.
RCFR_AC, LayerList = SetModel(AdjCosWeight_, NormDismInd, TrIndEmbeddMask, IndN, FeatN, ReferencePatIDLong, ReferencePatIDShort)

# Data for calculating metric (positional bundle consumed by DoMetric)
DataMetric = [MergedData, TTE, EVENT, NCL_Ind, NCL_Feat, NumGene_CL, IntToGene]

# Column names of the metric table: 'Model' followed by one column per metric
# returned by DoMetric, in the same order.
ColList = ['Model','AvgtPRate', 'AvgtAdjPRate', 'MintAdjPRate', 'AvgABSGeCohD', 'MinABSGeCohD', 'AvgABSSurvCoef', 'MinABSSurvCoef', 'AvgSurvpVal', 
           'MaxSurvpVal', 'NegExpAvgSurvpVal', 'NegExpMinSurvpVal', 'AvgNegSigRate',  'MinNegSigRate', 'AvgPosSigRate', 'MinPosSigRate','IndCentRatio']



### Procedure for selecting the best model

In [6]:
## Procedure for model evaluation
# The layer handles in LayerList do not change when new weights are loaded,
# so they are unpacked once instead of on every iteration.
(InpInd, InpFeat, IndEmbeddWeig, IndEmbeddReferenceLong, FeatEmbeddWeig,
 IndCentroid, FeatCentroid, ICosCLSim, FCosCLSim) = LayerList
# DoMetric takes every handle except IndEmbeddReferenceLong.
MetricLayers = [InpInd, InpFeat, IndEmbeddWeig, FeatEmbeddWeig, IndCentroid, FeatCentroid, ICosCLSim, FCosCLSim]

# Rows are collected in a plain list and turned into a DataFrame once at the end;
# concatenating onto an (initially empty) DataFrame inside the loop is quadratic
# and triggers a FutureWarning on recent pandas versions.
MetricRows = []
InfoFeatGroupList = []

for num, model in enumerate(ModelList):
    print(num)

    RCFR_AC.load_weights(FilePath + model)  # Model weights load

    # Metric calculation: InfoFeatGroup will be used in UMAP analysis
    metrics, InfoFeatGroup = DoMetric(DataMetric, MetricLayers)
    InfoFeatGroupList.append(InfoFeatGroup)
    print('NegSigRate :',InfoFeatGroup[0],' , PosSigRate :',InfoFeatGroup[1],' , SurvpVal :',InfoFeatGroup[2])
    MetricRows.append([model] + metrics)

MetricTable = pd.DataFrame(MetricRows, columns=ColList)

# Parse the sort keys out of each file name (raw strings avoid the invalid '\d' escape warning).
# GroupM: third "<any char><digits>" match without its leading character
#         (presumably the independent-learning group encoded in the file name -- confirm).
# EpNum : first "<any char><digits>." match without the surrounding characters, as an integer
#         (presumably the epoch number -- confirm).
MetricTable['GroupM'] = np.array([re.findall(r'.\d+', name)[2][1:] for name in MetricTable['Model']])
MetricTable['EpNum'] = np.array([re.findall(r'.\d+\.', name)[0][1:-1] for name in MetricTable['Model']]).astype('int')
MetricTable = MetricTable.sort_values(['GroupM','EpNum'])

# Saving the metric table (create the output folder first so a fresh checkout does not fail here)
os.makedirs(SavePath, exist_ok=True)
MetricTable.to_csv(SavePath+ModelName+'_MetricTable_'+str(WeightID)+'_Filt'+str(NumGene_CL)+'.csv',index=False)

0
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0. 0. 0. 0.]  , SurvpVal : [0.141, 0.657, 0.872, 0.76]
1
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0. 0. 0. 0.]  , SurvpVal : [0.097, 0.798, 0.732, 0.457]
2
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0.    0.554 0.    0.   ]  , SurvpVal : [0.065, 0.506, 0.422, 0.271]
3
NegSigRate : [0.    0.002 0.    0.   ]  , PosSigRate : [0.016 0.746 0.    0.002]  , SurvpVal : [0.003, 0.037, 0.168, 0.103]
4
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0.    0.764 0.    0.178]  , SurvpVal : [0.0, 0.057, 0.001, 0.006]
5
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0.004 0.586 0.    0.022]  , SurvpVal : [0.004, 0.004, 0.0, 0.0]
6
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0.    0.478 0.    0.01 ]  , SurvpVal : [0.017, 0.0, 0.0, 0.0]
7
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0.004 0.432 0.    0.012]  , SurvpVal : [0.02, 0.0, 0.0, 0.0]
8
NegSigRate : [0.    0.    0.    0.002]  , PosSigRate : [0.    0.212 0.002 0.016]  , SurvpVal : [0.106, 0.0, 0.0, 0.0]




NegSigRate : [0.002 0.    0.    0.004]  , PosSigRate : [0.058 0.112 0.    0.034]  , SurvpVal : [0.068, 0.0, 0.0, 0.0]
10
NegSigRate : [0.    0.    0.    0.006]  , PosSigRate : [0.    0.028 0.002 0.038]  , SurvpVal : [0.224, 0.0, 0.0, 0.0]
11
NegSigRate : [0.00211416 0.00211416 0.         0.00634249]  , PosSigRate : [0.08879493 0.21141649 0.         0.06131078]  , SurvpVal : [0.169, 0.0, 0.0, 0.0]
12
NegSigRate : [0.00245098 0.00490196 0.         0.00490196]  , PosSigRate : [0.07107843 0.17892157 0.         0.05882353]  , SurvpVal : [0.164, 0.0, 0.0, 0.0]
13
NegSigRate : [0.00777202 0.00518135 0.         0.02072539]  , PosSigRate : [0.25129534 0.41709845 0.         0.11658031]  , SurvpVal : [0.053, 0.0, 0.0, 0.0]
14
NegSigRate : [0.0025 0.0025 0.     0.025 ]  , PosSigRate : [0.2675 0.5775 0.     0.2225]  , SurvpVal : [0.022, 0.0, 0.0, 0.0]
15
NegSigRate : [0.00253165 0.00506329 0.         0.02531646]  , PosSigRate : [0.22025316 0.40253165 0.         0.18227848]  , SurvpVal : [0.061, 0.0

  scores = weights * exp(dot(X, beta))
  denom = 1.0 / np.array([risk_phi])
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  a1 = einsum("ab,i->ab", risk_phi_x_x, denom) - einsum("ab,i->ab", tie_phi_x_x, increasing_proportion * denom)
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()


NegSigRate : [0.00645161 0.         0.         0.03548387]  , PosSigRate : [0.20322581 0.66129032 0.01290323 0.25483871]  , SurvpVal : [0.029, 0.0, 1.0, 0.0]
19
NegSigRate : [0.         0.         0.         0.04026846]  , PosSigRate : [0.19127517 0.72147651 0.30536913 0.43624161]  , SurvpVal : [0.0, 0.0, 0.0, 0.0]
20
NegSigRate : [0.00337838 0.         0.         0.03716216]  , PosSigRate : [0.16554054 0.76689189 0.27027027 0.41216216]  , SurvpVal : [0.0, 0.0, 0.0, 0.0]
21


  scores = weights * exp(dot(X, beta))
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()


NegSigRate : [0.         0.         0.         0.04123711]  , PosSigRate : [0.17525773 0.85910653 0.18213058 0.40893471]  , SurvpVal : [0.0, 0.0, 1.0, 0.0]
22
NegSigRate : [0.         0.         0.         0.04225352]  , PosSigRate : [0.28521127 0.93661972 0.01760563 0.37676056]  , SurvpVal : [0.001, 0.0, 1.0, 0.0]
23


  scores = weights * exp(dot(X, beta))
  denom = 1.0 / np.array([risk_phi])
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  a1 = einsum("ab,i->ab", risk_phi_x_x, denom) - einsum("ab,i->ab", tie_phi_x_x, increasing_proportion * denom)
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()


NegSigRate : [0.00352113 0.         0.         0.06690141]  , PosSigRate : [0.37323944 0.97887324 0.         0.50352113]  , SurvpVal : [0.003, 0.019, 1.0, 0.0]
24
NegSigRate : [0.00361011 0.         0.         0.06137184]  , PosSigRate : [0.49097473 0.96389892 0.         0.54151625]  , SurvpVal : [0.002, 0.524, 0.126, 0.0]
25
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0. 0. 0. 0.]  , SurvpVal : [0.141, 0.657, 0.872, 0.76]
26
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0. 0. 0. 0.]  , SurvpVal : [0.097, 0.798, 0.732, 0.457]
27
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0.    0.568 0.    0.   ]  , SurvpVal : [0.065, 0.506, 0.422, 0.271]
28
NegSigRate : [0.    0.002 0.    0.   ]  , PosSigRate : [0.016 0.762 0.    0.   ]  , SurvpVal : [0.003, 0.037, 0.235, 0.103]
29
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0.    0.784 0.    0.158]  , SurvpVal : [0.0, 0.053, 0.002, 0.004]
30
NegSigRate : [0. 0. 0. 0.]  , PosSigRate : [0.004 0.584 0.    0.026]  , SurvpVal : [0.004, 0.004, 0.0, 0.0]
31
N



NegSigRate : [0.    0.    0.    0.026]  , PosSigRate : [0.    0.094 0.002 0.004]  , SurvpVal : [0.225, 0.0, 0.0, 0.479]
36
NegSigRate : [0.00206612 0.         0.         0.00619835]  , PosSigRate : [0.08471074 0.1446281  0.00206612 0.06198347]  , SurvpVal : [0.17, 0.0, 0.0, 0.0]
37




NegSigRate : [0.00233645 0.00233645 0.         0.05140187]  , PosSigRate : [0.03504673 0.11915888 0.         0.0046729 ]  , SurvpVal : [0.228, 0.0, 0.0, 0.477]
38
NegSigRate : [0.00735294 0.00490196 0.         0.01960784]  , PosSigRate : [0.25245098 0.375      0.         0.11764706]  , SurvpVal : [0.053, 0.0, 0.0, 0.0]
39
NegSigRate : [0.00241546 0.00241546 0.         0.02415459]  , PosSigRate : [0.23429952 0.59178744 0.         0.23188406]  , SurvpVal : [0.023, 0.0, 0.0, 0.0]
40
NegSigRate : [0.00982801 0.004914   0.         0.02702703]  , PosSigRate : [0.24570025 0.39066339 0.         0.19164619]  , SurvpVal : [0.017, 0.0, 0.0, 0.0]
41
NegSigRate : [0.00282486 0.         0.         0.0480226 ]  , PosSigRate : [0.12146893 0.65254237 0.10734463 0.25988701]  , SurvpVal : [0.001, 0.0, 0.0, 0.0]
42
NegSigRate : [0.         0.         0.         0.05688623]  , PosSigRate : [0.11077844 0.66467066 0.00299401 0.26646707]  , SurvpVal : [0.001, 0.0, 0.0, 0.0]
43


  scores = weights * exp(dot(X, beta))
  denom = 1.0 / np.array([risk_phi])
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  a1 = einsum("ab,i->ab", risk_phi_x_x, denom) - einsum("ab,i->ab", tie_phi_x_x, increasing_proportion * denom)
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()


NegSigRate : [0.003125 0.003125 0.       0.040625]  , PosSigRate : [0.196875 0.60625  0.0125   0.209375]  , SurvpVal : [0.029, 0.0, 1.0, 0.0]
44
NegSigRate : [0.         0.         0.         0.03642384]  , PosSigRate : [0.08940397 0.65562914 0.01655629 0.28807947]  , SurvpVal : [0.0, 0.0, 0.0, 0.0]
45


  scores = weights * exp(dot(X, beta))
  denom = 1.0 / np.array([risk_phi])
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)


NegSigRate : [0.         0.         0.         0.04013378]  , PosSigRate : [0.07692308 0.69230769 0.2541806  0.40133779]  , SurvpVal : [0.0, 1.0, 0.0, 0.0]
46


  scores = weights * exp(dot(X, beta))
  denom = 1.0 / np.array([risk_phi])
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  a1 = einsum("ab,i->ab", risk_phi_x_x, denom) - einsum("ab,i->ab", tie_phi_x_x, increasing_proportion * denom)
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()


NegSigRate : [0.         0.         0.         0.03412969]  , PosSigRate : [0.17064846 0.79863481 0.04778157 0.35836177]  , SurvpVal : [0.0, 0.0, 1.0, 0.0]
47


  scores = weights * exp(dot(X, beta))
  denom = 1.0 / np.array([risk_phi])
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  a1 = einsum("ab,i->ab", risk_phi_x_x, denom) - einsum("ab,i->ab", tie_phi_x_x, increasing_proportion * denom)
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()


NegSigRate : [0.         0.         0.         0.03806228]  , PosSigRate : [0.29757785 0.92041522 0.01730104 0.37370242]  , SurvpVal : [0.003, 0.0, 1.0, 0.0]
48


  scores = weights * exp(dot(X, beta))
  denom = 1.0 / np.array([risk_phi])
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  a1 = einsum("ab,i->ab", risk_phi_x_x, denom) - einsum("ab,i->ab", tie_phi_x_x, increasing_proportion * denom)
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()


NegSigRate : [0.00347222 0.         0.         0.05902778]  , PosSigRate : [0.38194444 0.95833333 0.         0.37152778]  , SurvpVal : [0.051, 0.0, 1.0, 0.0]
49


  scores = weights * exp(dot(X, beta))
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()


NegSigRate : [0.         0.         0.         0.07801418]  , PosSigRate : [0.25886525 0.9787234  0.65248227 0.4858156 ]  , SurvpVal : [0.0, 0.0, 1.0, 0.0]


### Priority-based filtering by metrics

In [7]:
# All result files of this step share the same stem and tail; only the middle tag differs.
ResultStem = SavePath + ModelName
ResultTail = str(WeightID) + '_Filt' + str(NumGene_CL) + '.csv'

# Reload the metric table written by the evaluation step.
MetricTable = pd.read_csv(ResultStem + '_MetricTable_' + ResultTail)

## Procedure for priority-based model selection by metrics
# The negative and positive lists differ only in the two *SigRate entries in the middle.
LeadingMetrics = ['IndCentRatio', 'MinABSSurvCoef', 'AvgABSSurvCoef']
TrailingMetrics = ['MinABSGeCohD', 'AvgABSGeCohD']
NegMetricList = LeadingMetrics + ['MinNegSigRate', 'AvgNegSigRate'] + TrailingMetrics
PosMetricList = LeadingMetrics + ['MinPosSigRate', 'AvgPosSigRate'] + TrailingMetrics

# Rank the models once per metric list.
NegAggMetricRank, NegBestModel = Aggregation(MetricTable, NegMetricList)
PosAggMetricRank, PosBestModel = Aggregation(MetricTable, PosMetricList)

# Persist both rankings next to the metric table.
NegAggMetricRank.to_csv(ResultStem + '_Neg_AggMetricRank_' + ResultTail, index=False)
PosAggMetricRank.to_csv(ResultStem + '_Pos_AggMetricRank_' + ResultTail, index=False)



1
N obs with filter of IndCentRatio : 4
N obs with filter of MinABSSurvCoef : 4
N obs with filter of AvgABSSurvCoef : 4
N obs with filter of MinNegSigRate : 4
N obs with filter of AvgNegSigRate : 4
N obs with filter of MinABSGeCohD : 4
N obs with filter of AvgABSGeCohD : 4

2
N obs with filter of IndCentRatio : 4
N obs with filter of MinABSSurvCoef : 4
N obs with filter of AvgABSSurvCoef : 4
N obs with filter of MinNegSigRate : 4
N obs with filter of AvgNegSigRate : 4
N obs with filter of MinABSGeCohD : 4
N obs with filter of AvgABSGeCohD : 4

1
N obs with filter of IndCentRatio : 4
N obs with filter of MinABSSurvCoef : 4
N obs with filter of AvgABSSurvCoef : 4
N obs with filter of MinPosSigRate : 4
N obs with filter of AvgPosSigRate : 4
N obs with filter of MinABSGeCohD : 4
N obs with filter of AvgABSGeCohD : 4

2
N obs with filter of IndCentRatio : 4
N obs with filter of MinABSSurvCoef : 4
N obs with filter of AvgABSSurvCoef : 4
N obs with filter of MinPosSigRate : 4
N obs with filte

  AggMetricTable = AggMetricTable.append(SelMetric)
  AggMetricTable = AggMetricTable.append(SelMetric)
  AggMetricTable = AggMetricTable.append(SelMetric)
  AggMetricTable = AggMetricTable.append(SelMetric)
