In [1]:
import pickle
import os
import sys
import pandas as pd
import numpy as np
from numpy import mean as npMean
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import seaborn as sns
import umap
import re

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model ,load_model


from lifelines import CoxPHFitter
from Models.RCFR_RefLong import SetModel
from Module.DataProcessing import DataLoad
from Module.MetricsGroup import DoMetric, DoAggMetric, DoSimEval

# Select the GPU through CUDA environment variables: devices are enumerated
# in PCI bus order and only the device with index 3 is made visible.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "3",
})



### Data load

In [2]:
# Unpack the nine objects returned by the project data loader.
(
    StackedData,
    IntToGene,
    TTE,
    EVENT,
    TrIndEmbeddMask,
    ReferencePatIDLong,
    ReferencePatIDShort,
    NormDismInd,
    MergedData,
) = DataLoad()

# Split the stacked table into its first three columns, each kept 2-D (n, 1).
# Presumably: column 0 = patient index, column 1 = gene index,
# column 2 = expression value -- TODO confirm against DataLoad.
PatIDX = StackedData[:, 0:1].astype('int')
GeneIDX = StackedData[:, 1:2].astype('int')
GeneExp = StackedData[:, 2:3]

# Number of distinct index values found in each of the two index columns.
IndN = np.unique(PatIDX).size
FeatN = np.unique(GeneIDX).size

## Settings for the tasks

In [3]:
# Directory holding the saved model weight files. It is defined before the
# listing so that the listing and the later weight loading share one path.
FilePath = './Results/'

# Keep only the 'M02' weight files and drop the 'NoRiskRank' variants.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# make the evaluation order (and the positions in any per-model list built
# from it) reproducible across runs and machines.
ModelList = sorted(i for i in os.listdir(FilePath) if 'M02' in i and 'NoRiskRank' not in i)

# Parameters for post-hoc models
EmbedSize = 50
NCL_Feat = 5
NCL_Ind = 2
AdjCosWeight_ = 1. # This parameter only affects the training phase, so any float can be used in the post-hoc analysis.
NumGene_CL = 100

# Model structure load
RCFR, LayerList = SetModel( NormDismInd, TrIndEmbeddMask, IndN, FeatN, ReferencePatIDLong, ReferencePatIDShort)

# Data for calculating metric
DataMetric = [MergedData, TTE, EVENT, NCL_Ind, NCL_Feat, NumGene_CL, IntToGene]

# Column names of the per-model metric table: the model file name followed by
# one column per value returned by the metric routine.
ColList = ['Model','AvgtPRate', 'AvgtAdjPRate', 'MintAdjPRate', 'AvgABSGeCohD', 'MinABSGeCohD', 'AvgABSSurvCoef', 'MinABSSurvCoef', 'AvgSurvpVal', 
           'MaxSurvpVal', 'NegExpAvgSurvpVal', 'NegExpMinSurvpVal', 'AvgNegSigRate',  'MinNegSigRate', 'AvgPosSigRate', 'MinPosSigRate','IndCentRatio']


## Procedure for selecting the best model

In [4]:
# Evaluate every saved checkpoint and collect one metric row per model.
# Rows are accumulated in a plain list and turned into a DataFrame once at the
# end: growing a DataFrame with pd.concat inside the loop re-copies the whole
# table on every iteration and starts from an empty, object-typed frame.
MetricRows = []
InfoFeatGroupList = []

# LayerList does not change between checkpoints, so unpack it once.
(InpInd, InpFeat, IndEmbeddWeig, IndEmbeddReferenceLong, FeatEmbeddWeig,
 IndCentroid, FeatCentroid, ICosCLSim, FCosCLSim) = LayerList

for num, model in enumerate(ModelList):
    print(num)

    RCFR.load_weights(FilePath + model)  # Model weights load

    # Metric calculation: InfoFeatGroup will be used in UMAP analysis
    metrics, InfoFeatGroup = DoMetric(
        DataMetric,
        [InpInd, InpFeat, IndEmbeddWeig, FeatEmbeddWeig, IndCentroid, FeatCentroid, ICosCLSim, FCosCLSim],
    )
    InfoFeatGroupList.append(InfoFeatGroup)
    print(InfoFeatGroup)
    MetricRows.append([model] + metrics)

MetricTable = pd.DataFrame(MetricRows, columns=ColList)

# Parse the model file name (raw strings avoid the invalid '\d' escape warning):
#   GroupM - digits of the second "<any char><digits>" match, first char dropped
#   EpNum  - digits of the first "<any char><digits>." match, as an integer
MetricTable['GroupM'] = np.array([re.findall(r'.\d+', i)[1][1:] for i in MetricTable['Model']])
MetricTable['EpNum'] = np.array([re.findall(r'.\d+\.', i)[0][1:-1] for i in MetricTable['Model']]).astype('int')
MetricTable = MetricTable.sort_values(['GroupM','EpNum'])

# The table is written only once, after the whole loop has finished; make sure
# the target directory exists so that the write itself cannot fail on it.
os.makedirs('./MetricTables', exist_ok=True)
MetricTable.to_csv('./MetricTables/RCFRMetricTable_Filt'+str(NumGene_CL)+'.csv',index=False)

0
[array([0., 0., 0., 0.]), array([0., 0., 0., 0.]), [0.612, 0.333, 0.544, 0.621]]
1
[array([0.        , 0.        , 0.00022222, 0.01577778]), array([0.        , 0.        , 0.        , 0.01666667]), [0.899, 0.056, 0.778, 0.503]]
2
[array([0.02688889, 0.84066667, 0.01755556, 0.14222222]), array([0.        , 0.00177778, 0.004     , 0.11955556]), [0.412, 0.009, 0.103, 0.002]]
3
[array([0.03911111, 0.77755556, 0.        , 0.15777778]), array([0.00044444, 0.00155556, 0.        , 0.16977778]), [0.021, 0.0, 0.0, 0.0]]
4
[array([0.03333333, 0.73333333, 0.        , 0.17111111]), array([0.00044444, 0.00222222, 0.        , 0.21822222]), [0.0, 0.0, 0.0, 0.0]]
5
[array([0.00822222, 0.72888889, 0.00777778, 0.17022222]), array([0.        , 0.00266667, 0.        , 0.23155556]), [0.0, 0.0, 0.0, 0.0]]
6
[array([0.00822222, 0.67044444, 0.05422222, 0.18555556]), array([0.00022222, 0.00266667, 0.00355556, 0.21866667]), [0.0, 0.0, 0.0, 0.0]]
7
[array([0.00777778, 0.72377778, 0.10466667, 0.19622222]), array




[array([0.02622222, 0.78488889, 0.15222222, 0.18688889]), array([0.00044444, 0.00155556, 0.04711111, 0.20066667]), [0.994, 0.0, 0.0, 0.0]]
12
[array([0.02333333, 0.79155556, 0.11888889, 0.18666667]), array([0.00044444, 0.00155556, 0.01377778, 0.16466667]), [0.0, 0.0, 0.0, 0.0]]
13
[array([0.02555556, 0.80622222, 0.14955556, 0.19511111]), array([0.00022222, 0.00133333, 0.01911111, 0.15511111]), [0.0, 0.0, 0.0, 0.0]]
14
[array([0.02755556, 0.81911111, 0.09222222, 0.19466667]), array([0.00022222, 0.00155556, 0.008     , 0.12644444]), [0.0, 0.0, 0.0, 0.0]]
15
[array([0.074     , 0.73622222, 0.08266667, 0.18555556]), array([0.00044444, 0.00177778, 0.00488889, 0.11288889]), [0.0, 0.0, 0.0, 0.0]]
16
[array([0.06266667, 0.824     , 0.19022222, 0.20888889]), array([0.00044444, 0.00066667, 0.06088889, 0.14288889]), [0.0, 0.0, 0.0, 0.0]]
17
[array([0.03955556, 0.8       , 0.19644444, 0.17133333]), array([0.00044444, 0.00088889, 0.04133333, 0.12688889]), [0.0, 0.0, 0.0, 0.0]]
18
[array([0.09155556

  scores = weights * exp(dot(X, beta))
  denom = 1.0 / np.array([risk_phi])
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  a1 = einsum("ab,i->ab", risk_phi_x_x, denom) - einsum("ab,i->ab", tie_phi_x_x, increasing_proportion * denom)
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()


[array([0.00088889, 0.00022222, 0.        , 0.00822222]), array([0.36888889, 0.96866667, 0.        , 0.844     ]), [0.307, 0.691, 1.0, 0.226]]
39




[array([0.00133333, 0.        , 0.        , 0.01644444]), array([0.34311111, 0.94577778, 0.        , 0.768     ]), [0.18, 0.006, 0.566, 0.0]]
40
[array([0.00222222, 0.        , 0.        , 0.04844444]), array([0.154     , 0.86466667, 0.        , 0.62955556]), [0.077, 0.0, 0.0, 0.0]]
41
[array([0.00155556, 0.00022222, 0.        , 0.05022222]), array([0.14911111, 0.62733333, 0.        , 0.52133333]), [0.046, 0.0, 0.0, 0.0]]
42
[array([0.00066667, 0.00044444, 0.        , 0.04711111]), array([0.026     , 0.47      , 0.        , 0.48577778]), [0.0, 0.0, 0.0, 0.0]]
43
[array([0.00088889, 0.00022222, 0.        , 0.04511111]), array([0.00711111, 0.14866667, 0.        , 0.46933333]), [0.0, 0.0, 0.0, 0.0]]
44
[array([0.00222222, 0.        , 0.00155556, 0.05888889]), array([0.17066667, 0.69155556, 0.45755556, 0.65311111]), [0.0, 0.0, 0.0, 0.0]]
45
[array([0.00177778, 0.00222222, 0.00022222, 0.06577778]), array([0.14111111, 0.21777778, 0.34822222, 0.53466667]), [0.0, 0.0, 0.0, 0.0]]
46
[array([0.0

  scores = weights * exp(dot(X, beta))
  denom = 1.0 / np.array([risk_phi])
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  a1 = einsum("ab,i->ab", risk_phi_x_x, denom) - einsum("ab,i->ab", tie_phi_x_x, increasing_proportion * denom)
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()


[array([0.16466667, 0.782     , 0.61355556, 0.09488889]), array([0.00466667, 0.00022222, 0.012     , 0.11066667]), [0.231, 0.0, 0.0, 1.0]]
79
[array([0.05377778, 0.62177778, 0.57288889, 0.09844444]), array([0.00133333, 0.        , 0.00911111, 0.10555556]), [0.52, 0.0, 0.0, 0.0]]
80
[array([0.02133333, 0.44333333, 0.40777778, 0.10133333]), array([0.00022222, 0.        , 0.00622222, 0.10022222]), [0.009, 0.0, 0.0, 0.0]]
81
[array([0.34377778, 0.86955556, 0.67177778, 0.17333333]), array([0.00222222, 0.00022222, 0.        , 0.15511111]), [0.003, 0.0, 0.0, 0.0]]
82
[array([0.31288889, 0.87711111, 0.54666667, 0.17177778]), array([0.00066667, 0.00022222, 0.00022222, 0.15511111]), [0.001, 0.0, 0.0, 0.0]]
83
[array([0.16488889, 0.85933333, 0.47955556, 0.17088889]), array([0.00155556, 0.00022222, 0.        , 0.16022222]), [0.0, 0.0, 0.0, 0.0]]
84
[array([0.20511111, 0.80044444, 0.26844444, 0.14622222]), array([0.00155556, 0.00044444, 0.        , 0.11466667]), [0.0, 0.0, 0.0, 0.0]]
85
[array([0.2




[array([0.02355556, 0.78555556, 0.15355556, 0.18755556]), array([0.00044444, 0.00133333, 0.04733333, 0.202     ]), [0.995, 0.0, 0.0, 0.0]]
112
[array([0.01777778, 0.77555556, 0.13466667, 0.19      ]), array([0.00022222, 0.00177778, 0.01533333, 0.16622222]), [0.0, 0.0, 0.0, 0.0]]
113
[array([0.02422222, 0.79288889, 0.14355556, 0.19355556]), array([0.00044444, 0.00111111, 0.02      , 0.15733333]), [0.0, 0.0, 0.0, 0.0]]
114
[array([0.042     , 0.81711111, 0.07866667, 0.18666667]), array([0.00044444, 0.002     , 0.004     , 0.13511111]), [0.0, 0.0, 0.0, 0.0]]
115
[array([0.086     , 0.72888889, 0.05044444, 0.18955556]), array([0.00066667, 0.00177778, 0.00111111, 0.11911111]), [0.0, 0.0, 0.0, 0.0]]
116
[array([0.04066667, 0.84555556, 0.20377778, 0.19088889]), array([0.00022222, 0.00111111, 0.04733333, 0.166     ]), [0.0, 0.0, 0.0, 0.0]]
117
[array([0.03222222, 0.79777778, 0.18888889, 0.18755556]), array([0.00044444, 0.00111111, 0.01511111, 0.11755556]), [0.0, 0.0, 0.0, 0.0]]
118
[array([0.1

  scores = weights * exp(dot(X, beta))
  denom = 1.0 / np.array([risk_phi])
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  a1 = einsum("ab,i->ab", risk_phi_x_x, denom) - einsum("ab,i->ab", tie_phi_x_x, increasing_proportion * denom)
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()


[array([0.86711111, 0.97022222, 0.        , 0.914     ]), array([0.00088889, 0.        , 0.        , 0.00622222]), [0.956, 0.724, 1.0, 0.06]]
138
[array([0.00133333, 0.00022222, 0.        , 0.00466667]), array([0.86577778, 0.96377778, 0.01555556, 0.90511111]), [0.031, 0.429, 0.0, 0.855]]
139
[array([0.00111111, 0.00022222, 0.        , 0.01088889]), array([0.63933333, 0.96466667, 0.08      , 0.85777778]), [0.001, 0.379, 0.0, 0.0]]
140


  scores = weights * exp(dot(X, beta))
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)


[array([0.00133333, 0.        , 0.        , 0.02822222]), array([0.408     , 0.93555556, 0.02044444, 0.78555556]), [0.0, 0.009, 1.0, 0.0]]
141
[array([0.00088889, 0.        , 0.        , 0.04155556]), array([0.24311111, 0.84555556, 0.024     , 0.70088889]), [0.0, 0.0, 0.0, 0.0]]
142
[array([0.00133333, 0.00066667, 0.        , 0.05533333]), array([0.23088889, 0.66466667, 0.        , 0.60844444]), [0.0, 0.0, 0.0, 0.0]]
143
[array([0.00155556, 0.00066667, 0.        , 0.05933333]), array([0.144     , 0.44133333, 0.        , 0.52      ]), [0.0, 0.0, 0.0, 0.0]]
144
[array([0.00044444, 0.00066667, 0.        , 0.05022222]), array([0.05311111, 0.4       , 0.00111111, 0.47977778]), [0.002, 0.0, 0.0, 0.0]]
145
[array([0.00088889, 0.        , 0.00244444, 0.04666667]), array([0.13688889, 0.59066667, 0.44111111, 0.69733333]), [0.0, 0.0, 0.0, 0.0]]
146
[array([0.00044444, 0.00088889, 0.00155556, 0.06666667]), array([0.08066667, 0.43      , 0.39066667, 0.576     ]), [0.0, 0.0, 0.0, 0.0]]
147
[array([0

  scores = weights * exp(dot(X, beta))
  denom = 1.0 / np.array([risk_phi])
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  a1 = einsum("ab,i->ab", risk_phi_x_x, denom) - einsum("ab,i->ab", tie_phi_x_x, increasing_proportion * denom)
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()


[array([0.00155556, 0.86977778, 0.54377778, 0.15088889]), array([2.68000000e-01, 2.22222222e-04, 6.66666667e-04, 1.12222222e-01]), [1.0, 0.0, 0.0, 0.0]]
180
[array([0.16311111, 0.76133333, 0.33622222, 0.16088889]), array([0.00066667, 0.00022222, 0.00066667, 0.11644444]), [0.0, 0.0, 0.0, 0.0]]
181
[array([0.18422222, 0.814     , 0.45977778, 0.16044444]), array([0.00177778, 0.00022222, 0.        , 0.17111111]), [0.0, 0.0, 0.0, 0.0]]
182
[array([0.22333333, 0.86466667, 0.25466667, 0.16044444]), array([0.00066667, 0.00022222, 0.        , 0.17666667]), [0.0, 0.0, 0.0, 0.0]]
183
[array([0.21755556, 0.85044444, 0.15266667, 0.15222222]), array([0.00133333, 0.00022222, 0.        , 0.17688889]), [0.0, 0.0, 0.0, 0.0]]
184
[array([0.18311111, 0.80755556, 0.23911111, 0.14155556]), array([0.00133333, 0.00022222, 0.        , 0.11555556]), [0.0, 0.0, 0.0, 0.0]]
185
[array([0.16644444, 0.86422222, 0.19244444, 0.16755556]), array([0.00044444, 0.00066667, 0.00044444, 0.17511111]), [0.0, 0.0, 0.0, 0.0]]
1

MemoryError: Unable to allocate 21.6 MiB for an array with shape (4500, 628) and data type float64

## Priority-based filtering by metrics

In [4]:
def Aggregation(MetricTable,AggMetricList):
    """Filter the metric table, score the surviving models and pick the best one.

    The table is first passed through DoSimEval on the 'MaxSurvpVal' column
    together with AggMetricList and the module-level settings pCutoff, ExcRate
    and NmodEahG (these globals must be defined before this function is called).
    The 'Model' column plus the AggMetricList columns of that result are then
    handed to DoAggMetric, and the outcome is sorted by its 'Metrics' column in
    ascending order. Finally each model's 'MaxSurvpVal' is joined back on.

    Parameters
    ----------
    MetricTable : DataFrame of per-model metrics; forwarded to DoSimEval.
    AggMetricList : list of metric column names used for filtering and scoring.

    Returns
    -------
    AggMetricRank : ranking table sorted by 'Metrics', with 'MaxSurvpVal' added.
    BestModel : the last row of the ranking after an ascending sort on
        'Metrics', i.e. the row with the largest 'Metrics' value.
    """
    filtered = DoSimEval(MetricTable, 'MaxSurvpVal',pCutoff, AggMetricList, ExcRate, NmodEahG)

    score_columns = ['Model'] + AggMetricList
    ranked = DoAggMetric(AggMetricList, filtered[score_columns]).sort_values('Metrics')

    # Left join keeps every ranked model and attaches its 'MaxSurvpVal'.
    pvalues = filtered[['Model','MaxSurvpVal']]
    AggMetricRank = ranked.merge(pvalues, on='Model', how='left')

    BestModel = AggMetricRank.sort_values('Metrics').iloc[-1]
    return AggMetricRank, BestModel

In [5]:
# Settings read as globals by Aggregation and forwarded to DoSimEval.
pCutoff = 0.005
ExcRate = 0.2
NmodEahG = 1

# Metric columns used for the two rankings; the lists differ only in the
# significance-rate pair (Neg* versus Pos*).
NegMetricList = ['IndCentRatio', 'MinABSSurvCoef', 'AvgABSSurvCoef',  'MinNegSigRate', 'AvgNegSigRate', 'MinABSGeCohD', 'AvgABSGeCohD']
PosMetricList = ['IndCentRatio', 'MinABSSurvCoef', 'AvgABSSurvCoef', 'MinPosSigRate', 'AvgPosSigRate', 'MinABSGeCohD', 'AvgABSGeCohD']

# Reload the per-model metric table written by the metric-collection cell.
MetricTable = pd.read_csv('./MetricTables/RCFRMetricTable_Filt'+str(NumGene_CL)+'.csv')

# Re-derive the group id (kept as a string) and the epoch number (integer)
# from the model file name. Raw strings are used so that '\d' is not treated
# as an (invalid) string escape sequence.
MetricTable['GroupM'] = np.array([re.findall(r'.\d+', i)[1][1:] for i in MetricTable['Model']])
MetricTable['EpNum'] = np.array([re.findall(r'.\d+\.', i)[0][1:-1] for i in MetricTable['Model']]).astype('int')


In [6]:
# Rank the models twice: once with the Neg* metric list, once with the Pos* list.
NegAggMetricRank, NegBestModel = Aggregation(MetricTable, NegMetricList)
PosAggMetricRank, PosBestModel = Aggregation(MetricTable, PosMetricList)

# Write both ranking tables next to the metric table, without the index column.
for _rank_table, _rank_path in (
    (NegAggMetricRank, './MetricTables/RCFR_Neg_AggMetricRank_Filt'+str(NumGene_CL)+'.csv'),
    (PosAggMetricRank, './MetricTables/RCFR_Pos_AggMetricRank_Filt'+str(NumGene_CL)+'.csv'),
):
    _rank_table.to_csv(_rank_path, index=False)

1
N obs with filter of IndCentRatio : 54
N obs with filter of MinABSSurvCoef : 44
N obs with filter of AvgABSSurvCoef : 36
N obs with filter of MinNegSigRate : 29
N obs with filter of AvgNegSigRate : 24
N obs with filter of MinABSGeCohD : 20
N obs with filter of AvgABSGeCohD : 16

2
N obs with filter of IndCentRatio : 54
N obs with filter of MinABSSurvCoef : 44
N obs with filter of AvgABSSurvCoef : 36
N obs with filter of MinNegSigRate : 29
N obs with filter of AvgNegSigRate : 24
N obs with filter of MinABSGeCohD : 20
N obs with filter of AvgABSGeCohD : 16

3
N obs with filter of IndCentRatio : 56
N obs with filter of MinABSSurvCoef : 45
N obs with filter of AvgABSSurvCoef : 36
N obs with filter of MinNegSigRate : 29
N obs with filter of AvgNegSigRate : 24
N obs with filter of MinABSGeCohD : 20
N obs with filter of AvgABSGeCohD : 16

4
N obs with filter of IndCentRatio : 54
N obs with filter of MinABSSurvCoef : 44
N obs with filter of AvgABSSurvCoef : 36
N obs with filter of MinNegSigR

In [23]:
# Summary statistics of the Neg* ranking, restricted to the metric columns.
NegAggMetricRank.loc[:, NegMetricList].describe()

Unnamed: 0,IndCentRatio,MinABSSurvCoef,AvgABSSurvCoef,MinNegSigRate,AvgNegSigRate,MinABSGeCohD,AvgABSGeCohD
count,5.0,5.0,5.0,5.0,5.0,5.0,5.0
mean,0.404412,2.712378,3.339424,0.14,0.4131,0.109997,0.269578
std,0.015519,0.058465,0.045165,0.005099,0.00798,0.003747,0.00432
min,0.384956,2.648309,3.261413,0.132,0.4045,0.104921,0.263607
25%,0.391111,2.652301,3.338649,0.138,0.409,0.107135,0.268354
50%,0.40991,2.734024,3.364039,0.142,0.411,0.112116,0.26881
75%,0.41629,2.752739,3.36615,0.144,0.4155,0.112288,0.271943
max,0.419795,2.774515,3.366869,0.144,0.4255,0.113526,0.275174


In [24]:
# Summary statistics of the Pos* ranking, restricted to the metric columns.
PosAggMetricRank.loc[:, PosMetricList].describe()

Unnamed: 0,IndCentRatio,MinABSSurvCoef,AvgABSSurvCoef,MinPosSigRate,AvgPosSigRate,MinABSGeCohD,AvgABSGeCohD
count,5.0,5.0,5.0,5.0,5.0,5.0,5.0
mean,0.372484,1.85231,3.389723,0.0968,0.3957,0.107318,0.19171
std,0.050553,0.273067,0.266193,0.062683,0.083325,0.008065,0.022589
min,0.320675,1.47133,2.946192,0.034,0.3085,0.098865,0.171239
25%,0.326271,1.654997,3.360958,0.06,0.3255,0.100175,0.172667
50%,0.372807,2.002611,3.496278,0.076,0.391,0.108219,0.188601
75%,0.403587,2.064648,3.51074,0.12,0.4445,0.110863,0.199959
max,0.43908,2.067965,3.634443,0.194,0.509,0.11847,0.226084
