In [1]:
import pickle
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import statsmodels.api as sm
import seaborn as sns
import umap
import re

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model ,load_model
from tensorflow.keras.layers import Input, Dense,Concatenate, Reshape, Activation, BatchNormalization, Embedding, Dot, Dropout

from lifelines import CoxPHFitter
from Models.CFR import SetModel
from Module.DataProcessing import DataLoad
from Module.MetricsGroup import DoMetric, DoAggMetric, DoSimEval


# Enumerate CUDA devices by PCI bus ID so the index below is interpreted in that ordering.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
# Expose only device index 3 to CUDA for this kernel.
# NOTE(review): the index is hard-coded and machine-specific — adjust it for the host being used.
os.environ["CUDA_VISIBLE_DEVICES"]="3"




### Data load

In [2]:
# Load everything the project loader returns; several of these objects are reused by later cells.
(StackedData, IntToGene, TTE, EVENT, TrIndEmbeddMask,
 ReferencePatIDLong, ReferencePatIDShort, NormDismInd, MergedData) = DataLoad()

# Columns 0 and 1 of StackedData are cast to integer indices; column 2 is kept as-is.
# Each slice keeps a trailing axis of length 1 (shape (n, 1)).
# NOTE(review): presumably (patient index, gene index, expression value) per row — confirm against DataLoad.
PatIDX, GeneIDX = (StackedData[:, col:col + 1].astype('int') for col in (0, 1))
GeneExp = StackedData[:, 2:3]

# Number of distinct index values in each of the two index columns.
IndN = np.unique(PatIDX).size
FeatN = np.unique(GeneIDX).size

## Settings for tasks

In [4]:
# Directory holding the saved weight files; defined once and reused for listing and loading.
FilePath = './Results/'

# Keep only the files whose name contains 'M01'.
# os.listdir returns entries in arbitrary order, so the list is sorted to make the
# evaluation order (and the order of anything collected per model) reproducible.
ModelList = sorted(i for i in os.listdir(FilePath) if 'M01' in i)

# Parameters for post-hoc models
EmbedSize = 50
NCL_Feat = 5
NCL_Ind = 2
AdjCosWeight_ = 1. # This parameter only affects the training phase, so any float can be set in the post-analysis phase.
NumGene_CL = 100

# Model structure load
CFR, LayerList = SetModel(IndN, FeatN)

# Data for calculating metric
DataMetric = [MergedData, TTE, EVENT, NCL_Ind, NCL_Feat, NumGene_CL, IntToGene]


# Column names of the per-model metric table: the model file name followed by the metric names.
ColList = ['Model','AvgtPRate', 'AvgtAdjPRate', 'MintAdjPRate', 'AvgABSGeCohD', 'MinABSGeCohD', 'AvgABSSurvCoef', 'MinABSSurvCoef', 'AvgSurvpVal', 
           'MaxSurvpVal', 'NegExpAvgSurvpVal', 'NegExpMinSurvpVal', 'AvgNegSigRate',  'MinNegSigRate', 'AvgPosSigRate', 'MinPosSigRate','IndCentRatio']


In [5]:
# Number of weight files selected for evaluation (entries of ./Results/ containing 'M01').
len(ModelList)

500

## Procedure for model evaluation

In [6]:
# Evaluate every saved weight file and collect one metric row per model.
# Rows are accumulated in a plain list and turned into a DataFrame once after the loop;
# growing the DataFrame with pd.concat on every iteration copies the whole table each time.
MetricRows = []
InfoFeatGroupList = []

# The layer handles belong to the model structure built by SetModel, not to a particular
# weight file, so they are unpacked once instead of on every iteration.
InpInd, InpFeat, IndEmbeddWeig, FeatEmbeddWeig, IndCentroid, FeatCentroid, ICosCLSim, FCosCLSim = LayerList

for num, model in enumerate(ModelList):
    print(num)

    CFR.load_weights(FilePath + model)  # Model weights load

    # Metric calculation: InfoFeatGroup will be used in UMAP analysis
    metrics, InfoFeatGroup = DoMetric(DataMetric, [InpInd, InpFeat, IndEmbeddWeig, FeatEmbeddWeig, IndCentroid, FeatCentroid, ICosCLSim, FCosCLSim])
    InfoFeatGroupList.append(InfoFeatGroup)
    print(InfoFeatGroup)

    MetricRow = [model] + metrics
    # Fail on the first inconsistent row rather than after the whole (long) loop has finished.
    if len(MetricRow) != len(ColList):
        raise ValueError('DoMetric returned %d metrics for %s, expected %d'
                         % (len(metrics), model, len(ColList) - 1))
    MetricRows.append(MetricRow)

MetricTable = pd.DataFrame(MetricRows, columns=ColList)

# Derive grouping keys from the file name (raw strings keep '\d' a regex escape, not a string escape):
#   GroupM - second match of "any character followed by digits", without its leading character
#   EpNum  - first match of "any character, digits, literal dot", without its first and last character
# NOTE(review): this relies on the naming convention of the saved weight files — confirm if it changes.
MetricTable['GroupM'] = np.array([re.findall(r'.\d+', i)[1][1:] for i in MetricTable['Model']])
MetricTable['EpNum'] = np.array([re.findall(r'.\d+\.', i)[0][1:-1] for i in MetricTable['Model']]).astype('int')
MetricTable = MetricTable.sort_values(['GroupM','EpNum'])

# Make sure the output directory exists so the results of the long loop are not lost on write.
os.makedirs('./MetricTables/', exist_ok=True)
MetricTable.to_csv('./MetricTables/CFRMetricTable_Filt'+str(NumGene_CL)+'.csv',index=False)

[array([0.026, 0.32 , 0.   , 0.032]), array([0.846, 0.002, 0.4  , 0.434]), [0.658, 0.448, 0.011, 0.074]]
482
[array([0.026, 0.332, 0.   , 0.09 ]), array([0.836, 0.002, 0.654, 0.394]), [0.43, 0.448, 0.014, 0.033]]
483
[array([0.018, 0.336, 0.   , 0.06 ]), array([0.88 , 0.   , 0.612, 0.42 ]), [0.809, 0.448, 0.038, 0.093]]
484
[array([0.026, 0.338, 0.002, 0.082]), array([0.844, 0.002, 0.71 , 0.332]), [0.993, 0.448, 0.149, 0.106]]
485
[array([0.018, 0.328, 0.002, 0.06 ]), array([0.87 , 0.   , 0.77 , 0.374]), [0.813, 0.448, 0.131, 0.041]]
486
[array([0.018, 0.604, 0.   , 0.054]), array([0.872, 0.   , 0.648, 0.43 ]), [0.607, 0.537, 0.018, 0.025]]
487
[array([0.02 , 0.34 , 0.   , 0.058]), array([0.844, 0.   , 0.592, 0.422]), [0.494, 0.448, 0.047, 0.032]]
488
[array([0.864, 0.334, 0.   , 0.054]), array([0.02 , 0.   , 0.742, 0.47 ]), [0.839, 0.448, 0.007, 0.021]]
489
[array([0.882, 0.332, 0.002, 0.064]), array([0.026, 0.   , 0.72 , 0.406]), [0.998, 0.448, 0.011, 0.095]]
490
[array([0.842, 0.336

## Priority-based filtering by metrics

In [6]:
def Aggregation(MetricTable,AggMetricList):
    """Filter the metric table, rank the remaining models and pick the top-ranked one.

    Parameters
    ----------
    MetricTable : pandas.DataFrame
        Per-model metric table; must contain 'Model', 'MaxSurvpVal' and every
        column named in ``AggMetricList``.
    AggMetricList : list of str
        Metric column names handed to DoSimEval and DoAggMetric.

    Returns
    -------
    AggMetricRank : pandas.DataFrame
        Output of DoAggMetric sorted ascending by 'Metrics', with the
        'MaxSurvpVal' column joined back on 'Model'.
    BestModel : pandas.Series
        The row of ``AggMetricRank`` that comes last when sorted by 'Metrics'.

    Notes
    -----
    ``pCutoff``, ``ExcRate`` and ``NmodEahG`` are not parameters: they are read
    from the notebook's global namespace when the function is called, so they
    must be defined before the first call.
    """
    # Filtering step (project helper); 'MaxSurvpVal' and pCutoff are passed through unchanged.
    FilteredTable = DoSimEval(MetricTable, 'MaxSurvpVal',pCutoff, AggMetricList, ExcRate, NmodEahG)

    # Aggregate only the model name plus the selected metric columns, then order by the aggregate score.
    RankInput = FilteredTable[['Model']+AggMetricList]
    RankedTable = DoAggMetric(AggMetricList, RankInput).sort_values('Metrics')

    # Re-attach the p-value column to the ranked rows.
    SurvPValCols = FilteredTable[['Model','MaxSurvpVal']]
    AggMetricRank = pd.merge(RankedTable, SurvPValCols, on='Model', how='left')

    # The last row after an ascending sort on 'Metrics' is taken as the best model.
    BestModel = AggMetricRank.sort_values('Metrics').iloc[-1]

    return AggMetricRank, BestModel

In [7]:
# Filter settings read as globals by Aggregation(), which forwards them to DoSimEval.
# NOTE(review): their exact meaning is defined inside DoSimEval — confirm there before tuning.
pCutoff = 0.005
ExcRate = 0.2
NmodEahG = 1

# Metric columns used for the two rankings; the lists differ only in the
# Neg/Pos variants of the SigRate columns.
NegMetricList = ['IndCentRatio', 'MinABSSurvCoef', 'AvgABSSurvCoef',  'MinNegSigRate', 'AvgNegSigRate', 'MinABSGeCohD', 'AvgABSGeCohD']
PosMetricList = ['IndCentRatio', 'MinABSSurvCoef', 'AvgABSSurvCoef', 'MinPosSigRate', 'AvgPosSigRate', 'MinABSGeCohD', 'AvgABSGeCohD']

# Reload the saved metric table so this section can run without repeating the evaluation loop.
MetricTable = pd.read_csv('./MetricTables/CFRMetricTable_Filt'+str(NumGene_CL)+'.csv')

# Re-derive the grouping keys from the file name (raw strings keep '\d' a regex escape, not a string escape):
#   GroupM - second match of "any character followed by digits", without its leading character
#   EpNum  - first match of "any character, digits, literal dot", without its first and last character
MetricTable['GroupM'] = np.array([re.findall(r'.\d+', i)[1][1:] for i in MetricTable['Model']])
MetricTable['EpNum'] = np.array([re.findall(r'.\d+\.', i)[0][1:-1] for i in MetricTable['Model']]).astype('int')


In [8]:
# Rank the models twice: once with the Neg metric list and once with the Pos metric list.
NegAggMetricRank, NegBestModel = Aggregation(MetricTable, NegMetricList)
PosAggMetricRank, PosBestModel = Aggregation(MetricTable, PosMetricList)

# Persist both rankings; the file name carries NumGene_CL so different runs do not overwrite each other.
NegRankPath = './MetricTables/CFR_Neg_AggMetricRank_Filt'+str(NumGene_CL)+'.csv'
NegAggMetricRank.to_csv(NegRankPath, index=False)

PosRankPath = './MetricTables/CFR_Pos_AggMetricRank_Filt'+str(NumGene_CL)+'.csv'
PosAggMetricRank.to_csv(PosRankPath, index=False)

1
N obs with filter of IndCentRatio : 80
N obs with filter of MinABSSurvCoef : 64
N obs with filter of AvgABSSurvCoef : 52
N obs with filter of MinNegSigRate : 42
N obs with filter of AvgNegSigRate : 34
N obs with filter of MinABSGeCohD : 28
N obs with filter of AvgABSGeCohD : 23

2
N obs with filter of IndCentRatio : 80
N obs with filter of MinABSSurvCoef : 64
N obs with filter of AvgABSSurvCoef : 52
N obs with filter of MinNegSigRate : 42
N obs with filter of AvgNegSigRate : 34
N obs with filter of MinABSGeCohD : 28
N obs with filter of AvgABSGeCohD : 23

3
N obs with filter of IndCentRatio : 80
N obs with filter of MinABSSurvCoef : 64
N obs with filter of AvgABSSurvCoef : 52
N obs with filter of MinNegSigRate : 42
N obs with filter of AvgNegSigRate : 34
N obs with filter of MinABSGeCohD : 28
N obs with filter of AvgABSGeCohD : 23

4
N obs with filter of IndCentRatio : 80
N obs with filter of MinABSSurvCoef : 64
N obs with filter of AvgABSSurvCoef : 52
N obs with filter of MinNegSigR

In [9]:
# Summary statistics of the Neg-list metric columns over the models kept in the Neg ranking.
NegAggMetricRank[NegMetricList].describe()

Unnamed: 0,IndCentRatio,MinABSSurvCoef,AvgABSSurvCoef,MinNegSigRate,AvgNegSigRate,MinABSGeCohD,AvgABSGeCohD
count,5.0,5.0,5.0,5.0,5.0,5.0,5.0
mean,0.006397,0.414806,4.457449,0.0,0.0487,0.449164,0.870471
std,0.0,0.046202,0.057371,0.0,0.009391,0.000833,0.033783
min,0.006397,0.332158,4.431792,0.0,0.0445,0.447738,0.854478
25%,0.006397,0.435469,4.431792,0.0,0.0445,0.449214,0.855399
50%,0.006397,0.435469,4.431792,0.0,0.0445,0.449489,0.855399
75%,0.006397,0.435469,4.431792,0.0,0.0445,0.449489,0.856187
max,0.006397,0.435469,4.560077,0.0,0.0655,0.449892,0.930895


In [10]:
# Summary statistics of the Pos-list metric columns over the models kept in the Pos ranking.
PosAggMetricRank[PosMetricList].describe()

Unnamed: 0,IndCentRatio,MinABSSurvCoef,AvgABSSurvCoef,MinPosSigRate,AvgPosSigRate,MinABSGeCohD,AvgABSGeCohD
count,5.0,5.0,5.0,5.0,5.0,5.0,5.0
mean,0.005112,0.302619,4.501703,0.004,0.4178,0.45744,0.857778
std,0.001173,0.148941,0.085124,0.003742,0.018593,0.008311,0.041863
min,0.004255,0.147312,4.38823,0.0,0.3955,0.447738,0.830156
25%,0.004255,0.147312,4.431792,0.0,0.4075,0.449214,0.836677
50%,0.004255,0.332158,4.560077,0.006,0.42,0.461938,0.836686
75%,0.006397,0.435469,4.564207,0.006,0.4205,0.462725,0.854478
max,0.006397,0.450846,4.564207,0.008,0.4455,0.465587,0.930895
