In [1]:
import pickle
import os
import sys
import pandas as pd
import numpy as np
from numpy import mean as npMean
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import seaborn as sns
import umap
import re

import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model ,load_model


from lifelines import CoxPHFitter
from Models.RCFR_noRiskRank import SetModel
from Module.DataProcessing import DataLoad
from Module.MetricsGroup import DoMetric, DoAggMetric, DoSimEval

# Pin the visible GPU for this kernel: devices are enumerated in PCI bus order
# and only the device with index "3" is exposed to CUDA-based libraries.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "3",
})





### Data load

In [2]:
# Pull every dataset / lookup object returned by the project loader.
(StackedData, IntToGene, TTE, EVENT, TrIndEmbeddMask,
 ReferencePatIDLong, ReferencePatIDShort, NormDismInd, MergedData) = DataLoad()

# Split the stacked array column-wise while keeping each piece 2-D.
# NOTE(review): presumably columns are (patient index, gene index, expression) - confirm against DataLoad.
PatIDX = StackedData[:, :1].astype('int')
GeneIDX = StackedData[:, 1:2].astype('int')
GeneExp = StackedData[:, 2:3]

# Number of distinct values found in the two index columns.
IndN = np.unique(PatIDX).size
FeatN = np.unique(GeneIDX).size

## Settings for tasks

In [3]:
# Directory with the saved weight files; keep only the file names carrying both tags.
# NOTE(review): os.listdir returns entries in arbitrary order, so the processing order may differ between machines.
FilePath = './Results/'
ModelList = [fname for fname in os.listdir(FilePath) if 'M02' in fname and 'NoRiskRank' in fname]

# Parameters for post-hoc models
EmbedSize = 50
NCL_Feat = 5
NCL_Ind = 2
AdjCosWeight_ = 1.  # Only affects the training phase, so any float can be set for the post-hoc analysis.
NumGene_CL = 100

# Build the model structure once; weights are loaded into it later.
RCFR, LayerList = SetModel(NormDismInd, TrIndEmbeddMask, IndN, FeatN, ReferencePatIDLong, ReferencePatIDShort)

# Inputs bundled for the metric calculation
DataMetric = [MergedData, TTE, EVENT, NCL_Ind, NCL_Feat, NumGene_CL, IntToGene]

# Column names of the metric table: the model file name followed by one column per metric.
ColList = [
    'Model',
    'AvgtPRate', 'AvgtAdjPRate', 'MintAdjPRate',
    'AvgABSGeCohD', 'MinABSGeCohD',
    'AvgABSSurvCoef', 'MinABSSurvCoef',
    'AvgSurvpVal', 'MaxSurvpVal',
    'NegExpAvgSurvpVal', 'NegExpMinSurvpVal',
    'AvgNegSigRate', 'MinNegSigRate',
    'AvgPosSigRate', 'MinPosSigRate',
    'IndCentRatio',
]


## Procedure for selecting the best model

In [42]:
# Evaluate every selected checkpoint with the shared model structure and
# collect one metric row per checkpoint; the table is built once after the loop
# instead of being grown with pd.concat on every iteration.
MetricRows = []
InfoFeatGroupList = []

# LayerList is the same for every checkpoint, so it is unpacked once, outside the loop.
InpInd, InpFeat, IndEmbeddWeig, IndEmbeddReferenceLong, FeatEmbeddWeig, IndCentroid, FeatCentroid, ICosCLSim, FCosCLSim = LayerList

for num, model in enumerate(ModelList):
    print(num)

    RCFR.load_weights(FilePath + model)  # Model weights load

    # Metric calculation: InfoFeatGroup will be used in UMAP analysis
    metrics, InfoFeatGroup = DoMetric(DataMetric, [InpInd, InpFeat, IndEmbeddWeig, FeatEmbeddWeig, IndCentroid, FeatCentroid, ICosCLSim, FCosCLSim])
    InfoFeatGroupList.append(InfoFeatGroup)
    print(InfoFeatGroup)
    MetricRows.append([model] + metrics)

# Single construction: linear cost, unique row index, and also valid when ModelList is empty.
MetricTable = pd.DataFrame(MetricRows, columns=ColList)

#MetricTable = DoAggMetric(AggMetricList, MetricTable)
# Raw strings keep the regex escapes (\d, \.) from being read as (invalid) string escapes.
# GroupM: second "<any char><digits>" match of the file name, without its leading character.
# EpNum: first "<any char><digits>." match, without the leading character and the trailing dot
# NOTE(review): presumably the epoch number just before the file extension - confirm against the file naming scheme.
MetricTable['GroupM'] = np.array([re.findall(r'.\d+', i)[1][1:] for i in MetricTable['Model']])
MetricTable['EpNum'] = np.array([re.findall(r'.\d+\.', i)[0][1:-1] for i in MetricTable['Model']]).astype('int')
MetricTable = MetricTable.sort_values(['GroupM', 'EpNum'])

# Make sure the output directory exists so the export does not fail on a fresh checkout.
os.makedirs('./MetricTables/', exist_ok=True)
MetricTable.to_csv('./MetricTables/RCFR_NoRiskRank_MetricTable.csv', index=False)

0
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0., 0., 0., 0.]), array([0., 0., 0., 0.]), [0.612, 0.333, 0.544, 0.621]]
1
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.   , 0.   , 0.   , 0.014]), array([0.   , 0.   , 0.   , 0.004]), [0.899, 0.061, 0.778, 0.459]]
2
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.118, 0.89 , 0.102, 0.164]), array([0.   , 0.   , 0.026, 0.128]), [0.282, 0.011, 0.15, 0.003]]
3
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.052, 0.778, 0.   , 0.2  ]), array([0.   , 0.002, 0.   , 0.18 ]), [0.003, 0.0, 0.0, 0.0]]
4
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.03 , 0.788, 0.   , 0.152]), array([0.   , 0.006, 0.   , 0.278]), [0.001, 0.0, 0.0, 0.0]]
5
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.018, 0.798, 0.044, 0.176]), array([0.   , 0.004, 0.002, 0.




0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.076, 0.872, 0.076, 0.212]), array([0.002, 0.002, 0.006, 0.162]), [0.994, 0.0, 0.0, 0.0]]
14
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.126, 0.894, 0.074, 0.202]), array([0.002, 0.004, 0.002, 0.148]), [0.0, 0.0, 0.0, 0.0]]
15
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.134, 0.866, 0.092, 0.172]), array([0.002, 0.002, 0.002, 0.172]), [0.0, 0.0, 0.0, 0.0]]
16
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.1  , 0.858, 0.028, 0.15 ]), array([0.002, 0.   , 0.   , 0.122]), [0.0, 0.0, 0.0, 0.0]]
17
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.088, 0.866, 0.008, 0.17 ]), array([0.002, 0.   , 0.   , 0.132]), [0.0, 0.0, 0.0, 0.0]]
18
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.134, 0.826, 0.   , 0.18 ]), array([0.002, 0.   , 0.   , 




0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.17 , 0.962, 0.054, 0.136]), array([0.002, 0.   , 0.   , 0.158]), [0.993, 0.0, 0.0, 0.0]]
30
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.202, 0.976, 0.   , 0.11 ]), array([0.002, 0.   , 0.   , 0.17 ]), [0.0, 0.0, 0.0, 0.0]]
31
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.104, 0.966, 0.028, 0.112]), array([0.002, 0.   , 0.   , 0.168]), [0.0, 0.0, 0.0, 0.0]]
32
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.126, 0.96 , 0.002, 0.114]), array([0.   , 0.   , 0.   , 0.152]), [0.0, 0.0, 0.0, 0.0]]
33
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.252, 0.944, 0.002, 0.108]), array([0.   , 0.   , 0.   , 0.176]), [0.0, 0.0, 0.0, 0.0]]
34
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.214, 0.942, 0.   , 0.104]), array([0.   , 0.   , 0.   , 




0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.226, 0.966, 0.11 , 0.152]), array([0.   , 0.   , 0.   , 0.166]), [0.992, 0.0, 0.0, 0.0]]
39
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.186, 0.97 , 0.018, 0.116]), array([0.   , 0.   , 0.   , 0.166]), [0.0, 0.0, 0.0, 0.0]]
40
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.16 , 0.96 , 0.028, 0.096]), array([0.   , 0.   , 0.   , 0.124]), [0.0, 0.0, 0.0, 0.0]]
41
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.186, 0.972, 0.048, 0.1  ]), array([0.   , 0.   , 0.   , 0.138]), [0.0, 0.0, 0.0, 0.0]]
42
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.228, 0.96 , 0.034, 0.104]), array([0.   , 0.002, 0.   , 0.146]), [0.0, 0.0, 0.0, 0.0]]
43
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.292, 0.942, 0.072, 0.122]), array([0.   , 0.   , 0.   , 




100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.238, 0.978, 0.098, 0.18 ]), array([0.  , 0.  , 0.  , 0.09]), [0.991, 0.0, 0.0, 0.0]]
50
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.4  , 0.974, 0.18 , 0.148]), array([0.   , 0.   , 0.   , 0.104]), [0.0, 0.0, 0.0, 0.0]]
51
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.254, 0.984, 0.092, 0.146]), array([0.   , 0.   , 0.   , 0.092]), [0.0, 0.0, 0.0, 0.0]]
52
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.192, 0.984, 0.008, 0.124]), array([0.   , 0.   , 0.   , 0.086]), [0.0, 0.0, 0.0, 0.0]]
53
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.25 , 0.98 , 0.04 , 0.154]), array([0.   , 0.   , 0.   , 0.072]), [0.0, 0.0, 0.0, 0.0]]
54
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.376, 0.974, 0.028, 0.144]), array([0.  , 0.  , 0.  , 0.12]), [



0
100
200
300
400
0
100
200
300
400
[array([0.258, 0.864, 0.   , 0.182]), array([0.   , 0.   , 0.   , 0.034]), [0.043, 0.0, 0.82, 0.0]]
80



>>> events = df['event'].astype(bool)
>>> print(df.loc[events, 'IndCentMembers'].var())
>>> print(df.loc[~events, 'IndCentMembers'].var())

A very low variance means that the column IndCentMembers completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.




0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.   , 0.898, 0.   , 0.148]), array([0.108, 0.   , 0.   , 0.04 ]), [0.995, 0.0, 0.0, 0.0]]
81
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.51 , 0.988, 0.394, 0.232]), array([0.   , 0.   , 0.   , 0.044]), [0.0, 0.0, 0.0, 0.0]]
82
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.632, 0.986, 0.238, 0.198]), array([0.   , 0.   , 0.   , 0.038]), [0.0, 0.0, 0.0, 0.0]]
83
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.54 , 0.982, 0.164, 0.234]), array([0.  , 0.  , 0.  , 0.06]), [0.0, 0.0, 0.0, 0.0]]
84
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.502, 0.986, 0.078, 0.188]), array([0.   , 0.   , 0.   , 0.042]), [0.0, 0.0, 0.0, 0.0]]
85
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.492, 0.982, 0.024, 0.142]), array([0.   , 0.   , 0.   , 0.05


>>> events = df['event'].astype(bool)
>>> print(df.loc[events, 'IndCentMembers'].var())
>>> print(df.loc[~events, 'IndCentMembers'].var())

A very low variance means that the column IndCentMembers completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.




0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.   , 0.922, 0.   , 0.198]), array([0.174, 0.002, 0.   , 0.048]), [0.996, 0.0, 0.0, 0.0]]
91



>>> events = df['event'].astype(bool)
>>> print(df.loc[events, 'IndCentMembers'].var())
>>> print(df.loc[~events, 'IndCentMembers'].var())

A very low variance means that the column IndCentMembers completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.




0
100
200
300
400
0
100
200
300
400
0
100
200
300
400




0
100
200
300
400
[array([0.   , 0.862, 0.   , 0.136]), array([0.092, 0.002, 0.   , 0.042]), [0.996, 0.0, 0.0, 0.018]]
92
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.574, 0.988, 0.53 , 0.292]), array([0.   , 0.   , 0.   , 0.046]), [0.0, 0.0, 0.0, 0.0]]
93
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.684, 0.984, 0.398, 0.286]), array([0.   , 0.   , 0.   , 0.042]), [0.0, 0.0, 0.0, 0.0]]
94
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.678, 0.984, 0.172, 0.282]), array([0.   , 0.   , 0.   , 0.056]), [0.0, 0.0, 0.0, 0.0]]
95
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.622, 0.974, 0.058, 0.174]), array([0.  , 0.  , 0.  , 0.06]), [0.0, 0.0, 0.0, 0.0]]
96
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.562, 0.984, 0.012, 0.158]), array([0.   , 0.   , 0.   , 0.044]), [0.002, 0.0, 0.0, 0.0]]
97
0
100
200
300
400
0





0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.09 , 0.874, 0.088, 0.214]), array([0.002, 0.002, 0.002, 0.176]), [0.994, 0.0, 0.0, 0.0]]
114
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.118, 0.898, 0.076, 0.198]), array([0.002, 0.004, 0.002, 0.158]), [0.0, 0.0, 0.0, 0.0]]
115
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.15 , 0.86 , 0.094, 0.176]), array([0.002, 0.002, 0.004, 0.16 ]), [0.0, 0.0, 0.0, 0.0]]
116
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.136, 0.854, 0.048, 0.158]), array([0.002, 0.   , 0.   , 0.142]), [0.0, 0.0, 0.0, 0.0]]
117
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.104, 0.86 , 0.01 , 0.172]), array([0.002, 0.   , 0.   , 0.16 ]), [0.0, 0.0, 0.0, 0.0]]
118
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.128, 0.88 , 0.002, 0.174]), array([0.002, 0.   , 0.




0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.116, 0.936, 0.074, 0.166]), array([0.002, 0.   , 0.002, 0.176]), [0.995, 0.0, 0.0, 0.0]]
122
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.148, 0.948, 0.01 , 0.152]), array([0.002, 0.   , 0.   , 0.148]), [0.0, 0.0, 0.0, 0.0]]
123
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.094, 0.938, 0.034, 0.158]), array([0.002, 0.   , 0.   , 0.144]), [0.0, 0.0, 0.0, 0.0]]
124
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.16 , 0.936, 0.036, 0.146]), array([0.002, 0.   , 0.   , 0.144]), [0.0, 0.0, 0.0, 0.0]]
125
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.18 , 0.916, 0.054, 0.136]), array([0.002, 0.   , 0.   , 0.158]), [0.0, 0.0, 0.0, 0.0]]
126
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.216, 0.91 , 0.002, 0.134]), array([0.002, 0.002, 0.




0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.062, 0.964, 0.086, 0.15 ]), array([0.002, 0.   , 0.   , 0.172]), [0.994, 0.0, 0.0, 0.0]]
130
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.174, 0.968, 0.036, 0.114]), array([0.002, 0.   , 0.   , 0.162]), [0.0, 0.0, 0.0, 0.0]]
131
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.126, 0.97 , 0.034, 0.102]), array([0.002, 0.   , 0.   , 0.164]), [0.0, 0.0, 0.0, 0.0]]
132
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.11 , 0.97 , 0.022, 0.1  ]), array([0.   , 0.   , 0.   , 0.166]), [0.0, 0.0, 0.0, 0.0]]
133
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.268, 0.946, 0.002, 0.106]), array([0.   , 0.   , 0.   , 0.142]), [0.0, 0.0, 0.0, 0.0]]
134
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.184, 0.938, 0.002, 0.11 ]), array([0.   , 0.   , 0.

  scores = weights * exp(dot(X, beta))
  denom = 1.0 / np.array([risk_phi])
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  a1 = einsum("ab,i->ab", risk_phi_x_x, denom) - einsum("ab,i->ab", tie_phi_x_x, increasing_proportion * denom)
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)


0
100
200
300
400
0
100
200
300
400
[array([0.28 , 0.894, 0.   , 0.16 ]), array([0.  , 0.  , 0.  , 0.04]), [0.059, 0.0, 1.0, 0.0]]
168
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.47 , 0.996, 0.19 , 0.284]), array([0.   , 0.   , 0.   , 0.054]), [0.0, 0.0, 0.0, 0.0]]
169
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.416, 0.994, 0.312, 0.198]), array([0.   , 0.   , 0.   , 0.058]), [0.0, 0.0, 0.0, 0.0]]
170
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.438, 0.984, 0.178, 0.214]), array([0.   , 0.   , 0.   , 0.038]), [0.0, 0.0, 0.0, 0.0]]
171
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.596, 0.974, 0.158, 0.218]), array([0.  , 0.  , 0.  , 0.04]), [0.0, 0.0, 0.0, 0.0]]
172
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.588, 0.964, 0.064, 0.19 ]), array([0.   , 0.   , 0.   , 0.052]), [0.0, 0.0, 0.0, 0.0]]
173
0
10


>>> events = df['event'].astype(bool)
>>> print(df.loc[events, 'IndCentMembers'].var())
>>> print(df.loc[~events, 'IndCentMembers'].var())

A very low variance means that the column IndCentMembers completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.




100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.  , 0.86, 0.  , 0.13]), array([0.07 , 0.   , 0.   , 0.026]), [0.996, 0.0, 0.0, 0.0]]
183
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.694, 0.994, 0.444, 0.346]), array([0.   , 0.   , 0.   , 0.046]), [0.0, 0.0, 0.0, 0.0]]
184
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.626, 0.998, 0.304, 0.274]), array([0.   , 0.   , 0.   , 0.052]), [0.0, 0.0, 0.0, 0.0]]
185
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.63 , 0.982, 0.174, 0.234]), array([0.   , 0.   , 0.   , 0.046]), [0.0, 0.0, 0.0, 0.0]]
186
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.626, 0.984, 0.134, 0.224]), array([0.   , 0.   , 0.   , 0.038]), [0.0, 0.0, 0.0, 0.0]]
187
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.624, 0.99 , 0.052, 0.182]), array([0.  , 0.  , 0.  , 0.04

  scores = weights * exp(dot(X, beta))
  denom = 1.0 / np.array([risk_phi])
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  a1 = einsum("ab,i->ab", risk_phi_x_x, denom) - einsum("ab,i->ab", tie_phi_x_x, increasing_proportion * denom)
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)


0
100
200
300
400
0
100
200
300
400
[array([0.302, 0.95 , 0.   , 0.198]), array([0.   , 0.   , 0.   , 0.036]), [0.046, 0.0, 1.0, 0.0]]
193
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.212, 0.866, 0.   , 0.184]), array([0.   , 0.   , 0.   , 0.038]), [0.02, 0.0, 0.0, 0.0]]
194
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.702, 0.996, 0.448, 0.636]), array([0.   , 0.   , 0.002, 0.018]), [0.0, 0.0, 0.0, 0.0]]
195
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.734, 0.996, 0.698, 0.28 ]), array([0.  , 0.  , 0.  , 0.05]), [0.0, 0.0, 0.0, 0.0]]
196
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.664, 0.99 , 0.284, 0.336]), array([0.  , 0.  , 0.  , 0.04]), [0.0, 0.0, 0.0, 0.0]]
197
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.686, 0.988, 0.17 , 0.268]), array([0.   , 0.   , 0.   , 0.042]), [0.0, 0.0, 0.0, 0.0]]
198
0
1




0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.082, 0.864, 0.08 , 0.218]), array([0.002, 0.002, 0.006, 0.17 ]), [0.994, 0.0, 0.0, 0.0]]
214
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.114, 0.898, 0.07 , 0.2  ]), array([0.002, 0.002, 0.002, 0.154]), [0.0, 0.0, 0.0, 0.0]]
215
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.124, 0.866, 0.092, 0.176]), array([0.002, 0.   , 0.002, 0.176]), [0.0, 0.0, 0.0, 0.0]]
216
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.112, 0.854, 0.038, 0.162]), array([0.002, 0.   , 0.   , 0.138]), [0.0, 0.0, 0.0, 0.0]]
217
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.092, 0.864, 0.006, 0.178]), array([0.002, 0.   , 0.   , 0.138]), [0.0, 0.0, 0.0, 0.0]]
218
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.134, 0.872, 0.008, 0.172]), array([0.002, 0.   , 0.




100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.128, 0.934, 0.054, 0.156]), array([0.002, 0.   , 0.   , 0.172]), [0.992, 0.0, 0.0, 0.0]]
222
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.142, 0.944, 0.   , 0.162]), array([0.002, 0.   , 0.   , 0.166]), [0.0, 0.0, 0.0, 0.0]]
223
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.086, 0.922, 0.002, 0.158]), array([0.002, 0.   , 0.   , 0.16 ]), [0.0, 0.0, 0.0, 0.0]]
224
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.138, 0.934, 0.046, 0.144]), array([0.002, 0.   , 0.   , 0.148]), [0.0, 0.0, 0.0, 0.0]]
225
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.154, 0.914, 0.032, 0.136]), array([0.004, 0.   , 0.   , 0.164]), [0.0, 0.0, 0.0, 0.0]]
226
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.2  , 0.914, 0.002, 0.14 ]), array([0.002, 0.   , 0.  




0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.104, 0.96 , 0.054, 0.12 ]), array([0.002, 0.   , 0.   , 0.166]), [0.991, 0.0, 0.0, 0.0]]
230
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.234, 0.968, 0.002, 0.116]), array([0.002, 0.   , 0.   , 0.16 ]), [0.0, 0.0, 0.0, 0.0]]
231
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.104, 0.966, 0.008, 0.116]), array([0.002, 0.   , 0.   , 0.16 ]), [0.0, 0.0, 0.0, 0.0]]
232
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.12 , 0.952, 0.002, 0.114]), array([0.   , 0.   , 0.   , 0.156]), [0.0, 0.0, 0.0, 0.0]]
233
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.236, 0.94 , 0.004, 0.124]), array([0.   , 0.   , 0.   , 0.158]), [0.0, 0.0, 0.0, 0.0]]
234
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.168, 0.926, 0.002, 0.108]), array([0.  , 0.  , 0.  



0
100
200
300
400
0
100
200
300
400
[array([0.426, 0.902, 0.   , 0.154]), array([0.   , 0.   , 0.   , 0.036]), [0.0, 0.0, 0.021, 0.0]]
268
0
100
200
300
400
0
100
200
300
400


  scores = weights * exp(dot(X, beta))
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)


0
100
200
300
400
0
100
200
300
400
[array([0.282, 0.93 , 0.   , 0.134]), array([0.   , 0.   , 0.   , 0.046]), [0.055, 0.0, 1.0, 0.0]]
269
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.198, 0.912, 0.   , 0.184]), array([0.   , 0.   , 0.   , 0.046]), [0.035, 0.0, 0.0, 0.0]]
270



>>> events = df['event'].astype(bool)
>>> print(df.loc[events, 'IndCentMembers'].var())
>>> print(df.loc[~events, 'IndCentMembers'].var())

A very low variance means that the column IndCentMembers completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.




0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.  , 0.9 , 0.  , 0.16]), array([0.164, 0.   , 0.   , 0.022]), [0.994, 0.0, 0.0, 0.0]]
271
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.574, 0.986, 0.162, 0.278]), array([0.  , 0.  , 0.  , 0.05]), [0.0, 0.0, 0.0, 0.0]]
272
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.508, 0.996, 0.416, 0.256]), array([0.   , 0.   , 0.   , 0.046]), [0.0, 0.0, 0.0, 0.0]]
273
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.656, 0.992, 0.152, 0.31 ]), array([0.   , 0.   , 0.   , 0.038]), [0.0, 0.0, 0.0, 0.0]]
274
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.582, 0.994, 0.146, 0.27 ]), array([0.   , 0.   , 0.   , 0.042]), [0.0, 0.0, 0.0, 0.0]]
275
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.658, 0.99 , 0.058, 0.254]), array([0.  , 0.  , 0.  , 0.04])



100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.492, 0.982, 0.002, 0.192]), array([0.   , 0.   , 0.   , 0.034]), [0.0, 0.0, 0.0, 0.0]]
282
0


  scores = weights * exp(dot(X, beta))
  denom = 1.0 / np.array([risk_phi])
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  a1 = einsum("ab,i->ab", risk_phi_x_x, denom) - einsum("ab,i->ab", tie_phi_x_x, increasing_proportion * denom)
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()


100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.   , 0.952, 0.002, 0.174]), array([0.352, 0.   , 0.   , 0.044]), [1.0, 0.0, 0.0, 0.0]]
283
0
100
200
300
400
0
100
200
300
400


  scores = weights * exp(dot(X, beta))
  denom = 1.0 / np.array([risk_phi])
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  a1 = einsum("ab,i->ab", risk_phi_x_x, denom) - einsum("ab,i->ab", tie_phi_x_x, increasing_proportion * denom)
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)


0
100
200
300
400
0
100
200
300
400
[array([0.328, 0.96 , 0.   , 0.186]), array([0.  , 0.  , 0.  , 0.03]), [0.054, 0.0, 1.0, 0.0]]
284
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.172, 0.916, 0.   , 0.196]), array([0.  , 0.  , 0.  , 0.04]), [0.022, 0.0, 0.0, 0.0]]
285



>>> events = df['event'].astype(bool)
>>> print(df.loc[events, 'IndCentMembers'].var())
>>> print(df.loc[~events, 'IndCentMembers'].var())

A very low variance means that the column IndCentMembers completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.




0
100
200
300
400
0
100
200
300
400


  scores = weights * exp(dot(X, beta))
  denom = 1.0 / np.array([risk_phi])
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  a1 = einsum("ab,i->ab", risk_phi_x_x, denom) - einsum("ab,i->ab", tie_phi_x_x, increasing_proportion * denom)
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)


0
100
200
300
400




0
100
200
300
400
[array([0.   , 0.914, 0.   , 0.04 ]), array([0.146, 0.   , 0.   , 0.122]), [0.996, 0.0, 1.0, 0.506]]
286
0



>>> events = df['event'].astype(bool)
>>> print(df.loc[events, 'IndCentMembers'].var())
>>> print(df.loc[~events, 'IndCentMembers'].var())

A very low variance means that the column IndCentMembers completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.




100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.   , 0.826, 0.   , 0.104]), array([0.1  , 0.   , 0.   , 0.024]), [0.995, 0.0, 0.0, 0.0]]
287
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.864, 0.994, 0.452, 0.816]), array([0.   , 0.   , 0.002, 0.002]), [0.0, 0.0, 0.0, 0.0]]
288
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.78 , 0.994, 0.216, 0.418]), array([0.  , 0.  , 0.  , 0.04]), [0.0, 0.0, 0.0, 0.0]]
289
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.676, 0.994, 0.07 , 0.374]), array([0.   , 0.   , 0.   , 0.038]), [0.0, 0.0, 0.0, 0.0]]
290
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.67 , 0.992, 0.13 , 0.28 ]), array([0.   , 0.   , 0.   , 0.054]), [0.0, 0.0, 0.0, 0.0]]
291
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.762, 1.   , 0.1  , 0.236]), array([0.  , 0.  , 0.  , 0.03

  scores = weights * exp(dot(X, beta))
  denom = 1.0 / np.array([risk_phi])
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  a1 = einsum("ab,i->ab", risk_phi_x_x, denom) - einsum("ab,i->ab", tie_phi_x_x, increasing_proportion * denom)
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)


0
100
200
300
400
0
100
200
300
400
[array([0.532, 0.974, 0.   , 0.16 ]), array([0.   , 0.   , 0.   , 0.048]), [0.002, 0.0, 1.0, 0.0]]
296
0
100
200
300
400
0
100
200
300
400


  scores = weights * exp(dot(X, beta))
  denom = 1.0 / np.array([risk_phi])
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  a1 = einsum("ab,i->ab", risk_phi_x_x, denom) - einsum("ab,i->ab", tie_phi_x_x, increasing_proportion * denom)
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)


0
100
200
300
400
0
100
200
300
400
[array([0.486, 0.964, 0.   , 0.2  ]), array([0.   , 0.   , 0.   , 0.022]), [0.061, 0.0, 1.0, 0.0]]
297
0



>>> events = df['event'].astype(bool)
>>> print(df.loc[events, 'IndCentMembers'].var())
>>> print(df.loc[~events, 'IndCentMembers'].var())

A very low variance means that the column IndCentMembers completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.




100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.   , 0.936, 0.   , 0.186]), array([0.254, 0.   , 0.   , 0.028]), [0.995, 0.0, 0.0, 0.0]]
298
0



>>> events = df['event'].astype(bool)
>>> print(df.loc[events, 'IndCentMembers'].var())
>>> print(df.loc[~events, 'IndCentMembers'].var())

A very low variance means that the column IndCentMembers completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.




100
200
300
400
0
100
200
300
400


  scores = weights * exp(dot(X, beta))
  denom = 1.0 / np.array([risk_phi])
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  a1 = einsum("ab,i->ab", risk_phi_x_x, denom) - einsum("ab,i->ab", tie_phi_x_x, increasing_proportion * denom)
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)


0
100
200
300
400




0
100
200
300
400
[array([0.   , 0.936, 0.   , 0.028]), array([0.182, 0.   , 0.   , 0.216]), [0.996, 0.0, 1.0, 0.394]]
299
0



>>> events = df['event'].astype(bool)
>>> print(df.loc[events, 'IndCentMembers'].var())
>>> print(df.loc[~events, 'IndCentMembers'].var())

A very low variance means that the column IndCentMembers completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.




100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.   , 0.884, 0.   , 0.152]), array([0.114, 0.   , 0.   , 0.01 ]), [0.995, 0.0, 0.0, 0.0]]
300
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0., 0., 0., 0.]), array([0., 0., 0., 0.]), [0.612, 0.333, 0.544, 0.621]]
301
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.   , 0.   , 0.   , 0.014]), array([0.   , 0.   , 0.   , 0.004]), [0.899, 0.061, 0.778, 0.459]]
302
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.12 , 0.886, 0.106, 0.162]), array([0.   , 0.   , 0.026, 0.126]), [0.282, 0.011, 0.15, 0.003]]
303
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.052, 0.802, 0.   , 0.2  ]), array([0.   , 0.002, 0.   , 0.174]), [0.003, 0.0, 0.0, 0.0]]
304
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.036, 0.792, 0.   , 0.164]), array([0.   , 0.006, 0. 


>>> events = df['event'].astype(bool)
>>> print(df.loc[events, 'IndCentMembers'].var())
>>> print(df.loc[~events, 'IndCentMembers'].var())

A very low variance means that the column IndCentMembers completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.




0
100
200
300
400
0
100
200
300
400
[array([0.388, 1.   , 0.   , 0.486]), array([0.   , 0.   , 0.   , 0.002]), [0.391, 0.355, 0.994, 0.0]]
311
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.052, 0.828, 0.062, 0.214]), array([0.002, 0.   , 0.006, 0.156]), [0.0, 0.0, 0.0, 0.0]]
312
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.078, 0.882, 0.046, 0.226]), array([0.002, 0.002, 0.004, 0.144]), [0.0, 0.0, 0.0, 0.0]]
313





0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.088, 0.868, 0.09 , 0.228]), array([0.002, 0.002, 0.004, 0.176]), [0.994, 0.0, 0.0, 0.0]]
314
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.116, 0.894, 0.078, 0.204]), array([0.002, 0.002, 0.002, 0.15 ]), [0.0, 0.0, 0.0, 0.0]]
315
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.132, 0.866, 0.09 , 0.182]), array([0.002, 0.   , 0.002, 0.162]), [0.0, 0.0, 0.0, 0.0]]
316
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.112, 0.856, 0.038, 0.172]), array([0.002, 0.   , 0.   , 0.138]), [0.0, 0.0, 0.0, 0.0]]
317
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.104, 0.862, 0.01 , 0.17 ]), array([0.002, 0.   , 0.   , 0.144]), [0.0, 0.0, 0.0, 0.0]]
318
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.136, 0.86 , 0.002, 0.174]), array([0.002, 0.   , 0.




0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.128, 0.934, 0.06 , 0.16 ]), array([0.002, 0.   , 0.   , 0.168]), [0.993, 0.0, 0.0, 0.0]]
322
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.144, 0.934, 0.008, 0.156]), array([0.002, 0.002, 0.   , 0.154]), [0.0, 0.0, 0.0, 0.0]]
323
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.084, 0.93 , 0.002, 0.158]), array([0.002, 0.   , 0.   , 0.154]), [0.0, 0.0, 0.0, 0.0]]
324
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.146, 0.932, 0.046, 0.15 ]), array([0.002, 0.   , 0.   , 0.142]), [0.0, 0.0, 0.0, 0.0]]
325
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.17 , 0.916, 0.02 , 0.134]), array([0.002, 0.   , 0.   , 0.154]), [0.0, 0.0, 0.0, 0.0]]
326
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.208, 0.914, 0.002, 0.148]), array([0.002, 0.   , 0.




0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.12 , 0.966, 0.09 , 0.156]), array([0.002, 0.   , 0.   , 0.188]), [0.994, 0.0, 0.0, 0.0]]
330
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.242, 0.972, 0.002, 0.106]), array([0.002, 0.   , 0.   , 0.16 ]), [0.0, 0.0, 0.0, 0.0]]
331
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.124, 0.962, 0.032, 0.104]), array([0.002, 0.   , 0.   , 0.162]), [0.0, 0.0, 0.0, 0.0]]
332
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.088, 0.96 , 0.032, 0.138]), array([0.   , 0.   , 0.   , 0.148]), [0.0, 0.0, 0.0, 0.0]]
333
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.27 , 0.956, 0.002, 0.106]), array([0.   , 0.   , 0.   , 0.156]), [0.0, 0.0, 0.0, 0.0]]
334
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.176, 0.93 , 0.002, 0.106]), array([0.   , 0.   , 0.




0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.116, 0.974, 0.106, 0.164]), array([0.  , 0.  , 0.  , 0.14]), [0.991, 0.0, 0.0, 0.0]]
339
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.258, 0.966, 0.046, 0.14 ]), array([0.   , 0.   , 0.   , 0.146]), [0.0, 0.0, 0.0, 0.0]]
340
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.21 , 0.97 , 0.044, 0.12 ]), array([0.   , 0.   , 0.   , 0.128]), [0.0, 0.0, 0.0, 0.0]]
341
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.134, 0.958, 0.004, 0.12 ]), array([0.   , 0.   , 0.   , 0.134]), [0.0, 0.0, 0.0, 0.0]]
342
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.166, 0.962, 0.036, 0.122]), array([0.   , 0.   , 0.   , 0.148]), [0.0, 0.0, 0.0, 0.0]]
343
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.262, 0.972, 0.054, 0.11 ]), array([0.   , 0.   , 0.   ,




0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.266, 0.984, 0.058, 0.136]), array([0.002, 0.   , 0.   , 0.132]), [0.993, 0.0, 0.0, 0.0]]
351
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.378, 0.978, 0.104, 0.134]), array([0.   , 0.   , 0.   , 0.134]), [0.0, 0.0, 0.0, 0.0]]
352
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.206, 0.978, 0.038, 0.148]), array([0.   , 0.   , 0.   , 0.074]), [0.0, 0.0, 0.0, 0.0]]
353
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.246, 0.976, 0.05 , 0.138]), array([0.   , 0.   , 0.   , 0.112]), [0.0, 0.0, 0.0, 0.0]]
354
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.37 , 0.982, 0.03 , 0.152]), array([0.  , 0.  , 0.  , 0.09]), [0.0, 0.0, 0.0, 0.0]]
355
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.39 , 0.972, 0.01 , 0.124]), array([0.  , 0.  , 0.  , 0.

  scores = weights * exp(dot(X, beta))
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)


0
100
200
300
400
0
100
200
300
400
[array([0.298, 0.928, 0.   , 0.17 ]), array([0.   , 0.   , 0.   , 0.056]), [0.059, 0.0, 1.0, 0.0]]
368
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.248, 0.886, 0.   , 0.15 ]), array([0.   , 0.   , 0.   , 0.042]), [0.04, 0.0, 0.0, 0.0]]
369
0


  scores = weights * exp(dot(X, beta))
  denom = 1.0 / np.array([risk_phi])
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  a1 = einsum("ab,i->ab", risk_phi_x_x, denom) - einsum("ab,i->ab", tie_phi_x_x, increasing_proportion * denom)
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()


100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.   , 0.912, 0.   , 0.176]), array([0.118, 0.   , 0.   , 0.042]), [1.0, 0.0, 0.0, 0.0]]
370
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.658, 0.99 , 0.218, 0.274]), array([0.   , 0.   , 0.   , 0.048]), [0.0, 0.0, 0.0, 0.0]]
371
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.62, 0.99, 0.38, 0.25]), array([0.   , 0.   , 0.   , 0.046]), [0.0, 0.0, 0.0, 0.0]]
372
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.646, 0.986, 0.318, 0.25 ]), array([0.   , 0.   , 0.   , 0.046]), [0.0, 0.0, 0.0, 0.0]]
373
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.574, 0.99 , 0.132, 0.262]), array([0.   , 0.   , 0.   , 0.046]), [0.0, 0.0, 0.0, 0.0]]
374
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.66 , 0.986, 0.1  , 0.258]), array([0.   , 0.   , 0.   , 0.0

  scores = weights * exp(dot(X, beta))
  denom = 1.0 / np.array([risk_phi])
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  a1 = einsum("ab,i->ab", risk_phi_x_x, denom) - einsum("ab,i->ab", tie_phi_x_x, increasing_proportion * denom)
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)


0
100
200
300
400
0
100
200
300
400
[array([0.46 , 0.99 , 0.   , 0.166]), array([0.   , 0.   , 0.   , 0.034]), [0.093, 0.0, 1.0, 0.0]]
383
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.488, 0.988, 0.   , 0.26 ]), array([0.   , 0.002, 0.   , 0.022]), [0.066, 0.0, 0.0, 0.0]]
384
0


  scores = weights * exp(dot(X, beta))
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)


100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.002, 0.994, 0.   , 0.242]), array([0.394, 0.   , 0.   , 0.04 ]), [1.0, 0.0, 0.0, 0.0]]
385
0



>>> events = df['event'].astype(bool)
>>> print(df.loc[events, 'IndCentMembers'].var())
>>> print(df.loc[~events, 'IndCentMembers'].var())

A very low variance means that the column IndCentMembers completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.




100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.   , 0.984, 0.   , 0.184]), array([0.236, 0.   , 0.   , 0.022]), [0.995, 0.0, 1.0, 0.0]]
386



>>> events = df['event'].astype(bool)
>>> print(df.loc[events, 'IndCentMembers'].var())
>>> print(df.loc[~events, 'IndCentMembers'].var())

A very low variance means that the column IndCentMembers completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.




0
100
200
300
400
0
100
200
300
400
0
100
200
300
400


  scores = weights * exp(dot(X, beta))
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)


0
100
200
300
400
[array([0.   , 0.982, 0.   , 0.03 ]), array([0.306, 0.   , 0.   , 0.266]), [0.996, 0.0, 0.0, 1.0]]
387
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.   , 0.99 , 0.   , 0.038]), array([0.266, 0.   , 0.   , 0.246]), [0.926, 0.0, 0.0, 1.0]]
388
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.   , 0.976, 0.   , 0.168]), array([0.146, 0.   , 0.   , 0.034]), [0.935, 0.0, 0.0, 0.0]]
389
0



>>> events = df['event'].astype(bool)
>>> print(df.loc[events, 'IndCentMembers'].var())
>>> print(df.loc[~events, 'IndCentMembers'].var())

A very low variance means that the column IndCentMembers completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.




100
200
300
400
0
100
200
300
400





0
100
200
300
400
0
100
200
300
400
[array([0.002, 0.972, 0.   , 0.146]), array([0.194, 0.   , 0.   , 0.026]), [0.996, 0.0, 0.99, 0.0]]
390
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.   , 0.976, 0.   , 0.15 ]), array([0.132, 0.002, 0.   , 0.02 ]), [0.95, 0.0, 0.0, 0.0]]
391
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.  , 0.98, 0.  , 0.12]), array([0.036, 0.   , 0.   , 0.014]), [0.953, 0.001, 0.0, 0.0]]
392
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.   , 0.97 , 0.   , 0.152]), array([0.028, 0.   , 0.   , 0.014]), [0.953, 0.002, 0.0, 0.0]]
393
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.   , 0.974, 0.   , 0.116]), array([0.034, 0.   , 0.   , 0.008]), [0.954, 0.042, 0.0, 0.0]]
394
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.002, 0.96 , 0.   , 0.168]), array([0.   , 0.   , 0.   , 0.026]), [0.951, 0.02


>>> events = df['event'].astype(bool)
>>> print(df.loc[events, 'IndCentMembers'].var())
>>> print(df.loc[~events, 'IndCentMembers'].var())

A very low variance means that the column IndCentMembers completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.




0
100
200
300
400
0
100
200
300
400
[array([0.338, 1.   , 0.   , 0.342]), array([0.   , 0.   , 0.   , 0.014]), [0.609, 0.149, 0.995, 0.0]]
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0., 0., 0., 0.]), array([0., 0., 0., 0.]), [0.612, 0.333, 0.544, 0.621]]
401
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.   , 0.   , 0.   , 0.014]), array([0.   , 0.   , 0.   , 0.004]), [0.899, 0.061, 0.778, 0.459]]
402
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.118, 0.88 , 0.1  , 0.168]), array([0.   , 0.   , 0.028, 0.124]), [0.282, 0.011, 0.15, 0.003]]
403
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.052, 0.784, 0.   , 0.202]), array([0.   , 0.002, 0.   , 0.18 ]), [0.003, 0.0, 0.0, 0.0]]
404
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.03 , 0.792, 0.   , 0.154]), array([0.   , 0.006, 0.   , 0.278]), [0.001, 0.0, 0.0,




0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.088, 0.87 , 0.08 , 0.222]), array([0.002, 0.002, 0.006, 0.172]), [0.994, 0.0, 0.0, 0.0]]
414
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.114, 0.886, 0.078, 0.206]), array([0.002, 0.004, 0.002, 0.156]), [0.0, 0.0, 0.0, 0.0]]
415
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.14 , 0.86 , 0.104, 0.182]), array([0.002, 0.002, 0.002, 0.174]), [0.0, 0.0, 0.0, 0.0]]
416
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.108, 0.86 , 0.024, 0.154]), array([0.002, 0.   , 0.   , 0.138]), [0.0, 0.0, 0.0, 0.0]]
417
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.096, 0.858, 0.008, 0.17 ]), array([0.002, 0.   , 0.   , 0.132]), [0.0, 0.0, 0.0, 0.0]]
418
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.134, 0.848, 0.   , 0.17 ]), array([0.002, 0.   , 0.




0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.17 , 0.932, 0.066, 0.154]), array([0.002, 0.   , 0.   , 0.166]), [0.993, 0.0, 0.0, 0.0]]
422
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.16 , 0.938, 0.006, 0.152]), array([0.002, 0.   , 0.   , 0.186]), [0.0, 0.0, 0.0, 0.0]]
423
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.094, 0.922, 0.006, 0.156]), array([0.002, 0.   , 0.   , 0.162]), [0.0, 0.0, 0.0, 0.0]]
424
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.146, 0.934, 0.032, 0.142]), array([0.002, 0.   , 0.   , 0.148]), [0.0, 0.0, 0.0, 0.0]]
425
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.166, 0.918, 0.034, 0.132]), array([0.002, 0.   , 0.   , 0.156]), [0.0, 0.0, 0.0, 0.0]]
426
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.188, 0.912, 0.002, 0.138]), array([0.002, 0.   , 0.




0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.14 , 0.972, 0.052, 0.118]), array([0.002, 0.   , 0.   , 0.166]), [0.992, 0.0, 0.0, 0.0]]
430
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.192, 0.972, 0.016, 0.104]), array([0.002, 0.   , 0.   , 0.162]), [0.0, 0.0, 0.0, 0.0]]
431
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.108, 0.962, 0.058, 0.108]), array([0.002, 0.   , 0.   , 0.164]), [0.0, 0.0, 0.0, 0.0]]
432
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.108, 0.958, 0.002, 0.112]), array([0.  , 0.  , 0.  , 0.15]), [0.0, 0.0, 0.0, 0.0]]
433
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.19 , 0.952, 0.004, 0.098]), array([0.   , 0.   , 0.   , 0.172]), [0.0, 0.0, 0.0, 0.0]]
434
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.18 , 0.926, 0.   , 0.104]), array([0.   , 0.   , 0.   ,




0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.236, 0.962, 0.11 , 0.144]), array([0.   , 0.   , 0.   , 0.166]), [0.992, 0.0, 0.0, 0.0]]
439
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.146, 0.972, 0.034, 0.106]), array([0.  , 0.  , 0.  , 0.17]), [0.0, 0.0, 0.0, 0.0]]
440
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.138, 0.966, 0.032, 0.112]), array([0.  , 0.  , 0.  , 0.12]), [0.0, 0.0, 0.0, 0.0]]
441
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.194, 0.968, 0.1  , 0.12 ]), array([0.   , 0.   , 0.   , 0.132]), [0.0, 0.0, 0.0, 0.0]]
442
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.21 , 0.964, 0.032, 0.098]), array([0.   , 0.   , 0.   , 0.142]), [0.0, 0.0, 0.0, 0.0]]
443
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.308, 0.96 , 0.082, 0.108]), array([0.   , 0.   , 0.   , 0.1




0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.242, 0.966, 0.098, 0.18 ]), array([0.002, 0.   , 0.   , 0.122]), [0.991, 0.0, 0.0, 0.0]]
450
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.344, 0.982, 0.134, 0.146]), array([0.   , 0.   , 0.   , 0.112]), [0.0, 0.0, 0.0, 0.0]]
451
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.324, 0.984, 0.094, 0.152]), array([0.   , 0.   , 0.   , 0.086]), [0.0, 0.0, 0.0, 0.0]]
452
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.204, 0.98 , 0.006, 0.13 ]), array([0.   , 0.   , 0.   , 0.082]), [0.0, 0.0, 0.0, 0.0]]
453
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.288, 0.976, 0.01 , 0.136]), array([0.   , 0.   , 0.   , 0.074]), [0.0, 0.0, 0.0, 0.0]]
454
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.344, 0.974, 0.008, 0.13 ]), array([0.   , 0.   , 0.



0
100
200
300
400
0
100
200
300
400
[array([0.228, 0.902, 0.   , 0.128]), array([0.  , 0.  , 0.  , 0.03]), [0.001, 0.0, 0.855, 0.0]]
467
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.236, 0.886, 0.   , 0.106]), array([0.   , 0.   , 0.   , 0.046]), [0.041, 0.0, 0.0, 0.0]]
468
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.502, 0.994, 0.316, 0.248]), array([0.   , 0.   , 0.   , 0.038]), [0.0, 0.0, 0.0, 0.0]]
469
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.626, 0.994, 0.368, 0.198]), array([0.   , 0.   , 0.   , 0.052]), [0.0, 0.0, 0.0, 0.0]]
470
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.556, 0.986, 0.196, 0.212]), array([0.   , 0.   , 0.   , 0.038]), [0.0, 0.0, 0.0, 0.0]]
471
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.554, 0.98 , 0.08 , 0.208]), array([0.   , 0.   , 0.   , 0.038]), [0.0, 0.0, 0.0, 0.0]]


  scores = weights * exp(dot(X, beta))
  denom = 1.0 / np.array([risk_phi])
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  a1 = einsum("ab,i->ab", risk_phi_x_x, denom) - einsum("ab,i->ab", tie_phi_x_x, increasing_proportion * denom)
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)


0
100
200
300
400
0
100
200
300
400
[array([0.258, 0.93 , 0.   , 0.168]), array([0.002, 0.   , 0.   , 0.04 ]), [0.04, 0.0, 1.0, 0.0]]
482
0



>>> events = df['event'].astype(bool)
>>> print(df.loc[events, 'IndCentMembers'].var())
>>> print(df.loc[~events, 'IndCentMembers'].var())

A very low variance means that the column IndCentMembers completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.




100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.   , 0.904, 0.   , 0.186]), array([0.082, 0.   , 0.   , 0.026]), [0.996, 0.0, 0.0, 0.0]]
483
0



>>> events = df['event'].astype(bool)
>>> print(df.loc[events, 'IndCentMembers'].var())
>>> print(df.loc[~events, 'IndCentMembers'].var())

A very low variance means that the column IndCentMembers completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.




100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.   , 0.878, 0.   , 0.176]), array([0.072, 0.   , 0.   , 0.03 ]), [0.995, 0.0, 0.0, 0.0]]
484
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.778, 0.996, 0.414, 0.66 ]), array([0.   , 0.   , 0.002, 0.01 ]), [0.0, 0.0, 0.0, 0.0]]
485
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.784, 0.998, 0.402, 0.374]), array([0.  , 0.  , 0.  , 0.04]), [0.0, 0.0, 0.0, 0.0]]
486
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.676, 0.986, 0.152, 0.306]), array([0.  , 0.  , 0.  , 0.04]), [0.0, 0.0, 0.0, 0.0]]
487
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.718, 0.982, 0.06 , 0.33 ]), array([0.   , 0.   , 0.   , 0.042]), [0.0, 0.0, 0.0, 0.0]]
488
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.698, 0.986, 0.086, 0.266]), array([0.   , 0.002, 0.   , 0.066

  scores = weights * exp(dot(X, beta))
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)


0
100
200
300
400
0
100
200
300
400
[array([0.458, 0.976, 0.   , 0.202]), array([0.   , 0.   , 0.   , 0.054]), [0.101, 0.0, 1.0, 0.0]]
493
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.396, 0.984, 0.   , 0.186]), array([0.   , 0.   , 0.   , 0.032]), [0.067, 0.0, 0.0, 0.0]]
494
0



>>> events = df['event'].astype(bool)
>>> print(df.loc[events, 'IndCentMembers'].var())
>>> print(df.loc[~events, 'IndCentMembers'].var())

A very low variance means that the column IndCentMembers completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.




100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.   , 0.966, 0.   , 0.202]), array([0.292, 0.   , 0.   , 0.042]), [0.995, 0.0, 0.0, 0.0]]
495



>>> events = df['event'].astype(bool)
>>> print(df.loc[events, 'IndCentMembers'].var())
>>> print(df.loc[~events, 'IndCentMembers'].var())

A very low variance means that the column IndCentMembers completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.




0
100
200
300
400
0
100
200
300
400


  scores = weights * exp(dot(X, beta))
  denom = 1.0 / np.array([risk_phi])
  a1 = risk_phi_x_x * denom
  summand = numer * denom[:, None]
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  a1 = einsum("ab,i->ab", risk_phi_x_x, denom) - einsum("ab,i->ab", tie_phi_x_x, increasing_proportion * denom)
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  log_lik = log_lik + dot(x_death_sum, beta) + weighted_average * log(denom).sum()
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  denom = 1.0 / (risk_phi - increasing_proportion * tie_phi)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)
  numer = risk_phi_x - multiply.outer(increasing_proportion, tie_phi_x)


0
100
200
300
400
0
100
200
300
400
[array([0.   , 0.926, 0.   , 0.188]), array([0.08, 0.  , 0.  , 0.04]), [0.996, 0.0, 1.0, 0.0]]
496



>>> events = df['event'].astype(bool)
>>> print(df.loc[events, 'IndCentMembers'].var())
>>> print(df.loc[~events, 'IndCentMembers'].var())

A very low variance means that the column IndCentMembers completely determines whether a subject dies or not. See https://stats.stackexchange.com/questions/11109/how-to-deal-with-perfect-separation-in-logistic-regression.




0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.   , 0.922, 0.   , 0.118]), array([0.092, 0.   , 0.   , 0.014]), [0.995, 0.0, 1.0, 0.0]]
497
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.854, 0.998, 0.658, 0.862]), array([0., 0., 0., 0.]), [0.0, 0.0, 0.0, 0.0]]
498
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.844, 0.996, 0.5  , 0.468]), array([0.   , 0.   , 0.   , 0.032]), [0.0, 0.0, 0.0, 0.0]]
499
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
0
100
200
300
400
[array([0.8  , 0.988, 0.252, 0.434]), array([0.   , 0.002, 0.   , 0.042]), [0.0, 0.0, 0.0, 0.0]]


## Priority-based filtering by metrics

In [4]:
def Aggregation(MetricTable,AggMetricList):
    """Rank models on the given metric columns and pick the top-ranked one.

    Parameters
    ----------
    MetricTable : table of per-model metrics, forwarded unchanged to DoSimEval.
    AggMetricList : list of metric column names used for filtering/aggregation.

    Returns
    -------
    (AggMetricRank, BestModel)
        AggMetricRank is the DoAggMetric result sorted ascending by 'Metrics',
        with the 'MaxSurvpVal' column joined back on 'Model'.
        BestModel is the row of AggMetricRank with the largest 'Metrics' value.

    Notes
    -----
    pCutoff, ExcRate and NmodEahG are read from the notebook's global namespace
    at call time, so they must be defined before this function is called.
    """
    # Evaluate/filter the candidates; the exact semantics are defined by
    # DoSimEval in Module.MetricsGroup.
    SimEvalTable = DoSimEval(MetricTable, 'MaxSurvpVal',pCutoff, AggMetricList, ExcRate, NmodEahG)

    # Aggregate the selected metric columns and order by the aggregated score.
    RankInput = SimEvalTable[['Model']+AggMetricList]
    RankedByMetric = DoAggMetric(AggMetricList, RankInput).sort_values('Metrics')

    # Re-attach MaxSurvpVal so it is available next to the rank.
    SurvpValCols = SimEvalTable[['Model','MaxSurvpVal']]
    AggMetricRank = pd.merge(RankedByMetric, SurvpValCols, on='Model', how='left')

    # Ascending sort, so the final row carries the highest 'Metrics' value.
    BestModel = AggMetricRank.sort_values('Metrics').iloc[-1]

    return AggMetricRank, BestModel

In [5]:
# Settings read as globals by Aggregation() and forwarded to DoSimEval.
# NOTE(review): their exact meaning is defined in Module.MetricsGroup -- confirm there before tuning.
pCutoff = 0.005
ExcRate = 0.2
NmodEahG = 1

# Metric columns used for the two rankings; the lists differ only in the
# Neg* vs Pos* SigRate columns. Presumably applied as filters in list order
# (the printed "N obs with filter of ..." log follows this order) -- verify in DoSimEval.
NegMetricList = ['IndCentRatio', 'MinABSSurvCoef', 'AvgABSSurvCoef',  'MinNegSigRate', 'AvgNegSigRate', 'MinABSGeCohD', 'AvgABSGeCohD']
PosMetricList = ['IndCentRatio', 'MinABSSurvCoef', 'AvgABSSurvCoef', 'MinPosSigRate', 'AvgPosSigRate', 'MinABSGeCohD', 'AvgABSGeCohD']

MetricTable = pd.read_csv('./MetricTables/RCFR_NoRiskRank_MetricTable.csv')

# Raw strings are used for the patterns so that \d and \. reach the regex engine
# without triggering Python's invalid-escape-sequence warning.
# GroupM: second "<any char><digits>" match in the model name, leading char stripped (kept as a string).
MetricTable['GroupM'] = np.array([re.findall(r'.\d+', i)[1][1:] for i in  MetricTable['Model']])
# EpNum: first "<any char><digits>." match, leading char and trailing dot stripped, cast to int.
MetricTable['EpNum'] = np.array([ re.findall(r'.\d+\.', i)[0][1:-1] for i in  MetricTable['Model']]).astype('int')


In [6]:
# Build one ranking per metric list (negative- and positive-signature variants).
NegAggMetricRank, NegBestModel = Aggregation(MetricTable, NegMetricList)
PosAggMetricRank, PosBestModel = Aggregation(MetricTable, PosMetricList)

# Persist both rankings; the file name is tagged with NumGene_CL.
NegRankPath = './MetricTables/RCFR_NoRisk_Neg_AggMetricRank_Filt'+str(NumGene_CL)+'.csv'
NegAggMetricRank.to_csv(NegRankPath, index=False)
PosRankPath = './MetricTables/RCFR_NoRisk_Pos_AggMetricRank_Filt'+str(NumGene_CL)+'.csv'
PosAggMetricRank.to_csv(PosRankPath, index=False)

1
N obs with filter of IndCentRatio : 69
N obs with filter of MinABSSurvCoef : 56
N obs with filter of AvgABSSurvCoef : 45
N obs with filter of MinNegSigRate : 36
N obs with filter of AvgNegSigRate : 29
N obs with filter of MinABSGeCohD : 24
N obs with filter of AvgABSGeCohD : 20

2
N obs with filter of IndCentRatio : 71
N obs with filter of MinABSSurvCoef : 57
N obs with filter of AvgABSSurvCoef : 46
N obs with filter of MinNegSigRate : 37
N obs with filter of AvgNegSigRate : 30
N obs with filter of MinABSGeCohD : 24
N obs with filter of AvgABSGeCohD : 20

3
N obs with filter of IndCentRatio : 63
N obs with filter of MinABSSurvCoef : 51
N obs with filter of AvgABSSurvCoef : 41
N obs with filter of MinNegSigRate : 33
N obs with filter of AvgNegSigRate : 27
N obs with filter of MinABSGeCohD : 22
N obs with filter of AvgABSGeCohD : 18

4
N obs with filter of IndCentRatio : 56
N obs with filter of MinABSSurvCoef : 45
N obs with filter of AvgABSSurvCoef : 36
N obs with filter of MinNegSigR

In [58]:
# Show only the metric columns that drove the negative-signature ranking.
NegAggMetricRank.loc[:, NegMetricList]

Unnamed: 0,IndCentRatio,MinABSSurvCoef,AvgABSSurvCoef,MinNegSigRate,AvgNegSigRate,MinABSGeCohD,AvgABSGeCohD
0,0.52381,3.803062,4.186322,0.046,0.3525,0.108205,0.208659
1,0.422222,4.075867,4.288296,0.102,0.409,0.12022,0.209306
2,0.427101,4.04529,4.210118,0.102,0.4145,0.120582,0.216268
3,0.432014,3.514848,3.964598,0.128,0.485,0.128522,0.22759
4,0.439446,3.869502,4.019744,0.124,0.466,0.124597,0.224371


In [59]:
# Show only the metric columns that drove the positive-signature ranking.
PosAggMetricRank.loc[:, PosMetricList]

Unnamed: 0,IndCentRatio,MinABSSurvCoef,AvgABSSurvCoef,MinPosSigRate,AvgPosSigRate,MinABSGeCohD,AvgABSGeCohD
0,0.507246,4.064988,4.701705,0.002,0.039,0.090825,0.18335
1,0.507246,4.064988,4.704152,0.002,0.039,0.092246,0.182636
2,0.507246,4.064988,4.701705,0.002,0.04,0.090873,0.18251
3,0.529412,3.97479,4.365373,0.0,0.041,0.10913,0.215198
4,0.507246,4.064988,4.704152,0.002,0.041,0.091612,0.182862
