In [1]:
import numpy as np
from scipy.optimize import brentq
from scipy import integrate
import pandas as pd
import os

## Define classes of frailties

In [2]:
class GammaFrailty:
    """Risk updating for event-free individuals under a gamma frailty model.

    The cumulative baseline hazard used throughout is
    ``A[t] = (rate**t - 1) * h0`` with ``h0 = lambda0 / log(rate)``.
    The frailty parameters ``(theta_0, delta)`` are either fixed directly from
    ``RR`` (``constant=True`` with ``a == 0``) or calibrated so that the log
    hazard ratio of prior events, averaged over ``[latency, ref_T]``, equals
    ``log(RR)``.

    args
        RR: relative risk (hazard ratio) of prior events
        rate: relative risk (hazard ratio or risk ratio) of aging per unit of time
        a: the form parameter, alpha, of PVF distributions
           (a = 0 with gamma, see Hougaard p.241)
        ref_T: period over which the hazard ratio is averaged to calculate HRbar,
               which will be equated to RR
        latency (< ref_T): if entered, the hazard ratio is averaged over
               [latency, ref_T] (default 0)
        constant: if True, HR is estimated at the moment an event occurred.
               In this case, HR is constant over time
               (applies only to gamma, see Hougaard p.235)
    """

    def __init__(self, RR, rate, a = 0, ref_T = 5, latency = 0, constant = False):
        self.RR = RR
        self.rate = rate
        self.ref_T = ref_T
        self.constant = constant
        self.latency = latency
        self.a = a

    def calc_lambda_ratio(self, p0, delta):
        """Return lambda0 / (log(rate) * theta_0) implied by an initial risk p0."""
        y = -np.log(1 - p0) / delta
        return (np.exp(y) - 1) / (self.rate - 1)

    def calc_h0(self, p0, theta_0, delta):
        """Return h0 = lambda0 / log(rate) for initial risk p0."""
        return theta_0 * self.calc_lambda_ratio(p0, delta)

    def calc_A(self, h0, t):
        """Return the cumulative hazard A[t] up to time t."""
        return (self.rate**t - 1) * h0

    def lnhr_t(self, t, h0, theta_0, delta, lt_end):
        """Return logHR[t] of failure at time t vs accumulated failure up to t.

        The pairwise log hazard ratio log(hr[s, t]) is averaged over the
        failure time s in [lt_end, t], weighted by the density of failure at s.
        """
        At = self.calc_A(h0, t)

        def weight(s):
            # The weight w[s] is the probability density of failure at s given t,
            # i.e. -dS[s,t]/ds = S[s,t]*lambda[s|T>t] (Hougaard p.235, (7.38)).
            # Constant factors are dropped because they cancel in the ratio below.
            As = self.calc_A(h0, s)
            return self.rate**s / (theta_0 + As + At)**(delta + 1)

        def integrand(s):
            # log(hr[s,t]) at time t of failure at time s compared to non-failure.
            As = self.calc_A(h0, s)
            lnhr = np.log(1 + 1/delta) + np.log(theta_0 + 2*At) - np.log(theta_0 + As + At)
            return lnhr * weight(s)

        numerator = integrate.quad(integrand, lt_end, t)[0]
        denominator = integrate.quad(weight, lt_end, t)[0]
        return numerator / denominator

    def calc_lnHR(self, t_ref, theta_0, delta, p0, latency):
        """Return the average log HR of failure vs non-failure over [latency, t_ref].

        The average is taken in a population with initial risk p0; logHR[t] is
        weighted by the survival at time t.
        """
        h0 = self.calc_h0(p0, theta_0, delta)
        lt_end = 0 + latency

        def weight_t(t):
            # Survival at time t (up to a constant factor).
            At = self.calc_A(h0, t)
            return 1 / (theta_0 + At)**delta

        def interval_average(t):
            return self.lnhr_t(t, h0, theta_0, delta, lt_end) * weight_t(t)

        numerator = integrate.quad(interval_average, lt_end, t_ref)[0]
        denominator = integrate.quad(weight_t, lt_end, t_ref)[0]
        return numerator / denominator

    def calc_risk(self, p0, theta_0, delta, tau):
        """Return the 1-year risk at each time in tau for an event-free individual."""
        r = self.calc_lambda_ratio(p0, delta)
        enum = 1 + (self.rate**(tau) - 1) * r
        denom = 1 + (self.rate**(tau + 1) - 1) * r
        return 1 - (enum / denom)**delta

    def calc_point_HR(self, p0, theta_0, delta, tau):
        """Return an array of pointwise HRs of prior events at times tau + 1.

        The averaging window always starts at 0 here (no latency is applied).
        """
        h0 = self.calc_h0(p0, theta_0, delta)
        lnhr = [self.lnhr_t(t, h0, theta_0, delta, 0) for t in tau + 1]
        return np.exp(lnhr)

    def calc_moment(self, p_0, theta_0, delta, t, order):
        """Return a moment of the frailty among survivors at time t (Hougaard p.505).

        order == 1 returns the mean; any other value returns the coefficient
        of variation (sd/mean).
        """
        h0 = self.calc_h0(p_0, theta_0, delta)
        theta = theta_0 + self.calc_A(h0, t)
        if order == 1:
            return delta * theta**(self.a - 1)
        return np.sqrt((1 - self.a) / (delta * theta**self.a))

    def find_theta_0(self, p0):
        """Find theta_0 such that the average HR up to ref_T equals RR.

        Solved with brentq on theta_0 in [0.02, 10] for a population with
        initial risk p0; delta is tied to theta_0 by delta = theta_0**(1 - a).
        Returns the tuple (theta_0, delta).
        """
        def dif_lnhr(theta_0):
            delta = theta_0**(1 - self.a)
            ref_lnHR = self.calc_lnHR(self.ref_T, theta_0, delta, p0, self.latency)
            return ref_lnHR - np.log(self.RR)

        theta_0 = brentq(dif_lnhr, 0.02, 10)
        delta = theta_0**(1 - self.a)
        return theta_0, delta

    def predict(self, T = 5, r0 = np.arange(1,9)*0.01):
        """ args:
                T: maximal time up to which risk is updated
                r0: initial risk estimates (array-like or scalar)
            values:  a tuple
                [0] a dataframe of updated risks
                [1] a dataframe of point-wise hazard ratio at year k
                [2] a dataframe of parameters [theta0, delta] for r0
                [3] proportion of zero-risk individuals (only relevant for
                    compound Poisson); a scalar computed from the parameters
                    of the LAST entry of r0
                [4] mean of the frailties among survivors
                [5] coefficient of variation of the frailties among survivors
        """
        r0 = np.atleast_1d(np.asarray(r0, dtype=float))
        res = pd.DataFrame()
        hr = pd.DataFrame()
        params = []
        tau = np.arange(T+1)

        for p0 in r0:

            if self.constant and self.a == 0:
                theta_0 = 1 / (self.RR - 1)
                delta = theta_0
            else:
                theta_0, delta = self.find_theta_0(p0)

            # Hazard ratio at time 0, prepended to the point-wise HRs.
            if self.a == 0:
                hr_0 = [1 + 1/delta]
            else:
                y = (1 - self.a) * theta_0**(-self.a) + self.a
                hr_0 = [y/delta]

            hr_0.extend(self.calc_point_HR(p0, theta_0, delta, tau))

            res[p0] = self.calc_risk(p0, theta_0, delta, tau)
            hr[p0] = hr_0
            params.append([theta_0, delta])

        # Proportion of immunity (relevant only to compound Poisson).
        # NOTE(review): kept as a scalar based on the last r0 entry so the
        # return shape is unchanged; it depends on p0 whenever theta_0 does.
        if self.a < 0 and params:
            zero_risk = np.exp(delta * theta_0**self.a / self.a)
        else:
            zero_risk = 0

        params = pd.DataFrame(params, columns=["theta_0", "delta"])
        params.index = r0

        # Each column of mu/cv must use the parameters calibrated for its own
        # initial risk, so pass the per-p0 parameter vectors (broadcast with r0)
        # rather than the values left over from the last loop iteration.
        theta_all = params["theta_0"].values
        delta_all = params["delta"].values
        mu = pd.DataFrame([self.calc_moment(r0, theta_all, delta_all, t, 1) for t in tau])
        cv = pd.DataFrame([self.calc_moment(r0, theta_all, delta_all, t, 2) for t in tau])
        mu.columns = r0    # mean of the frailties over time
        cv.columns = r0    # cv of the frailties over time

        return res, hr, params, zero_risk, mu, cv

In [3]:
class PVFFrailty(GammaFrailty):
    """Risk updating under a PVF (power variance function) frailty.

    a = 0.5 with inverse gaussian
    a < 0 with compound poisson
    """

    def __init__(self, RR, rate, a = 0.5, ref_T = 5, latency = 0):
        super().__init__(RR = RR, rate = rate, a = a, ref_T = ref_T, latency = latency)

    def calc_h0(self, p0, theta_0, delta):
        """Return h0 = lambda0 / log(rate) for initial risk p0."""
        alpha = self.a
        shifted = -alpha * np.log(1 - p0) / delta + theta_0**alpha
        cum_first = shifted**(1 / alpha) - theta_0
        return cum_first / (self.rate - 1)

    def calc_S(self, theta_1, theta_2, delta):
        """Return the survival from theta_1 to theta_2 (see Hougaard p.505, (A.17))."""
        increment = theta_2**self.a - theta_1**self.a
        return np.exp(-delta * increment / self.a)

    def lnhr_t(self, t, h0, theta_0, delta, lt_end):
        """Return the density-weighted average of log HR over s in [lt_end, t]."""
        cum_t = self.calc_A(h0, t)

        def theta_at(s):
            # theta_0 plus the cumulative hazards at s and at t.
            return theta_0 + self.calc_A(h0, s) + cum_t

        def weight(s):
            # See Hougaard p.505, (A.18); constant factors are dropped.
            theta_s = theta_at(s)
            surv = self.calc_S(theta_0, theta_s, delta)
            return surv * self.rate**s * theta_s**(self.a - 1)

        def integrand(s):
            # See Hougaard p.244 (7.60).
            ratio = (1 - self.a) * theta_at(s)**(-self.a) + self.a
            return (np.log(ratio) - np.log(delta)) * weight(s)

        weighted_sum = integrate.quad(integrand, lt_end, t)[0]
        total_weight = integrate.quad(weight, lt_end, t)[0]
        return weighted_sum / total_weight

    def calc_lnHR(self, t_ref, theta_0, delta, p0, latency):
        """Return the survival-weighted average of lnhr_t over [latency, t_ref]."""
        h0 = self.calc_h0(p0, theta_0, delta)
        start = 0 + latency

        def weight_t(t):
            # Survival up to time t.
            return self.calc_S(theta_0, theta_0 + self.calc_A(h0, t), delta)

        def interval_average(t):
            return self.lnhr_t(t, h0, theta_0, delta, start) * weight_t(t)

        weighted_sum = integrate.quad(interval_average, start, t_ref)[0]
        total_weight = integrate.quad(weight_t, start, t_ref)[0]
        return weighted_sum / total_weight

    def calc_risk(self, p0, theta_0, delta, tau):
        """Return 1 - survival over [tau, tau + 1] for each time in tau."""
        h0 = self.calc_h0(p0, theta_0, delta)
        theta_begin = theta_0 + self.calc_A(h0, tau)
        theta_end = theta_0 + self.calc_A(h0, tau + 1)
        survival = [self.calc_S(lo, hi, delta) for lo, hi in zip(theta_begin, theta_end)]
        return 1 - np.array(survival)

In [4]:
class BetaBinom(GammaFrailty):
    """Discrete-time (yearly) risk updating with a beta-binomial model.

    Reuses the RR / rate / ref_T / latency / constant settings of
    GammaFrailty. The yearly risk at index k is
    ``a * rate**k / (tau_0 + k)`` with ``tau_0 = a / p0``.
    """

    def __init__(self, RR, rate, ref_T = 5, constant = False, latency = 0):
        super().__init__(RR = RR, rate = rate, ref_T = ref_T, constant = constant, latency = latency)

    def _risk_and_survival(self, a, latency, n, tau_0):
        """Return (p, S): yearly risks for indices latency..n+1 and the running survival."""
        p = np.array([a * self.rate**k / (tau_0 + k) for k in range(latency, n + 2)])  # k runs over indices
        S = [1]
        for k in range(n + 1 - latency):  # k runs over indices
            S.append(S[-1] * (1 - p[k]))
        return p, np.array(S)

    def calc_rr_n(self, a, latency, n, tau_0):
        """Return the list of relative risks RR[n] at the n-th year of accumulated failure up to the n-th year."""
        p, S = self._risk_and_survival(a, latency, n, tau_0)
        w = p * S   # probability of first failure in each year

        def calc_rr_km(k, m):
            # Pairwise RR, rr[m,k], of failure in the (k+1)-th year vs survival at the (m+1)-th year.
            rr_km = 1 + (self.rate - 1) / (a * np.log(self.rate) * self.rate**k)
            rr_km *= (tau_0 + m) / (tau_0 + k)
            return rr_km

        rr = []
        for k in range(latency, n):  # k runs over indices
            w_k = 0
            rr_k = 0
            for j in range(latency, k + 1):  # j runs over time
                wj = w[j - latency]
                w_k += wj
                rr_k += wj * calc_rr_km(j + 1, k + 1)
            rr.append(rr_k / w_k)
        return rr

    def calc_rr(self, a, latency, n, p0):
        """Return the survival-weighted average relative risk, RRbar, up to n (ref_T)."""
        tau_0 = a / p0
        _, S = self._risk_and_survival(a, latency, n, tau_0)
        rr = self.calc_rr_n(a, latency, n, tau_0)
        RR = 0
        weight = 0
        for k in range(n - latency):  # k runs over indices
            RR += rr[k] * S[k + 1]
            weight += S[k + 1]
        return RR / weight

    def find_a(self, p0):
        """Find the parameter a0 (brentq on [0.2, 5]) such that RRbar equals RR for initial risk p0."""
        def calibrate(a):
            avRR = self.calc_rr(a, self.latency, self.ref_T, p0)
            return np.log(avRR) - np.log(self.RR)

        return brentq(calibrate, 0.2, 5)

    def calc_risk_constant(self, tau, p0):
        """Return the updated risk at times tau when the relative risk is held constant at RR."""
        enum = p0 * self.rate**tau
        denom = 1 + p0 * (self.RR - 1) * (self.rate**tau - 1) / (self.rate - 1)
        return enum / denom

    def calc_point_RR(self, r0, a0, prec, tau):
        """Return a dataframe of pointwise RRs of prior events (rows: year from 1, columns: r0)."""
        RR = [self.calc_rr_n(a, 0, tau[-1] + 1, tau_0) for a, tau_0 in zip(a0, prec)]
        RR = pd.DataFrame(RR)
        RR.index = r0
        RR.columns += 1   # label years starting from 1 instead of 0
        return RR.T

    def predict(self, T = 5, r0 = np.arange(1,9)*0.01):
        """
        args:
            T:  maximal time up to which risk is updated
            r0: initial risk estimates (array-like or scalar, any length)
        values: a tuple
            [0] updated risks
            [1] averaged risk ratios up to t
            [2] parameters, a and precision
        """
        r0 = np.atleast_1d(np.asarray(r0, dtype=float))
        tau = np.arange(T+1)
        if self.constant:
            # One copy of the closed-form parameter per initial risk
            # (sized from r0 rather than a fixed count).
            a0 = [1/(self.RR-1)] * len(r0)
            res = [self.calc_risk_constant(tau, p0) for p0 in r0]
        else:
            a0 = [self.find_a(p0) for p0 in r0]
            res = [a * self.rate**tau / ((a/r) + tau) for a, r in zip(a0, r0)]
        prec = np.asarray(a0) / r0        # parameter tau_0 for each initial risk
        res = pd.DataFrame(res).T
        res.columns = r0
        RR = self.calc_point_RR(r0, a0, prec, tau)
        params = pd.DataFrame({"a": a0, "precision": prec})
        params.index = r0
        return res, RR, params

## Gamma-av (gamma frailty, hazard ratio averaged over the reference period)

In [5]:
# constant=False: theta_0 is calibrated per initial risk via find_theta_0.
# rate = 1.5**0.1 means the hazard multiplies by 1.5 every 10 time units.
mod = GammaFrailty(
    RR=2.5,
    rate=1.5**0.1,
    a=0,
    ref_T=5,
    latency=0,
    constant=False,
)
pred = mod.predict(T=5, r0=np.arange(1, 9) * 0.01)

In [6]:
# Updated risks: one row per time 0..T, one column per initial risk in r0.
pred[0]

Unnamed: 0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08
0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08
1,0.010259,0.020233,0.02995,0.039432,0.048696,0.057757,0.066627,0.075317
2,0.010521,0.020462,0.029902,0.038901,0.047506,0.055755,0.06368,0.071309
3,0.010786,0.020687,0.029856,0.038405,0.046417,0.053959,0.061086,0.067842
4,0.011052,0.020908,0.029813,0.03794,0.045417,0.05234,0.058787,0.064816
5,0.011321,0.021124,0.029771,0.037504,0.044496,0.050875,0.056736,0.062154


## Gamma-c (gamma frailty, constant hazard ratio fixed at the moment of the event)

In [7]:
# constant=True with a=0: theta_0 = delta = 1/(RR - 1), no calibration step.
mod = GammaFrailty(
    RR=2.5,
    rate=1.5**0.1,
    a=0,
    ref_T=5,
    latency=0,
    constant=True,
)
pred = mod.predict(T=5, r0=np.arange(1, 9) * 0.01)

In [8]:
# Updated risks: one row per time 0..T, one column per initial risk in r0.
pred[0]

Unnamed: 0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08
0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08
1,0.010255,0.020201,0.029852,0.039221,0.04832,0.057161,0.065754,0.074109
2,0.010512,0.020397,0.029711,0.038501,0.04681,0.054676,0.062134,0.069215
3,0.010771,0.02059,0.029577,0.037834,0.045446,0.052486,0.059015,0.065087
4,0.011032,0.020778,0.029449,0.037215,0.044209,0.050541,0.056301,0.061563
5,0.011296,0.020962,0.029328,0.036639,0.043083,0.048805,0.05392,0.058519


In [9]:
# Point-wise hazard ratios of prior events; row 0 is the value at time 0 (1 + 1/delta for gamma).
pred[1]

Unnamed: 0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08
0,2.5,2.5,2.5,2.5,2.5,2.5,2.5,2.5
1,2.518664,2.537159,2.555487,2.57365,2.59165,2.609489,2.627168,2.644691
2,2.53744,2.573282,2.607634,2.640591,2.672238,2.702656,2.731918,2.760088
3,2.556312,2.608391,2.656717,2.701695,2.743671,2.782944,2.819771,2.854379
4,2.575268,2.642508,2.702979,2.757682,2.807422,2.852858,2.894531,2.932898
5,2.594293,2.675652,2.746636,2.809152,2.864654,2.914276,2.958916,2.999294
6,2.613375,2.707846,2.78788,2.856608,2.916298,2.968642,3.014928,3.056157


## Inverse Gaussian

In [10]:
# a=0.5 selects the inverse Gaussian member of the PVF family.
mod = PVFFrailty(
    RR=2.5,
    rate=1.5**0.1,
    a=0.5,
    ref_T=5,
    latency=0,
)
pred = mod.predict(T=5, r0=np.arange(1, 9) * 0.01)

In [11]:
# Updated risks: one row per time 0..T, one column per initial risk in r0.
pred[0]

Unnamed: 0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08
0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08
1,0.010239,0.020123,0.02965,0.038828,0.047666,0.056178,0.06438,0.07229
2,0.010482,0.020265,0.029395,0.037927,0.045918,0.05342,0.060485,0.067161
3,0.01073,0.020425,0.029218,0.037235,0.044585,0.051365,0.057654,0.063524
4,0.010983,0.020601,0.029108,0.036707,0.043561,0.049802,0.055535,0.060843
5,0.01124,0.020794,0.029056,0.036311,0.042774,0.048603,0.05392,0.058818


## Beta-binomial-av (beta-binomial model, relative risk averaged over the reference period)

In [12]:
# constant=False: parameter a is calibrated per initial risk via find_a.
mod = BetaBinom(
    RR=2.5,
    rate=1.5**0.1,
    ref_T=5,
    latency=0,
    constant=False,
)
pred = mod.predict(T=5, r0=np.arange(1, 9) * 0.01)

In [13]:
# Updated risks: one row per time 0..T, one column per initial risk in r0.
pred[0]

Unnamed: 0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08
0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08
1,0.010255,0.020215,0.029911,0.039368,0.048608,0.057648,0.066503,0.075187
2,0.010518,0.020449,0.029876,0.038864,0.047465,0.055722,0.063671,0.071342
3,0.010791,0.020703,0.029891,0.03847,0.046529,0.05414,0.061358,0.068232
4,0.011073,0.020977,0.029952,0.038173,0.045769,0.052839,0.059461,0.065695
5,0.011365,0.021269,0.030057,0.037963,0.045161,0.051774,0.057901,0.063615
