#### IMPORTING LIBRARIES

In [58]:
import pandas as pd
import numpy as np
import random
import copy
import warnings
from scipy.stats import norm

# Suppress every warning for the rest of the session. NOTE(review): this also
# hides warnings raised through the `warnings` module by numerical code
# (e.g. taking the log of zero), so silent -inf/nan values are possible below.
warnings.filterwarnings("ignore")

In [59]:
def choose_random_process(transition_rates):
    """Sample an index from a discrete distribution by inverse-CDF lookup.

    A single uniform draw from ``random.random()`` is compared against the
    running cumulative sum of ``transition_rates``; the first index whose
    cumulative sum exceeds the draw is returned.

    Parameters
    ----------
    transition_rates : sequence of float
        Probability of each outcome. The input is never modified.

    Returns
    -------
    int
        Index of the selected outcome. If the cumulative sum never exceeds
        the draw (floating-point rounding, or rates summing to less than 1),
        the last index is returned instead of silently returning ``None``.

    Raises
    ------
    ValueError
        If ``transition_rates`` is empty.
    """
    if len(transition_rates) == 0:
        raise ValueError("transition_rates must contain at least one entry")

    number = random.random()
    cumulative = 0.0

    for index, rate in enumerate(transition_rates):
        cumulative += rate
        if number < cumulative:
            return index

    # The draw fell at or beyond the total mass: attribute it to the last
    # outcome so callers always receive a valid index.
    return len(transition_rates) - 1

def log_sum_exp(log_probs):
    """Return ``log(sum(exp(log_probs)))`` computed in a numerically stable way.

    The maximum is factored out before exponentiating so that very negative
    log-probabilities do not underflow to zero.

    Parameters
    ----------
    log_probs : array-like of float
        Log-domain values to combine.

    Returns
    -------
    float
        The log of the summed exponentials. When the maximum is not finite
        (every entry is ``-inf``, or an entry is ``+inf``/``nan``) the maximum
        itself is returned, because subtracting it would produce ``nan``.
    """
    log_probs = np.asarray(log_probs, dtype=float)
    max_log_prob = np.max(log_probs)

    # -inf - (-inf) is nan; an all -inf input represents a total probability of zero.
    if not np.isfinite(max_log_prob):
        return max_log_prob

    return max_log_prob + np.log(np.sum(np.exp(log_probs - max_log_prob)))

#### GENERATING SYNTHETIC DATA

In [60]:
# Ground-truth parameters used to simulate the synthetic data set.
hidden_states = ['CN','MCI','D']
state_probs = np.array([0.8,0.15,0.05])          # PI: initial state distribution

# A: row i is the distribution over the next state given current state i,
# so every row must sum to 1. The last row used to be [0.01, 0.2, 0.81]
# (sum 1.02); the cumulative sampler in choose_random_process draws from
# [0, 1) and therefore only ever used 0.79 of the final entry, so 0.79 keeps
# the sampled behaviour while making the row a valid distribution.
transition_probs = np.array([[0.65,0.34,0.01],   # A
                             [0.25,0.5,0.25],
                             [0.01,0.2,0.79]])

state_effect = np.array([-0.9,-0.5,-0.2])        # V: state-specific medication effect
biomarker_mean = np.array([10,15,20])            # Mu: per-state biomarker mean
biomarker_variance = np.ones((3,))               # Sigma: per-state biomarker variance

sigma_m_2 = 2                                    # Personalized medication effects variance
sigma_r_2 = 2                                    # Patient heterogeneity variance

In [65]:
number_of_patients = 1
time_points = 4
# Binary medication indicator for every patient and time point. The shape
# follows the two settings above so that changing them keeps the arrays aligned.
medication_data = np.random.choice(2,(number_of_patients,time_points))

patient_data = np.zeros((number_of_patients,time_points))
patient_states = np.zeros((number_of_patients,time_points),dtype=object)
m = []
r = []

for patient in range(number_of_patients):

    # np.random.normal expects a standard deviation as its scale argument,
    # whereas sigma_m_2 / sigma_r_2 are variances, hence the square roots.
    m_i = np.random.normal(0,np.sqrt(sigma_m_2),1)
    r_i = np.random.normal(0,np.sqrt(sigma_r_2),1)

    m.append(m_i)
    r.append(r_i)

    for time in range(time_points):
        # The first state is drawn from the initial distribution; later states
        # are drawn from the transition row of the previous state.
        if time==0:
            state = choose_random_process(state_probs)
        else:
            state = choose_random_process(transition_probs[state])
        patient_states[patient][time] = hidden_states[state]

        # m_i and r_i are 1-element arrays; index them so the sampled
        # biomarker is a scalar that can be stored in a single array cell.
        biomarker_value = np.random.normal(
            biomarker_mean[state]+r_i[0]+(state_effect[state]+m_i[0])*medication_data[patient][time],
            np.sqrt(biomarker_variance[state]))
        patient_data[patient][time] = biomarker_value

In [66]:
# Show the simulated hidden states, observations and patient-level draws.
for _label, _value in (
    ('Patient States:\n', patient_states),
    ('\nPatient Data:\n', patient_data),
    ('\nPersonalized effect:', m),
    ('\nIndividual heteregeneity:', r),
):
    print(_label, _value)

Patient States:
 [['CN' 'MCI' 'MCI' 'MCI']]

Patient Data:
 [[12.76594262 18.90367845 12.47986508 14.6259942 ]]

Personalized effect: [array([4.79097795])]

Individual heteregeneity: [array([-1.94967689])]


## PERSONALIZED INPUT-OUTPUT HIDDEN MARKOV MODEL

#### INITIALIZING GLOBAL PARAMETERS (THETA)

In [25]:
# Starting values of the global model parameters (theta) used for inference.
hidden_states = ['CN', 'MCI', 'D']
state_probs = np.asarray((0.5, 0.25, 0.25))      # PI: initial state distribution

transition_probs = np.asarray((                  # A: row = current state, column = next state
    (0.5, 0.25, 0.25),
    (0.3, 0.4, 0.3),
    (0.2, 0.2, 0.6),
))

medication_effect = np.full(3, 0.5)              # V: state-specific medication effect
biomarker_mean = np.full(3, 10)                  # Mu: per-state biomarker mean
biomarker_variance = np.array([2, 2, 2])         # Sigma: per-state biomarker variance

sigma_m_2 = sigma_r_2 = 3                        # Variances of the personalized medication effect and of patient heterogeneity

#### INITIALIZING VARIATIONAL/LOCAL PARAMETERS (LAMBDA)

In [26]:
# One (mu, l) pair per patient for the personalized effect m and for the
# heterogeneity r, each initialised with independent Normal(1, 1) draws.
# The generator is consumed left to right, so the draw order is
# mu_m, l_m, mu_r, l_r.
mu_m, l_m, mu_r, l_r = (
    np.random.normal(1, 1, number_of_patients) for _ in range(4)
)

#### CALCULATE PATIENT HETEROGENEITY AND PERSONALIZED EFFECT

In [27]:
# Draw m and r per patient as location + scale * standard-normal noise.
m = mu_m + np.random.normal(0, 1, number_of_patients) * l_m
r = mu_r + np.random.normal(0, 1, number_of_patients) * l_r

for _label, _value in (
    ('Personalized effect:', m),
    ('Individual heteregeneity:', r),
):
    print(_label, _value)

Personalized effect: [2.23531887 6.88116898]
Individual heteregeneity: [0.25523585 2.58721082]


#### FORWARD ALGORITHM

In [103]:
def forward_algorithm(patient_data,medication_data,
                      hidden_states,state_probs,transition_probs,
                      medication_effect,biomarker_mean,biomarker_variance,m,r):
    """Run the forward recursion of the input-output HMM in the log domain.

    The observation at each time point is modelled as Gaussian with mean
    ``biomarker_mean[s] + r + (medication_effect[s] + m) * medication_data[t]``
    and variance ``biomarker_variance[s]`` for hidden state ``s``.

    Parameters
    ----------
    patient_data : sequence of float
        Observed biomarker values of one patient, one per time point.
    medication_data : sequence
        Medication input of the same patient, aligned with ``patient_data``.
    hidden_states : list
        Hidden state labels; only their number and order are used.
    state_probs : array-like
        Initial state probabilities.
    transition_probs : array-like
        Transition probabilities, indexed ``[previous_state][current_state]``.
    medication_effect, biomarker_mean, biomarker_variance : array-like
        Per-state emission parameters.
    m, r : float
        Patient-specific medication effect and heterogeneity offset.

    Returns
    -------
    tuple
        ``(log_likelihood, probability_matrix)`` where ``probability_matrix``
        has shape ``(len(hidden_states), len(patient_data))`` and holds the
        log forward probabilities, and ``log_likelihood`` is the
        log-sum-exp of its last column.
    """
    log_initial = np.log(state_probs)
    log_transition = np.log(transition_probs)
    n_states = len(hidden_states)
    n_times = len(patient_data)
    probability_matrix = np.zeros((n_states,n_times))

    for time in range(n_times):
        for current_state_index in range(n_states):
            # norm.logpdf evaluates the log density directly; taking the log
            # of norm.pdf underflows to -inf for observations far from the mean.
            # The emission depends only on the current state, so it is
            # computed once rather than once per previous state.
            emission = norm.logpdf(
                patient_data[time],
                loc=biomarker_mean[current_state_index] + r
                    + (medication_effect[current_state_index] + m)*medication_data[time],
                scale=(biomarker_variance[current_state_index])**0.5)

            if time==0:
                probability_matrix[current_state_index][time] = log_initial[current_state_index] + emission
            else:
                # Combine every way of reaching the current state from time-1.
                log_probability = (probability_matrix[:,time-1]
                                   + log_transition[:,current_state_index]
                                   + emission)
                probability_matrix[current_state_index][time] = log_sum_exp(log_probability)

    return log_sum_exp(probability_matrix[:,-1]),probability_matrix

In [104]:
# Forward pass for the first patient, using that patient's m and r.
forward_algorithm(patient_data[0],medication_data[0],
                  hidden_states,state_probs,transition_probs,
                  medication_effect,biomarker_mean,biomarker_variance,m[0],r[0])

(-8.086806189677652,
 array([[ -1.30760076, -18.12734326, -15.57913577, -29.16315302],
        [-18.35836942,  -3.46348305,  -5.23827893,  -8.09171234],
        [-59.83377394, -16.67795574, -21.28371138, -13.40652443]]))

#### BACKWARD ALGORITHM

In [105]:
def backward_algorithm(patient_data,medication_data,
                      hidden_states,state_probs,transition_probs,
                      medication_effect,biomarker_mean,biomarker_variance,m,r):
    """Run the backward recursion of the input-output HMM in the log domain.

    Uses the same Gaussian emission model as the forward pass: mean
    ``biomarker_mean[s] + r + (medication_effect[s] + m) * medication_data[t]``
    and variance ``biomarker_variance[s]`` for hidden state ``s``.

    Parameters
    ----------
    patient_data : sequence of float
        Observed biomarker values of one patient, one per time point.
    medication_data : sequence
        Medication input of the same patient, aligned with ``patient_data``.
    hidden_states : list
        Hidden state labels; only their number is used.
    state_probs : array-like
        Initial state probabilities.
    transition_probs : array-like
        Transition probabilities, indexed ``[current_state][next_state]``.
    medication_effect, biomarker_mean, biomarker_variance : array-like
        Per-state emission parameters (one entry per hidden state).
    m, r : float
        Patient-specific medication effect and heterogeneity offset.

    Returns
    -------
    tuple
        ``(log_likelihood, probability_matrix)`` where ``probability_matrix``
        has shape ``(len(hidden_states), len(patient_data))`` and holds the
        log backward probabilities (last column is ``log(1) = 0``), and
        ``log_likelihood`` combines the first column with the initial state
        probabilities and the first emission.
    """
    log_initial = np.log(state_probs)
    log_transition = np.log(transition_probs)
    n_states = len(hidden_states)
    n_times = len(patient_data)

    state_means = np.asarray(biomarker_mean)
    state_effects = np.asarray(medication_effect)
    state_scales = np.asarray(biomarker_variance)**0.5

    def log_emissions(time):
        # Log density of the observation at `time` under every hidden state.
        # norm.logpdf avoids the -inf produced by log(pdf) when pdf underflows.
        return norm.logpdf(patient_data[time],
                           loc=state_means + r + (state_effects + m)*medication_data[time],
                           scale=state_scales)

    # Zero initialisation already sets the final column to log(1) = 0.
    probability_matrix = np.zeros((n_states,n_times))

    for time in range(n_times-2,-1,-1):
        # The next-step emission is shared by every current state.
        next_emission = log_emissions(time+1)

        for current_state_index in range(n_states):
            log_probability = (probability_matrix[:,time+1]
                               + log_transition[current_state_index]
                               + next_emission)
            probability_matrix[current_state_index][time] = log_sum_exp(log_probability)

    probability = log_sum_exp(log_initial + log_emissions(0) + probability_matrix[:,0])
    return probability,probability_matrix

In [106]:
# Backward pass for the first patient; its first return value should agree
# with the forward pass up to floating-point rounding.
backward_algorithm(patient_data[0],medication_data[0],
                  hidden_states,state_probs,transition_probs,
                  medication_effect,biomarker_mean,biomarker_variance,m[0],r[0])

(-8.086806189677654,
 array([[-6.77920549, -5.00892498, -3.23882861,  0.        ],
        [-6.39353155, -4.62332416, -2.84854916,  0.        ],
        [-7.3097343 , -5.53963424, -3.73068096,  0.        ]]))

#### EXPECTATION MAXIMIZATION ALGORITHM

In [111]:
def EM(patient_data,medication_data,
      hidden_states,state_probs,transition_probs,
      medication_effect,biomarker_mean,biomarker_variance,m,r):
    """Compute and print the expectation-step quantities for one patient.

    Runs the forward and backward passes and derives, in the log domain:

    * ``gamma[s][t]``: log posterior of being in state ``s`` at time ``t``;
    * ``shi_matrix[t][s1][s2]``: log posterior of moving from state ``s1`` at
      time ``t`` to state ``s2`` at time ``t + 1``.

    Only the expectation step is performed here; both arrays are printed and
    nothing is returned.

    Parameters
    ----------
    patient_data : sequence of float
        Observed biomarker values of one patient, one per time point.
    medication_data : sequence
        Medication input of the same patient, aligned with ``patient_data``.
    hidden_states : list
        Hidden state labels; only their number is used.
    state_probs, transition_probs : array-like
        Initial and transition probabilities (not in log form).
    medication_effect, biomarker_mean, biomarker_variance : array-like
        Per-state emission parameters.
    m, r : float
        Patient-specific medication effect and heterogeneity offset.
    """
    _,forward_pass = forward_algorithm(patient_data,medication_data,
                                       hidden_states,state_probs,transition_probs,
                                       medication_effect,biomarker_mean,biomarker_variance,m,r)
    # The sequence log-likelihood reported by the backward pass is the
    # normaliser used below.
    log_likelihood,backward_pass = backward_algorithm(patient_data,medication_data,
                                                      hidden_states,state_probs,transition_probs,
                                                      medication_effect,biomarker_mean,biomarker_variance,m,r)

    # EXPECTATION STEP

    log_transition = np.log(transition_probs)
    n_states = len(hidden_states)
    n_times = len(patient_data)

    # Log state posteriors: forward + backward - log-likelihood, element-wise.
    gamma = forward_pass + backward_pass - log_likelihood

    shi_matrix = np.zeros((n_times-1,n_states,n_states))

    for time in range(n_times-1):
        for state_2 in range(n_states):
            # The emission depends only on the destination state, so it is
            # evaluated once per (time, state_2). norm.logpdf avoids the -inf
            # that log(pdf) yields when the density underflows.
            emission = norm.logpdf(
                patient_data[time+1],
                loc=biomarker_mean[state_2] + r + (medication_effect[state_2] + m)*medication_data[time+1],
                scale=(biomarker_variance[state_2])**0.5)

            for state_1 in range(n_states):
                shi_matrix[time][state_1][state_2] = forward_pass[state_1][time] +\
                                                     log_transition[state_1][state_2] +\
                                                     emission +\
                                                     backward_pass[state_2][time+1] -\
                                                     log_likelihood

    print('Gamma:')
    print(gamma)
    print()
    print('Shi Matrix:')
    print(shi_matrix)
    print()

In [112]:
# Print the log-domain gamma and shi matrices for the first patient.
EM(patient_data[0],medication_data[0],hidden_states,state_probs,transition_probs,medication_effect,biomarker_mean,biomarker_variance,m[0],r[0])

Gamma:
[[-5.78683768e-08 -1.50494620e+01 -1.07311582e+01 -2.10763468e+01]
 [-1.66650948e+01 -1.02072988e-06 -2.18980575e-05 -4.90614871e-03]
 [-5.90567021e+01 -1.41307838e+01 -1.69275862e+01 -5.31971824e+00]]

Shi Matrix:
[[[-1.50494621e+01 -1.07859760e-06 -1.41307848e+01]
  [-3.30557422e+01 -1.66651073e+01 -2.79626776e+01]
  [-7.77500225e+01 -5.90568025e+01 -6.82625088e+01]]

 [[-2.44395081e+01 -1.50495456e+01 -3.48103281e+01]
  [-1.07311594e+01 -2.29187835e-05 -1.69275921e+01]
  [-2.71645079e+01 -1.41307863e+01 -2.89664914e+01]]

 [[-3.04617762e+01 -1.07314475e+01 -1.88794525e+01]
  [-2.10764308e+01 -4.92814641e-03 -5.31971988e+00]
  [-4.03407391e+01 -1.69666513e+01 -2.01895790e+01]]]

