<a href="https://colab.research.google.com/github/Jinzhao-Yu/BioStat615/blob/main/BIOSTAT615_Lecture_17_Fall_2022.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# BIOSTAT615 Lecture 17 - R

## Create an HMM class

In [1]:
#' Reference class `hmm615`
#'
#' Holds the parameters of a discrete hidden Markov model, the observed
#' sequence, and the work matrices that the inference methods fill in.
hmm615 <- setRefClass(
  "hmm615",
  fields = list(
    ## --- dimensions ---
    n.states = "integer",  # number of hidden states
    n.obs    = "integer",  # number of possible outcomes
    n.times  = "integer",  # number of observed data points
    ## --- data and model parameters ---
    obs      = "vector",   # observed data
    transP   = "matrix",   # transition matrix
    emisP    = "matrix",   # emission matrix
    ## --- results filled in by the methods ---
    alphas   = "matrix",   # alpha, written by forward()
    betas    = "matrix",   # beta, written by backward()
    gammas   = "matrix",   # written by forwardbackward()
    deltas   = "matrix",   # Viterbi maximum likelihoods
    phis     = "matrix",   # Viterbi backtracking pointers
    pis      = "vector",   # initial state probabilities
    path     = "vector"    # Viterbi path
  )
)

In [2]:
#' method to initialize hmm615 class (called with hmm615$new)
#'
#' A call with no arguments leaves every field at its empty prototype and
#' returns immediately; the reference-class machinery makes such calls
#' itself (for example when copying an object), so they must not fail.
#' @param transMat - transition matrix, n.states x n.states
#' @param emisMat - emission matrix, n.states x n.obs
#' @param p0s - prior distribution of states (one entry per state)
#' @param obsData - observed data points (vector of integers); numeric
#'   values must be valid column indices of emisMat
hmm615$methods(initialize = function(transMat, emisMat, p0s, obsData) {
  ## tolerate the argument-free call issued by new(def) / $copy()
  if (missing(transMat) && missing(emisMat) &&
      missing(p0s) && missing(obsData)) {
    return(invisible(NULL))
  }
  ## validate on the arguments first so a failed check leaves no
  ## half-initialized fields behind
  stopifnot(is.matrix(transMat), is.matrix(emisMat))
  stopifnot(ncol(transMat) == nrow(transMat))
  stopifnot(nrow(emisMat) == nrow(transMat))
  stopifnot(length(p0s) == nrow(transMat))
  if (is.numeric(obsData)) {
    ## each observation is later used as a column index into emisP
    stopifnot(all(obsData %in% seq_len(ncol(emisMat))))
  }
  n.states <<- nrow(transMat)
  n.obs <<- ncol(emisMat)
  n.times <<- length(obsData)
  transP <<- transMat
  emisP <<- emisMat
  obs <<- obsData
  pis <<- p0s
})

In [3]:
#' method to perform forward algorithm
#' fills in the alpha matrix (n.states x n.times); column t is computed
#' from column t-1, transP and the emission column selected by obs[t]
#' Works for sequences of any length, including a single observation.
hmm615$methods(forward = function() {
  alphas <<- matrix(0, n.states, n.times) ## initialize
  if (n.times == 0L) { ## nothing observed: leave the empty matrix
    return(invisible(NULL))
  }
  alphas[, 1] <<- pis * emisP[, obs[1]] ## first element
  ## seq_len() yields an empty range when n.times == 1, whereas
  ## 2:n.times would count down to c(2, 1) and index out of bounds
  for (t in seq_len(n.times - 1L) + 1L) {
    ## row vector times transP sums over the previous state
    alphas[, t] <<- as.vector(alphas[, t - 1] %*% transP) * emisP[, obs[t]]
  }
})

In [4]:
#' method to perform backward algorithm
#' fills in the beta matrix (n.states x n.times); the last column is all
#' ones and column t is computed from column t+1, transP and the emission
#' column selected by obs[t+1]
#' Works for sequences of any length, including a single observation.
hmm615$methods(backward = function() {
  betas <<- matrix(0, n.states, n.times) ## initialize
  if (n.times == 0L) { ## nothing observed: leave the empty matrix
    return(invisible(NULL))
  }
  betas[, n.times] <<- rep(1, n.states) ## last element
  ## rev(seq_len()) is empty when n.times == 1, whereas
  ## seq(n.times-1, 1, -1) would stop with "wrong sign in 'by'"
  for (t in rev(seq_len(n.times - 1L))) {
    ## transP times column vector sums over the next state
    betas[, t] <<- as.vector(transP %*% (betas[, t + 1] * emisP[, obs[t + 1]]))
  }
})

In [5]:
#' method to perform forward-backward algorithm
#' fills in the gamma matrix as Pr(states|obs): the element-wise product
#' of alphas and betas, with every column rescaled to sum to one
hmm615$methods(forwardbackward = function() {
  forward()   # run forward algorithm
  backward()  # run backward algorithm
  joint <- alphas * betas # calculate joint likelihood
  ## normalize across the states: each column is divided by its own sum.
  ## rep(..., each = nrow) lines the sums up with R's column-major storage;
  ## a plain matrix(colSums, nrow, ncol) would recycle them down the
  ## columns and pair entries with the wrong column's total.
  gammas <<- joint / rep(colSums(joint), each = nrow(joint))
})

In [6]:
#' method to perform Viterbi algorithm
#' fills in deltas (best path likelihood ending in each state at each
#' time), phis (the best previous state for each state and time) and path
#' (the most likely state sequence, recovered by backtracking)
#' Works for sequences of any length, including a single observation.
hmm615$methods(viterbi = function() {
  ## initialize the matrices and vectors
  deltas <<- matrix(0, n.states, n.times)
  phis <<- matrix(0, n.states, n.times)
  path <<- numeric(length = n.times)
  if (n.times == 0L) { ## nothing observed: leave the empty results
    return(invisible(NULL))
  }

  ## forward() and backward() use transP[from, to]; the matrix A below is
  ## laid out as [current, previous], so it needs the transpose
  transT <- t(transP)

  deltas[, 1] <<- pis * emisP[, obs[1]] ## compute MLEs
  for (t in seq_len(n.times - 1L) + 1L) {
    ## A[i, j] = deltas[j, t-1] * transP[j, i] * emisP[i, obs[t]]
    ##   rows    = current state i, columns = previous state j
    A <- matrix(deltas[, t - 1], n.states, n.states, byrow = TRUE) * transT *
      matrix(emisP[, obs[t]], n.states, n.states) # element-wise operation
    deltas[, t] <<- apply(A, 1, max) ## MLEs
    phis[, t] <<- apply(A, 1, which.max) ## backtrack info
  }
  ## perform backtracking
  ## index the last column explicitly instead of relying on the value the
  ## loop variable happens to hold after the loop
  path[n.times] <<- which.max(deltas[, n.times]) ## choose last element by MLE
  for (t in rev(seq_len(n.times - 1L))) { ## backtrack from the last element
    path[t] <<- phis[path[t + 1], t + 1]
  }
})

## Test HMM on the Biased Coin Problem

In [7]:
## Two hidden states and two outcomes, written one matrix row per line.
## Row i of `trans` holds the transition probabilities out of state i;
## row i of `emis` holds the outcome probabilities in state i.
trans <- rbind(c(0.95, 0.05),
               c(0.20, 0.80))
emis <- rbind(c(0.5, 0.5),
              c(0.1, 0.9))
## observed outcomes, coded as column indices of `emis`
obs <- c(2, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2)
## equal prior probability for both states
hmm <- hmm615$new(trans, emis, c(0.5, 0.5), obs)

In [8]:
## posterior state probabilities (fills hmm$gammas)
hmm$forwardbackward()
## most likely state path (fills hmm$path)
hmm$viterbi()
## one row per time point: posteriors, Viterbi state, observation
df <- cbind(t(hmm$gammas), hmm$path, hmm$obs)
colnames(df) <- c("P.FAIR", "P.BIAS", "MLPATH", "OBS")
print(df)

         P.FAIR    P.BIAS MLPATH OBS
 [1,] 0.5950131 0.4049869      1   2
 [2,] 0.8117823 0.1882177      1   1
 [3,] 0.8070897 0.1929103      1   2
 [4,] 0.8583744 0.1416256      1   1
 [5,] 0.7613304 0.2386696      1   2
 [6,] 0.7276403 0.2723597      1   2
 [7,] 0.7495156 0.2504844      1   1
 [8,] 0.5412806 0.4587194      2   2
 [9,] 0.4186535 0.5813465      2   2
[10,] 0.3532846 0.6467154      2   2
[11,] 0.3300617 0.6699383      2   2
[12,] 0.3436160 0.6563840      2   2
[13,] 0.3970811 0.6029189      2   2
[14,] 0.5028172 0.4971828      2   1
[15,] 0.3725275 0.6274725      2   2
[16,] 0.2984670 0.7015330      2   2
[17,] 0.2635141 0.7364859      2   2
[18,] 0.2595883 0.7404117      2   2
[19,] 0.2857819 0.7142181      2   2
[20,] 0.3481506 0.6518494      2   2
