Acknowledgement:
    1. The structure of the code is inspired by depmixS4: An R Package for Hidden Markov Models: https://cran.r-project.org/web/packages/depmixS4/vignettes/depmixS4.pdf
    2. Some of the linear-model code is adapted from sklearn (http://scikit-learn.org/stable/) and statsmodels (http://statsmodels.sourceforge.net/). It has been modified to provide additional functionality.

Problems with existing packages
    1. Some sklearn and statsmodels estimators do not support sample weights
    2. Some sklearn and statsmodels estimators do not support l1, l2 or elastic net regularization
    3. sklearn does not support estimation of the standard deviation of coefficients
    4. The likelihood function of weighted linear models is not the same as the ones we need to use in IO-HMM
    5. The aforementioned R package does not support fitting on multiple sequences.

Modifications to above packages:
    1. Implemented supervised models that support sample weights
    2. Supports the estimation of standard deviations of coefficients
    3. Supports multiple regularizations (l1, l2, elastic net) in most of the supervised models. (However,  if regularization is applied, no standard deviation of the coefficients will be estimated)
    4. Supports estimation over multiple sequences (multiple dataframes)
    5. HMM forward-backward code was implemented at the log scale so that it is more robust to long sequences.
    6. Supports generalized linear models with different link functions, just as statsmodels does.

In [1]:
from __future__ import  division
import sys
sys.path.append('./auxiliary')
import numpy as np
from copy import deepcopy
from SupervisedModels import *
from HMM import *
import pandas as pd
import warnings
warnings.simplefilter("ignore")

In [2]:
class SupervisedHMM(object):
    """Input-output hidden Markov model trained with EM.

    The initial-state model, one transition model per hidden state and a
    list of emission models per hidden state are supervised models whose
    inputs are dataframe columns. Typical call order (later setters read
    attributes written by earlier ones):

        setData -> setModels -> setInputs -> setOutputs -> train
    """

    def __init__(self, num_states=2, EM_tol=1e-4, max_EM_iter=100):
        """Store the hyper-parameters.

        num_states  -- number of hidden states.
        EM_tol      -- EM stops once the absolute change of the total
                       log-likelihood between iterations is below this.
        max_EM_iter -- maximum number of EM iterations.
        """
        self.num_states = num_states
        self.EM_tol = EM_tol
        self.max_EM_iter = max_EM_iter

    def setModels(self, model_emissions, model_initial=None, model_transition=None):
        """Register the model templates.

        model_emissions  -- list with one emission model per output group;
                            the list is deep-copied once per hidden state.
        model_initial    -- initial-state model, must be an MNLP
                            (a fresh MNLP() when omitted).
        model_transition -- transition model template, must be an MNLP
                            (a fresh MNLP() when omitted); deep-copied once
                            per hidden state.
        """
        # Defaults are created per call: a default instance in the signature
        # would be shared (and its coefficients overwritten) by every HMM.
        if model_initial is None:
            model_initial = MNLP()
        if model_transition is None:
            model_transition = MNLP()
        # Copy the initial model as well, consistently with the other two, so
        # that fitting never mutates an object the caller may reuse.
        self.model_initial = deepcopy(model_initial)
        self.model_transition = [deepcopy(model_transition) for _ in range(self.num_states)]
        self.model_emissions = [deepcopy(model_emissions) for _ in range(self.num_states)]
        self.num_emissions = len(model_emissions)

    def setData(self, dfs):
        """Register the training sequences: a list with one dataframe per sequence."""
        self.num_seqs = len(dfs)
        self.dfs = dfs

    def setInputs(self, covariates_initial, covariates_transition, covariates_emissions):
        """Extract covariate matrices and randomly initialise the
        initial/transition coefficients.

        Each argument is a list indicating columns of the dataframes;
        covariates_emissions is a list of such lists, one per emission model.
        """
        # Initial model: only the first record of every sequence is used.
        self.inp_initials = [
            np.array(df[covariates_initial].iloc[0]).reshape(1, -1).astype('float64')
            for df in self.dfs
        ]
        self.inp_initials_all_users = np.vstack(self.inp_initials)
        n_rows = self.inp_initials_all_users.shape[1] + self.model_initial.fit_intercept
        self.model_initial.coef = np.random.rand(n_rows, self.num_states)

        # Transition models: every record except the first of each sequence.
        self.inp_transitions = [
            np.array(df[covariates_transition].iloc[1:]).astype('float64')
            for df in self.dfs
        ]
        self.inp_transitions_all_users = np.vstack(self.inp_transitions)
        n_cols = self.inp_transitions_all_users.shape[1]
        for st in range(self.num_states):
            model = self.model_transition[st]
            model.coef = np.random.rand(n_cols + model.fit_intercept, self.num_states)

        # Emission models: per emission, one matrix per sequence plus the
        # stacked matrix over all sequences.
        self.inp_emissions = [
            [np.array(df[cov]).astype('float64') for df in self.dfs]
            for cov in covariates_emissions
        ]
        self.inp_emissions_all_users = [np.vstack(covs) for covs in self.inp_emissions]

    def setOutputs(self, responses_emissions):
        """Extract response matrices and randomly initialise the emission models.

        responses_emissions is a list of column lists, one per emission
        model. Requires setModels and setInputs to have been called.
        """
        self.out_emissions = [
            [np.array(df[res]) for df in self.dfs]
            for res in responses_emissions
        ]
        self.out_emissions_all_users = [np.vstack(ress) for ress in self.out_emissions]

        for i in range(self.num_states):
            for j in range(self.num_emissions):
                model = self.model_emissions[i][j]
                n_features = self.inp_emissions_all_users[j].shape[1]
                n_responses = len(responses_emissions[j])
                # Independent `if`s (not `elif`) on purpose: the relationship
                # between these classes is defined outside this file.
                if isinstance(model, GLM):
                    model.coef = np.random.rand(n_features + model.fit_intercept,)
                    model.dispersion = 1
                if isinstance(model, LM):
                    if n_responses == 1:
                        model.coef = np.random.rand(n_features + model.fit_intercept,)
                        model.dispersion = 1
                    else:
                        model.coef = np.random.rand(n_features + model.fit_intercept, n_responses)
                        model.dispersion = np.eye(n_responses)
                if isinstance(model, MNLD):
                    # One coefficient column per distinct observed label.
                    n_labels = np.unique(self.out_emissions_all_users[j]).shape[0]
                    model.coef = np.random.rand(n_features + model.fit_intercept, n_labels)
                    model.lb = LabelBinarizer().fit(self.out_emissions_all_users[j])
                if isinstance(model, MNLP):
                    model.coef = np.random.rand(n_features + model.fit_intercept, n_responses)

    def EStep(self):
        """Run calHMM on every sequence with the current models.

        Fills self.log_gammas, self.log_epsilons and self.lls (one entry per
        sequence) and sets self.ll to the sum of self.lls.
        """
        self.log_gammas = []
        self.log_epsilons = []
        self.lls = []

        for seq in range(self.num_seqs):
            n_records = self.dfs[seq].shape[0]

            log_prob_initial = self.model_initial.predict_log_probability(
                self.inp_initials[seq]).reshape(self.num_states,)
            assert log_prob_initial.shape == (self.num_states,)

            # log_prob_transition[t, st, :] comes from the transition model of state st.
            log_prob_transition = np.zeros((n_records - 1, self.num_states, self.num_states))
            for st in range(self.num_states):
                log_prob_transition[:, st, :] = self.model_transition[st].predict_log_probability(
                    self.inp_transitions[seq])
            assert log_prob_transition.shape == (n_records - 1, self.num_states, self.num_states)

            # Emission log-probabilities are summed over the emission models.
            log_Ey = np.zeros((n_records, self.num_states))
            for emis in range(self.num_emissions):
                model_collection = [models[emis] for models in self.model_emissions]
                log_Ey += np.vstack([
                    model.log_probability(self.inp_emissions[emis][seq],
                                          self.out_emissions[emis][seq])
                    for model in model_collection
                ]).T

            log_gamma, log_epsilon, ll = calHMM(log_prob_initial, log_prob_transition, log_Ey)
            self.log_gammas.append(log_gamma)
            self.log_epsilons.append(log_epsilon)
            self.lls.append(ll)

        # Computed once after the loop, so it is also defined (as 0) when
        # there are no sequences.
        self.ll = sum(self.lls)

    def MStep(self):
        """Refit every model using the quantities stored by the last EStep."""
        # Initial model: targets are exp(log_gamma) at the first record of each sequence.
        X = self.inp_initials_all_users
        Y = np.exp(np.vstack([lg[0, :].reshape(1, -1) for lg in self.log_gammas]))
        self.model_initial.fit(X, Y)

        # Transition models: targets are exp(log_epsilon[:, st, :]) for source state st.
        X = self.inp_transitions_all_users
        for st in range(self.num_states):
            Y = np.exp(np.vstack([eps[:, st, :] for eps in self.log_epsilons]))
            self.model_transition[st].fit(X, Y)

        # Emission models: fitted on all records, weighted by exp(log_gamma[:, st]).
        for emis in range(self.num_emissions):
            X = self.inp_emissions_all_users[emis]
            Y = self.out_emissions_all_users[emis]
            for st in range(self.num_states):
                sample_weight = np.exp(np.hstack([lg[:, st] for lg in self.log_gammas]))
                self.model_emissions[st][emis].fit(X, Y, sample_weight=sample_weight)

    def train(self):
        """Alternate M and E steps until the log-likelihood stabilises.

        Prints the total log-likelihood after every iteration. Sets
        self.converged to True only if the absolute log-likelihood change
        dropped below EM_tol within max_EM_iter iterations.
        """
        self.EStep()
        self.converged = False
        for it in range(self.max_EM_iter):
            prev_ll = self.ll
            self.MStep()
            self.EStep()
            # print(x) with one argument behaves the same on Python 2 and 3.
            print(self.ll)
            if abs(self.ll - prev_ll) < self.EM_tol:
                self.converged = True
                break

## Example use of SupervisedHMM 

## Speed data - example 1

In [3]:
# Load the speed data set used by both examples below and preview its first rows.
speed = pd.read_csv('data/speed.csv')
# print(x) with a single argument behaves the same on Python 2 and Python 3.
print(speed.head())

   Unnamed: 0        rt corr  Pacc prev
0           1  6.456770  cor     0  inc
1           2  5.602119  cor     0  cor
2           3  6.253829  inc     0  cor
3           4  5.451038  inc     0  inc
4           5  5.872118  inc     0  inc


In [4]:
# Example 1: two hidden states, a single LM emission on the 'rt' column.
# All covariate lists are empty, so no dataframe columns are used as inputs.
SHMM = SupervisedHMM(num_states=2, max_EM_iter=1000, EM_tol=1e-2)
SHMM.setData([speed])
SHMM.setModels(model_emissions = [LM()], model_transition=MNLP(solver='lbfgs'))
# setInputs must follow setData/setModels: it reads self.dfs and initialises model coefficients.
SHMM.setInputs(covariates_initial = [], covariates_transition = [], covariates_emissions = [[]])
# setOutputs must follow setInputs: it sizes emission coefficients from the emission inputs.
SHMM.setOutputs([['rt']])

In [5]:
# Run EM; train() prints the total log-likelihood after every iteration.
SHMM.train()

-305.30641873
-305.29378195
-305.274713904
-305.245594903
-305.200705055
-305.130364911
-305.018000279
-304.818558049
-304.339960414
-303.506375595
-301.962867468
-298.290911443
-286.359839043
-248.905715617
-152.432808889
-96.7690172002
-90.885944949
-92.6376716642
-92.4443643017
-92.4104966065
-92.4190258813


In [6]:
# Normalise each state's transition coefficients with exp(coef - logsumexp(coef))
# so the printed values sum to one (presumably the transition probabilities out
# of that state, since no transition covariates were given -- confirm).
print(np.exp(SHMM.model_transition[0].coef - logsumexp(SHMM.model_transition[0].coef)))
print(np.exp(SHMM.model_transition[1].coef - logsumexp(SHMM.model_transition[1].coef)))

[[ 0.80204745  0.19795255]]
[[ 0.08819023  0.91180977]]


In [7]:
# Fitted coefficients of the 'rt' emission model in state 0 and state 1.
print(SHMM.model_emissions[0][0].coef)
print(SHMM.model_emissions[1][0].coef)

[ 5.50556371]
[ 6.38251669]


In [8]:
# Square root of each state's emission dispersion
# (presumably the residual standard deviation of 'rt' -- confirm against LM).
print(np.sqrt(SHMM.model_emissions[0][0].dispersion))
print(np.sqrt(SHMM.model_emissions[1][0].dispersion))

0.18629268081
0.245356156697


## Speed data - example 2

In [9]:
# Example 2: two hidden states and two emission models per state:
#   emission 0: LM on 'rt' with no covariates,
#   emission 1: MNLD on 'corr' with 'Pacc' as covariate.
SHMM = SupervisedHMM(num_states=2, max_EM_iter=1000, EM_tol=1e-2)
SHMM.setData([speed])
SHMM.setModels(model_emissions = [LM(), MNLD()], model_transition=MNLP(solver='lbfgs'))
# The covariate and response lists are positional: entry k belongs to emission model k.
SHMM.setInputs(covariates_initial = [], covariates_transition = [], covariates_emissions = [[],['Pacc']])
SHMM.setOutputs([['rt'],['corr']])

In [10]:
# Run EM; train() prints the total log-likelihood after every iteration.
SHMM.train()

-530.387395754
-529.970861057
-529.231828106
-527.905447619
-525.243311938
-518.933324038
-498.918262138
-441.874310539
-380.04722621
-324.528871437
-306.690513817
-301.446580694
-301.301058889
-300.371132429
-302.652867277
-302.562558986
-302.541810513
-302.54447759


In [11]:
# Normalise each state's transition coefficients with exp(coef - logsumexp(coef))
# so the printed values sum to one (presumably the transition probabilities out
# of that state, since no transition covariates were given -- confirm).
print(np.exp(SHMM.model_transition[0].coef - logsumexp(SHMM.model_transition[0].coef)))
print(np.exp(SHMM.model_transition[1].coef - logsumexp(SHMM.model_transition[1].coef)))

[[ 0.90819425  0.09180575]]
[[ 0.19763784  0.80236216]]


In [12]:
# Fitted coefficients, indexed as model_emissions[state][emission]:
# emission 0 is the LM on 'rt', emission 1 is the MNLD on 'corr'.
print(SHMM.model_emissions[0][0].coef)
print(SHMM.model_emissions[0][1].coef)
print(SHMM.model_emissions[1][0].coef)
print(SHMM.model_emissions[1][1].coef)

[ 6.38514982]
[[ 0.         -1.07651522]
 [ 0.         -2.28921204]]
[ 5.50896916]
[[ 0.         -0.22113539]
 [ 0.          0.62348445]]


In [13]:
# The `sd` attribute of each emission model, in the same [state][emission] order
# (presumably the estimated standard deviations of the coefficients -- confirm
# against SupervisedModels).
print(SHMM.model_emissions[0][0].sd)
print(SHMM.model_emissions[0][1].sd)
print(SHMM.model_emissions[1][0].sd)
print(SHMM.model_emissions[1][1].sd)

[ 0.01499407]
[[ 0.          0.38489839]
 [ 0.          0.8114583 ]]
[ 0.01396695]
[[ 0.          0.15889298]
 [ 0.          0.73586072]]
