This notebook demonstrates the IOHMM model with parameters learned in a supervised way, i.e. from sequences whose hidden states are labeled. This corresponds to the frequency-counting procedure used for a supervised HMM. See the notes at http://www.cs.columbia.edu/4761/notes07/chapter4.3-HMM.pdf.

# Example use of Supervised_IOHMM 

In [1]:
%load_ext autoreload
%autoreload 2

from __future__ import  division
import numpy as np
from copy import deepcopy
import sys
sys.path.append('./main')
sys.path.append('./auxiliary/')
from IOHMM import SupervisedIOHMM, SupervisedIOHMMMapReduce
from SupervisedModels import LM, MNLP
from scipy.misc import logsumexp
import pandas as pd
import warnings
warnings.simplefilter("ignore")

## Speed data

### The sequence data

In [2]:
# Load the speed dataset from CSV and preview its first rows.
speed = pd.read_csv('data/speed.csv')
speed.head()

Unnamed: 0.1,Unnamed: 0,rt,corr,Pacc,prev
0,1,6.45677,cor,0.0,inc
1,2,5.602119,cor,0.0,cor
2,3,6.253829,inc,0.0,cor
3,4,5.451038,inc,0.0,inc
4,5,5.872118,inc,0.0,inc


### The labeled states

In this code base, the labeled states must be given as a dictionary. Each key is the index of an observation in the sequence (e.g. 0, 5), and each value is a one-out-of-n (one-hot) array whose kth entry is 1 if the hidden state at that index is k, where n is the total number of states.

In the following example, we assume that the "corr" column gives the correct hidden states.

In [3]:
# Build the labeled hidden states: a dict mapping each row index of `speed`
# to a one-hot array of length 2 (the number of hidden states).
# 'cor' is encoded as state 1 ([0, 1]); any other value as state 0 ([1, 0]).
corr = np.array(speed['corr'])
states = {}
for i, label in enumerate(corr):
    if label == 'cor':
        states[i] = np.array([0, 1])
    else:
        states[i] = np.array([1, 0])

## Setting up the model

In [4]:
# Build a supervised IOHMM with two hidden states, matching the two-element
# one-hot state vectors in `states`.
SHMM = SupervisedIOHMM(num_states=2)
# One emission model (LM) and an MNLP transition model using the 'lbfgs' solver.
SHMM.setModels(model_emissions = [LM()], model_transition=MNLP(solver='lbfgs'))
# No covariates are supplied for the initial, transition, or emission models.
SHMM.setInputs(covariates_initial = [], covariates_transition = [], covariates_emissions = [[]])
# The single emission model's output is the 'rt' column of the data frame.
SHMM.setOutputs([['rt']])
# The data is a list holding one [data frame, labeled states] pair.
SHMM.setData([[speed, states]])

## Start training

In [5]:
# Fit the model on the labeled sequence registered via setData.
SHMM.train()

## See coefficients

In [6]:
# Normalize each transition model's coefficients with a softmax,
# exp(coef - logsumexp(coef)), so that the printed row sums to 1.
# NOTE(review): model_transition[k] is presumably the model for
# transitions out of state k -- confirm against IOHMM.py.
for state_idx in (0, 1):
    coef = SHMM.model_transition[state_idx].coef
    print(np.exp(coef - logsumexp(coef)))

[[ 0.3839286  0.6160714]]
[[ 0.21165644  0.78834356]]


In [7]:
# Print the fitted coefficient of the first emission model for each entry
# of model_emissions.
# NOTE(review): presumably indexed as [state][emission model] -- confirm.
for state_idx in (0, 1):
    print(SHMM.model_emissions[state_idx][0].coef)

[ 5.70451774]
[ 6.13678825]


In [8]:
# Print the square root of each emission model's dispersion.
# NOTE(review): presumably the residual standard deviation of the linear
# model in each state -- confirm against SupervisedModels.LM.
for state_idx in (0, 1):
    dispersion = SHMM.model_emissions[state_idx][0].dispersion
    print(np.sqrt(dispersion))

0.358317806144
0.473560336628


## MapReduce Version

In [9]:
# Replace the running Spark context with one that lists the project modules
# in pyFiles, which Spark distributes to the executors.
# NOTE(review): `sc` and `SparkContext` are not defined in this notebook;
# presumably they are provided by the PySpark notebook environment.
sc.stop()
# The application name now matches this supervised example; it previously
# read "UnSupervised", apparently copied from the unsupervised notebook.
sc = SparkContext(appName="Python_Supervised_IOHMM_MapReduce", pyFiles=[
    './auxiliary/HMM.py',
    './auxiliary/SupervisedModels.py',
    './auxiliary/family.py',
    './main/IOHMM.py'])

In [10]:
# Reload the speed data and rebuild the labeled hidden states for the
# MapReduce example: a dict mapping each row index to a one-hot array of
# length 2. 'cor' is encoded as state 1 ([0, 1]); any other value as
# state 0 ([1, 0]).
speed = pd.read_csv('data/speed.csv')
corr = np.array(speed['corr'])
states = {}
for i, label in enumerate(corr):
    if label == 'cor':
        states[i] = np.array([0, 1])
    else:
        states[i] = np.array([1, 0])

In [11]:
# Two (key, placeholder) pairs; the placeholder value 1 is discarded below.
indexes = [(1,1), (2,1)]
RDD = sc.parallelize(indexes)
# Replace every value with the same [data frame, labeled states] pair, so the
# RDD holds two keyed copies of the one labeled sequence.
dfs_states = RDD.mapValues(lambda v: [speed, states])

In [12]:
# Same configuration as the single-machine model, using the MapReduce class.
SHMM = SupervisedIOHMMMapReduce(num_states=2)
# One emission model (LM) and an MNLP transition model using the 'lbfgs' solver.
SHMM.setModels(model_emissions = [LM()], model_transition=MNLP(solver='lbfgs'))
# No covariates are supplied for the initial, transition, or emission models.
SHMM.setInputs(covariates_initial = [], covariates_transition = [], covariates_emissions = [[]])
# The single emission model's output is the 'rt' column of the data frame.
SHMM.setOutputs([['rt']])
# Here the data is the keyed RDD of [data frame, labeled states] pairs
# rather than a plain list.
SHMM.setData(dfs_states)
SHMM.train()
print 'done'

done


In [13]:
# Normalize each transition model's coefficients with a softmax,
# exp(coef - logsumexp(coef)), so that the printed row sums to 1.
# NOTE(review): model_transition[k] is presumably the model for
# transitions out of state k -- confirm against IOHMM.py.
for state_idx in (0, 1):
    coef = SHMM.model_transition[state_idx].coef
    print(np.exp(coef - logsumexp(coef)))

[[ 0.3839286  0.6160714]]
[[ 0.21165644  0.78834356]]


In [14]:
# Print the fitted coefficient of the first emission model for each entry
# of model_emissions.
# NOTE(review): presumably indexed as [state][emission model] -- confirm.
for state_idx in (0, 1):
    print(SHMM.model_emissions[state_idx][0].coef)

[ 5.70451774]
[ 6.13678825]


In [15]:
# Print the square root of each emission model's dispersion.
# NOTE(review): presumably the residual standard deviation of the linear
# model in each state -- confirm against SupervisedModels.LM.
for state_idx in (0, 1):
    dispersion = SHMM.model_emissions[state_idx][0].dispersion
    print(np.sqrt(dispersion))

0.358317806144
0.473560336628


In [16]:
# Shut down the Spark context used by the MapReduce example.
sc.stop()