# UnSupervisedIOHMM

In [2]:
from __future__ import  division

import json
import warnings

import numpy as np
import pandas as pd

from IOHMM import UnSupervisedIOHMM
from IOHMM import OLS, DiscreteMNL, CrossEntropyMNL

warnings.simplefilter("ignore")

## Load speed data

In [3]:
data = pd.read_csv("data/Financial-Data/stocks/TSLA.csv")
data.head()
data = data.dropna()

input_data = data[['Open', 'High', 'Low', 'Close', 'Volume']][:-1]


# close price of the next day
output_data = data['Close'][1:]
# to start from 0 index
output_data.index = range(len(output_data))

output_data.name = 'Close-next-day'

data = pd.concat([input_data, output_data], axis=1)

data.head()
print(len(data))

3259


# Example 1

## Set up a simple model manully

In [4]:
# set two hidden states
SHMM = UnSupervisedIOHMM(num_states=3, max_EM_iter=200, EM_tol=1e-6)

# This model has only one output which is modeled by a linear regression model
SHMM.set_models(model_emissions = [OLS()], 
                model_transition=CrossEntropyMNL(solver='lbfgs'),
                model_initial=CrossEntropyMNL(solver='lbfgs'))

# We don't set any covariates to this OLS model
SHMM.set_inputs(covariates_initial = ['Open', 'High', 'Low', 'Close', 'Volume'], covariates_transition = ['Open', 'High', 'Low', 'Close', 'Volume'], covariates_emissions = [['Open', 'High', 'Low', 'Close', 'Volume'],['Open', 'High', 'Low', 'Close', 'Volume'],['Open', 'High', 'Low', 'Close', 'Volume']])

SHMM.set_outputs([['Close-next-day']])

# we only have a list of one sequence.
SHMM.set_data([data])

## Start training

In [37]:
SHMM.train()

## See the training results

In [29]:
# The coefficients of the OLS model for each hidden states
print(SHMM.model_emissions[0][0].coef)
print(SHMM.model_emissions[1][0].coef)

[[ 1.03092483e-02  8.17116084e-02 -2.13798059e-01 -2.64387126e-02
   1.15832428e+00  5.07907722e-10]]
[[ 2.42026668e+00 -5.59737313e-02  7.10208882e-02  1.19251234e-01
   8.58551538e-01 -3.58995073e-09]]


In [30]:
# The scale/dispersion of the OLS model for each hidden states
print(np.sqrt(SHMM.model_emissions[0][0].dispersion))
print(np.sqrt(SHMM.model_emissions[1][0].dispersion))

[[0.38967379]]
[[8.5626673]]


In [31]:
# The transition probability between two hidden states
print(np.exp(SHMM.model_transition[0].predict_log_proba(np.array([[]]))))
print(np.exp(SHMM.model_transition[1].predict_log_proba(np.array([[]]))))

ValueError: X has 1 features, but LogisticRegression is expecting 6 features as input.