In [None]:
########## Do not run this cell
from hmmlearn import hmm
import numpy as np

class CustomHMM:
  def __init__(self):
      def build_hmm():
          model = hmm.GMMHMM(n_components=3, n_mix=3, covariance_type="diag", init_params="t")
          model.transmat_ = np.array([[0.5, 0.5, 0.0],
                                      [0.0, 0.5, 0.5],
                                      [0.0, 0.0, 1.0]])
          return model

      self.hmm_0 = build_hmm()
      self.hmm_1 = build_hmm()

  def fit(self, X_train, y_train):
      # X_train shape(n_instances, n_samples)
      labels = set(y_train)
      if len(labels) != 2:
          raise Exception("y_train doesn't contain 2 classes")
      X_0 = X_train[y_train == 0, :]
      X_1 = X_train[y_train == 1, :]

      self.hmm_0.fit(X_0)
      self.hmm_1.fit(X_1)

  def predict(self, X_test):
      res = []
      for x in X_test:
           res.append(0 if self.hmm_0.score(x) > self.hmm_1.score(x) else 1)
      return np.array(res)

In [1]:
#### run this cell
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import statsmodels.formula.api as smf
import statsmodels.api as sm
import seaborn as sns
from scipy import stats
sns.set(style='ticks', color_codes=True)

  from pandas.core import datetools


In [39]:
df = pd.read_csv('IBM.csv', sep=',')

In [40]:
df['priceold'] = df.Close.shift(1)

In [41]:
df = df[1:]

In [42]:
df['Up'] = 1

In [43]:
df.loc[df.Close < df.priceold, 'Up'] = 0

In [45]:
import sklearn
from sklearn.model_selection import train_test_split

In [58]:
target = df['Up']
rows, columns = df.shape
data = df.iloc[:,1:(columns-1)]

In [68]:
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,priceold,Up
1,2013-04-08,209.070007,209.320007,207.330002,209.320007,179.015778,2351200,209.410004,0
2,2013-04-09,209.559998,210.0,208.610001,209.220001,178.930252,2577300,209.320007,0
3,2013-04-10,211.860001,212.509995,210.470001,212.0,181.307785,3524200,209.220001,1
4,2013-04-11,211.5,213.089996,210.850006,212.919998,182.094604,3655600,212.0,1
5,2013-04-12,210.789993,211.490005,209.779999,211.380005,180.777557,3165400,212.919998,0


In [67]:
data.head()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,priceold
1,209.070007,209.320007,207.330002,209.320007,179.015778,2351200,209.410004
2,209.559998,210.0,208.610001,209.220001,178.930252,2577300,209.320007
3,211.860001,212.509995,210.470001,212.0,181.307785,3524200,209.220001
4,211.5,213.089996,210.850006,212.919998,182.094604,3655600,212.0
5,210.789993,211.490005,209.779999,211.380005,180.777557,3165400,212.919998


In [74]:
Close = df.Close.values # 收盘价
Volume = df.Volume.values # 交易数
High = df.High.values # 最高价格
Low = df.Low.values # 最低价格
diff = np.diff(data.Close)[4:]
volume_5 = np.log(Volume[5:]) - np.log(Volume[:-5])
close_5 = np.log(Close[5:]) - np.log(Close[:-5])
logDel = np.log(High[5:]) - np.log(Low[5:])
dates = pd.to_datetime(df.Date[5:])
close = data.Close[5:]

In [75]:
X = np.column_stack([volume_5, close_5, logDel])
y = np.array(target[5:])

In [76]:
RS = 20150101
X_train, X_test, y_train, y_test = train_test_split(X, y.astype(int), random_state=RS, test_size=0.25)

In [77]:
X_0 = X_train[y_train == 0, :]
X_1 = X_train[y_train == 1, :]

In [85]:
from hmmlearn import hmm 
model0 = hmm.GaussianHMM(6, covariance_type = "diag", n_iter = 300)
model0.fit(X_0)
model1 = hmm.GaussianHMM(6, covariance_type = "diag", n_iter = 300)
model1.fit(X_1)




GaussianHMM(algorithm='viterbi', covariance_type='diag', covars_prior=0.01,
      covars_weight=1, init_params='stmc', means_prior=0, means_weight=0,
      min_covar=0.001, n_components=6, n_iter=300, params='stmc',
      random_state=None, startprob_prior=1.0, tol=0.01, transmat_prior=1.0,
      verbose=False)

In [90]:
import math
res = []
for x in X_test:
    lp0 = model0.score([x])
    lp1 = model1.score([x])
    #print(lp0)
    #res.append((math.exp(lp1 - lp0) + 1) ** (-1))
    if lp0 > lp1:
        res.append(0)
    else:
        res.append(1)






In [91]:
res

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,


In [95]:
sum(res == y_test) / len(res)

0.55776892430278879