In [1]:
# coding: utf-8
import numpy as np
from sklearn.model_selection import train_test_split
from gensim.utils import simple_preprocess

import keras
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences

import pandas as pd
import matplotlib.pyplot as plt

Using TensorFlow backend.


In [2]:
# Execute the two helper notebooks before anything else. Later cells use names
# that are not defined in this notebook (EMBEDDING_DIM, MAX_WORD_COUNT,
# createEmbeddingMatrix, text_to_index); presumably they are provided by these
# notebooks — TODO confirm which notebook defines which name.
%run common.ipynb
%run word2vec_loader.ipynb

In [3]:
# Load the pickled dataset and divide it into training and testing frames.
# (Unpickling can execute arbitrary code, so only load a trusted file here.)
df = pd.read_pickle('train_reduce.pkl')

# shuffle=False keeps the row order: the first returned frame is the leading
# 20% of the rows and the second one (test_size=0.8) is the trailing 80%.
# Unpacking in (test, train) order makes the larger trailing part the training
# set and the smaller leading part the testing set.
test_df, train_df = train_test_split(df, test_size=0.8, shuffle=False)

In [4]:
# Generate word embedding matrix and word2idx dict.
# createEmbeddingMatrix and EMBEDDING_DIM are not defined in this notebook;
# presumably they come from the helper notebooks executed via %run — TODO confirm.
# word2idx is handed to text_to_index in the cells that build X_train / X_test.
embedding_matrix, word2idx = createEmbeddingMatrix(EMBEDDING_DIM)

Word2Vec(vocab=8723, size=50, alpha=0.025)
embedding_matrix.shape=(8724, 50)


In [5]:
# Build the training inputs: convert the 'ari_title' column to word-index
# sequences, then pad them to a common length of MAX_WORD_COUNT.
x_texts_idx = text_to_index(train_df['ari_title'], word2idx)
x_texts_idx_pad = pad_sequences(x_texts_idx, maxlen=MAX_WORD_COUNT)

# Features are the padded index matrix; targets are the 'price' column.
X_train, Y_train = x_texts_idx_pad, train_df['price']

# Every padded sequence needs exactly one target value.
assert len(X_train) == len(Y_train)
print(X_train.shape, Y_train.shape)

(634, 579) (634,)


In [6]:
# Build the testing inputs: convert the 'ari_title' column to word-index
# sequences, then pad them to a common length of MAX_WORD_COUNT.
x_texts_idx = text_to_index(test_df['ari_title'], word2idx)
x_texts_idx_pad = pad_sequences(x_texts_idx, maxlen=MAX_WORD_COUNT)

# Features are the padded index matrix; targets are the 'price' column.
X_test, Y_test = x_texts_idx_pad, test_df['price']

# Every padded sequence needs exactly one target value.
assert len(X_test) == len(Y_test)
print(X_test.shape, Y_test.shape)

(158, 579) (158,)


In [7]:
# Evaluate the saved per-epoch checkpoints on both data splits and record one
# comma-separated line per checkpoint in the history file.
# (Alternative kept for reference: model = load_model('models/my_model.h5')
#  followed by model.summary() to inspect a single saved model.)

# Reset the history file: opening it with 'w+' already empties it.
hist_file_path = './evaluation_history.txt'
open(hist_file_path, 'w+').close()
print('Truncated: %s\n' % hist_file_path)

# Checkpoint epochs to evaluate.
# NOTE(review): if this list grows, each load_model call may add backend state
# in the same kernel; consider keras.backend.clear_session() between loads if
# memory becomes a problem — confirm before relying on it.
epoches = [1]
for epoch in epoches:
    model_path = 'models_intermediate/epoch%04d.h5' % epoch
    print('Loading model:', model_path)
    model = load_model(model_path)

    # Training-set score over the first num_train_first samples (here: all).
    num_train_first = len(X_train)
    train_loss_accuracy = model.evaluate(
        X_train[:num_train_first], Y_train[:num_train_first], verbose=1)
    print('loss=%.4f, accuracy=%.4f\n' % tuple(train_loss_accuracy[:2]))

    # Testing-set score over every test sample.
    test_loss_accuracy = model.evaluate(X_test, Y_test, verbose=1)
    print('loss=%.04f, accuracy=%.04f' % tuple(test_loss_accuracy[:2]))

    # Line layout:
    # epoch,num_train,train_loss,train_accuracy,num_test,test_loss,test_accuracy
    history_fields = (
        epoch,
        num_train_first, train_loss_accuracy[0], train_loss_accuracy[1],
        len(X_test), test_loss_accuracy[0], test_loss_accuracy[1],
    )
    with open(hist_file_path, 'a+') as f:
        f.write('%05d,%05d,%.04f,%.04f,%05d,%.04f,%.04f\n' % history_fields)

    print('Appended: %s\n' % hist_file_path)


Truncated: ./evaluation_history.txt

Loading model: models_intermediate/epoch0001.h5
loss=0.6905, accuracy=0.5631

loss=0.6900, accuracy=0.6013
Appended: ./evaluation_history.txt



In [8]:
# Predict by the model
# predict_result = model.predict(X_test, batch_size= 1, verbose=0)

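# Format the prediction results as 1 and 0.
# A prediction below 0.5 is stored as 0, meaning the price is judged to fall;
# otherwise it means a rise, or no change within the limited decimal precision.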
# binarized_result = list(map(lambda val: int(round(val[0])), predict_result))

# Print first N items for checking
# N = 10
# for pv,fv in zip(predict_result[:N], binarized_result[:N]):
#     print(str(pv[0])[:4], str(fv).rjust(3))

In [9]:
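# Compare with the actual stock index changes and print the prediction accuracy.
# NOTE(review): the commented code compares predictions made on X_test against
# Y_train[-100:]; verify that this is the intended ground truth before re-enabling.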
# x1 = binarized_result
# x2 = Y_train[-100:]

# acc_i = 0
# for v1, v2 in zip(x1, x2):
#     acc_i += 1 if v1 == v2 else 0
# print("預測準確率爲:", acc_i / len(x1))

In [10]:
# x2 = Y_train

# pre_ress = np.random.randint(2, size=10)
# acc_j = 0
# for v1,v2 in zip(pre_ress, x2):
#     acc_j += 1 if v1 == v2 else 0
# print("random 1 準確率爲:", acc_j / len(pre_ress))

# pre_ress = np.random.randint(2, size=10)
# acc_j = 0
# for v1,v2 in zip(pre_ress, x2):
#     acc_j += 1 if v1 == v2 else 0
# print("random 2 準確率爲:", acc_j / len(pre_ress))

# pre_ress = np.random.randint(2, size=10)
# acc_j = 0
# for v1,v2 in zip(pre_ress, x2):
#     acc_j += 1 if v1 == v2 else 0
# print("random 3 準確率爲:", acc_j / len(pre_ress))