In [21]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from keras.layers import Input, Dense
from keras.models import Model
from keras.callbacks import TensorBoard

# pandas display configuration for this notebook.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 30)
# Use the fully-qualified option key: the bare 'precision' pattern matches more
# than one option on pandas versions that also define 'styler.format.precision',
# which makes set_option raise OptionError.
pd.set_option('display.precision', 7)
# Render floats with a thousands separator and three decimals.
pd.options.display.float_format = '{:,.3f}'.format
import warnings
# Hide FutureWarning messages emitted by the libraries used below.
warnings.simplefilter(action = "ignore", category = FutureWarning)

def readWSDFile(baseDir, stockCode, startYear, yearNum=1):
    '''
    Read the daily OHLCV data of one stock for the requested calendar years.

    The file read is ``baseDir + stockCode + '/' + stockCode + '.csv'``; the
    pieces are concatenated as-is, so ``baseDir`` must already end with a path
    separator. The first line of the file is skipped and the columns are
    named Date, Pre_Close, Open, High, Low, Close, Chg, Chg_Range, Volume,
    Amount, Turn. Dates must be written as ``YYYY/MM/DD``.

    Parameters
    ----------
    baseDir : str
        Directory prefix of the data files.
    stockCode : str
        Stock code, used both as sub-directory and as file stem.
    startYear : int
        First calendar year to keep.
    yearNum : int, default 1
        Number of consecutive calendar years to keep, starting at startYear.

    Returns
    -------
    pandas.DataFrame
        Rows whose date falls in startYear .. startYear + yearNum - 1, in file
        order, indexed by a DatetimeIndex named 'Date'. Empty when no row
        matches (including yearNum <= 0).

    Raises
    ------
    ValueError
        If a date in the file does not match ``YYYY/MM/DD``.
    '''
    # pd.datetime no longer exists in current pandas; use the stdlib class it aliased.
    from datetime import datetime

    filename = baseDir+stockCode+'/'+stockCode+'.csv'
    print (filename, "===============")
    df = pd.read_csv(filename, index_col=0, sep=',', header=None,
                     skiprows=1, names=['Date','Pre_Close','Open','High','Low','Close','Chg','Chg_Range',
                                        'Volume','Amount','Turn'])
    # Parse the index explicitly instead of through read_csv's date_parser hook.
    df.index = pd.DatetimeIndex(
        [datetime.strptime(text, '%Y/%m/%d') for text in df.index],
        name=df.index.name)

    # Honour startYear / yearNum instead of a hard-coded date range. A boolean
    # mask (rather than a label slice) also works when the rows are not sorted.
    endYear = startYear + yearNum - 1
    years = df.index.year
    return df[(years >= startYear) & (years <= endYear)]

def readWSDIndexFile(baseDir, stockCode, startYear, yearNum=1):
    '''
    Read the per-year technical-indicator files of one stock and stack them.

    One file is read per year, named
    ``baseDir + 'I' + stockCode + '/wsd_' + stockCode + '_' + <year> + '.csv'``;
    the pieces are concatenated as-is, so ``baseDir`` must already end with a
    path separator. Each file's first row is its header and its first column
    holds dates written as ``YYYY/MM/DD``.

    Parameters
    ----------
    baseDir : str
        Directory prefix of the data files.
    stockCode : str
        Stock code used in the directory and file names.
    startYear : int
        First year to read.
    yearNum : int, default 1
        Number of consecutive yearly files to read.

    Returns
    -------
    pandas.DataFrame
        The yearly frames concatenated row-wise in year order, indexed by a
        DatetimeIndex. An empty DataFrame when yearNum <= 0 (previously the
        integer 0 was returned in that case).

    Raises
    ------
    ValueError
        If a date in a file does not match ``YYYY/MM/DD``.
    '''
    # pd.datetime no longer exists in current pandas; use the stdlib class it aliased.
    from datetime import datetime

    yearlyFrames = []
    for offset in range(yearNum):
        filename = baseDir+'I'+stockCode+'/wsd_'+stockCode+'_'+str(startYear+offset)+'.csv'
        yearDF = pd.read_csv(filename, index_col=0, sep=',')
        # Parse the index explicitly instead of through read_csv's date_parser hook.
        yearDF.index = pd.DatetimeIndex(
            [datetime.strptime(text, '%Y/%m/%d') for text in yearDF.index],
            name=yearDF.index.name)
        yearlyFrames.append(yearDF)

    if not yearlyFrames:
        # pd.concat rejects an empty list; keep the return type a DataFrame.
        return pd.DataFrame()
    # A single concat replaces the repeated DataFrame.append calls
    # (DataFrame.append was removed in pandas 2.0).
    return pd.concat(yearlyFrames)


# prepare data: price/volume columns plus technical indicators, joined on the date index
baseDir = '../'
stockCodes = ['000300.SH']
i = 0
startYear = 2013
number = 3
df = readWSDFile(baseDir, stockCodes[i], startYear, number)
dfi = readWSDIndexFile(baseDir, stockCodes[i], startYear, number)
allDF = pd.concat([df, dfi], axis=1)
print ("Factors Shape:", np.shape(df), np.shape(dfi))

# positional train / validation split: the first `ratio` share of the rows is
# used for training, the remainder for validation
ratio = 0.9
threshold = int(len(allDF) * ratio)

# normalize: fit the scaler on the training rows only, so that the min/max of
# the validation rows do not leak into the preprocessing, then apply it to all rows.
# NOTE: validation values may therefore fall slightly outside [0, 1], which the
# sigmoid output layer below cannot reproduce exactly.
scaler = preprocessing.MinMaxScaler()
scaler.fit(allDF.iloc[:threshold])
input_data = scaler.transform(allDF)
shape = np.shape(input_data)
print ("input data shape: ", shape) # (number of days, number of factors)
sample_num = shape[0]
dim_num = shape[1]
x_train, x_test = input_data[:threshold], input_data[threshold:]

############# AutoEncoder Model ##########################
encoding_dim = 30   # size of the bottleneck layer (reduced dimensionality)
batch_size = 20
epochs = 350
input_factors = Input(shape=(dim_num,))
# "encoded" is the encoded representation of the input
encoded = Dense(encoding_dim, activation='relu')(input_factors)
# "decoded" is the lossy reconstruction of the input
decoded = Dense(dim_num, activation='sigmoid')(encoded)
# this model maps an input to its reconstruction
autoencoder = Model(input_factors, decoded)

# compile
# Alternatives tried earlier:
#autoencoder.compile(optimizer='rmsprop', loss='categorical_crossentropy')
#autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
#autoencoder.compile(optimizer='rmsprop', loss='binary_crossentropy')
autoencoder.compile(loss='mean_squared_error', optimizer='adam')
# training: the network is fitted to reproduce its own input
# (optional logging: callbacks=[TensorBoard(log_dir='/home/dclab/tmp/feature_extract/')])
autoencoder.fit(x_train, x_train,
                epochs=epochs,
                batch_size=batch_size,
                shuffle=True,
                validation_data=(x_test, x_test))


Factors Shape: (727, 10) (727, 38)
input data shape:  (727, 48)
Train on 654 samples, validate on 73 samples
Epoch 1/350
Epoch 2/350
Epoch 3/350
Epoch 4/350
Epoch 5/350
Epoch 6/350
Epoch 7/350
Epoch 8/350
Epoch 9/350
Epoch 10/350
Epoch 11/350
Epoch 12/350
Epoch 13/350
Epoch 14/350
Epoch 15/350
Epoch 16/350
Epoch 17/350
Epoch 18/350
Epoch 19/350
Epoch 20/350
Epoch 21/350
Epoch 22/350
Epoch 23/350
Epoch 24/350
Epoch 25/350
Epoch 26/350
Epoch 27/350
Epoch 28/350
Epoch 29/350
Epoch 30/350
Epoch 31/350
Epoch 32/350
Epoch 33/350
Epoch 34/350
Epoch 35/350
Epoch 36/350
Epoch 37/350
Epoch 38/350
Epoch 39/350
Epoch 40/350
Epoch 41/350
Epoch 42/350
Epoch 43/350
Epoch 44/350
Epoch 45/350
Epoch 46/350
Epoch 47/350
Epoch 48/350
Epoch 49/350
Epoch 50/350
Epoch 51/350
Epoch 52/350
Epoch 53/350
Epoch 54/350
Epoch 55/350
Epoch 56/350
Epoch 57/350
Epoch 58/350
Epoch 59/350
Epoch 60/350
Epoch 61/350
Epoch 62/350
Epoch 63/350
Epoch 64/350
Epoch 65/350
Epoch 66/350
Epoch 67/350
Epoch 68/350
Epoch 69/350
Epo

Epoch 88/350
Epoch 89/350
Epoch 90/350
Epoch 91/350
Epoch 92/350
Epoch 93/350
Epoch 94/350
Epoch 95/350
Epoch 96/350
Epoch 97/350
Epoch 98/350
Epoch 99/350
Epoch 100/350
Epoch 101/350
Epoch 102/350
Epoch 103/350
Epoch 104/350
Epoch 105/350
Epoch 106/350
Epoch 107/350
Epoch 108/350
Epoch 109/350
Epoch 110/350
Epoch 111/350
Epoch 112/350
Epoch 113/350
Epoch 114/350
Epoch 115/350
Epoch 116/350
Epoch 117/350
Epoch 118/350
Epoch 119/350
Epoch 120/350
Epoch 121/350
Epoch 122/350
Epoch 123/350
Epoch 124/350
Epoch 125/350
Epoch 126/350
Epoch 127/350
Epoch 128/350
Epoch 129/350
Epoch 130/350
Epoch 131/350
Epoch 132/350
Epoch 133/350
Epoch 134/350
Epoch 135/350
Epoch 136/350
Epoch 137/350
Epoch 138/350
Epoch 139/350
Epoch 140/350
Epoch 141/350
Epoch 142/350
Epoch 143/350
Epoch 144/350
Epoch 145/350
Epoch 146/350
Epoch 147/350
Epoch 148/350
Epoch 149/350
Epoch 150/350
Epoch 151/350
Epoch 152/350
Epoch 153/350
Epoch 154/350
Epoch 155/350
Epoch 156/350
Epoch 157/350
Epoch 158/350
Epoch 159/350
Epoc

Epoch 173/350
Epoch 174/350
Epoch 175/350
Epoch 176/350
Epoch 177/350
Epoch 178/350
Epoch 179/350
Epoch 180/350
Epoch 181/350
Epoch 182/350
Epoch 183/350
Epoch 184/350
Epoch 185/350
Epoch 186/350
Epoch 187/350
Epoch 188/350
Epoch 189/350
Epoch 190/350
Epoch 191/350
Epoch 192/350
Epoch 193/350
Epoch 194/350
Epoch 195/350
Epoch 196/350
Epoch 197/350
Epoch 198/350
Epoch 199/350
Epoch 200/350
Epoch 201/350
Epoch 202/350
Epoch 203/350
Epoch 204/350
Epoch 205/350
Epoch 206/350
Epoch 207/350
Epoch 208/350
Epoch 209/350
Epoch 210/350
Epoch 211/350
Epoch 212/350
Epoch 213/350
Epoch 214/350
Epoch 215/350
Epoch 216/350
Epoch 217/350
Epoch 218/350
Epoch 219/350
Epoch 220/350
Epoch 221/350
Epoch 222/350
Epoch 223/350
Epoch 224/350
Epoch 225/350
Epoch 226/350
Epoch 227/350
Epoch 228/350
Epoch 229/350
Epoch 230/350
Epoch 231/350
Epoch 232/350
Epoch 233/350
Epoch 234/350
Epoch 235/350
Epoch 236/350
Epoch 237/350
Epoch 238/350
Epoch 239/350
Epoch 240/350
Epoch 241/350
Epoch 242/350
Epoch 243/350
Epoch 

Epoch 256/350
Epoch 257/350
Epoch 258/350
Epoch 259/350
Epoch 260/350
Epoch 261/350
Epoch 262/350
Epoch 263/350
Epoch 264/350
Epoch 265/350
Epoch 266/350
Epoch 267/350
Epoch 268/350
Epoch 269/350
Epoch 270/350
Epoch 271/350
Epoch 272/350
Epoch 273/350
Epoch 274/350
Epoch 275/350
Epoch 276/350
Epoch 277/350
Epoch 278/350
Epoch 279/350
Epoch 280/350
Epoch 281/350
Epoch 282/350
Epoch 283/350
Epoch 284/350
Epoch 285/350
Epoch 286/350
Epoch 287/350
Epoch 288/350
Epoch 289/350
Epoch 290/350
Epoch 291/350
Epoch 292/350
Epoch 293/350
Epoch 294/350
Epoch 295/350
Epoch 296/350
Epoch 297/350
Epoch 298/350
Epoch 299/350
Epoch 300/350
Epoch 301/350
Epoch 302/350
Epoch 303/350
Epoch 304/350
Epoch 305/350
Epoch 306/350
Epoch 307/350
Epoch 308/350
Epoch 309/350
Epoch 310/350
Epoch 311/350
Epoch 312/350
Epoch 313/350
Epoch 314/350
Epoch 315/350
Epoch 316/350
Epoch 317/350
Epoch 318/350
Epoch 319/350
Epoch 320/350
Epoch 321/350
Epoch 322/350
Epoch 323/350
Epoch 324/350
Epoch 325/350
Epoch 326/350
Epoch 

Epoch 339/350
Epoch 340/350
Epoch 341/350
Epoch 342/350
Epoch 343/350
Epoch 344/350
Epoch 345/350
Epoch 346/350
Epoch 347/350
Epoch 348/350
Epoch 349/350
Epoch 350/350


<keras.callbacks.History at 0x7faec080a898>

In [None]:
# Results of earlier runs (a setting that is not listed on a line was not recorded for that run):
# optimizer='rmsprop', loss='binary_crossentropy', encoding_dim = 20, loss: 0.4696 - val_loss: 0.6308
# optimizer='adadelta', loss='binary_crossentropy', loss: 0.4767 - val_loss: 0.6546
# optimizer='rmsprop', encoding_dim = 40, loss: 0.4782 - val_loss: 0.6248
# encoding_dim = 8, loss: 0.4850 - val_loss: 0.6339
# encoding_dim = 18, loss: 0.4795 - val_loss: 0.6230
# encoding_dim = 30, loss: 0.4787 - val_loss: 0.6227
# loss='mean_squared_error', optimizer='adam', loss: 4.6516e-04 - val_loss: 0.0044