In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pykrx import stock
from tensorflow.keras.layers import Conv2D, Conv2DTranspose, Dense, Flatten, Dropout, BatchNormalization, Reshape, LeakyReLU
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
import tensorflow as tf
from datetime import datetime, timedelta

In [27]:
# Fetch daily OHLCV and per-investor trading volume for one ticker over the
# same date range, then combine them into a single frame indexed by date.
start, end, ticker = "19990106", "20210714", "000660"

ohlcv = stock.get_market_ohlcv_by_date(start, end, ticker)
volume_df = stock.get_market_trading_volume_by_date(start, end, ticker)
# These two investor columns are not used as features.
volume_df = volume_df.drop(columns=['기타법인', '전체'])

# Index-aligned join; rows with a missing value on either side are discarded.
df = ohlcv.join(volume_df).dropna()
print(df)

                시가      고가      저가      종가      거래량       기관합계         개인  \
날짜                                                                          
1999-01-06  364223  382090  354602  354601  1048060    28270.0   841250.0   
1999-01-07  407517  407517  384839  407517  3325700 -1242610.0  2322640.0   
1999-01-08  399958  399958  362161  369720  1105840   -66980.0   336250.0   
1999-01-11  361474  389650  344981  364909  1321090   -53250.0   419310.0   
1999-01-12  364910  382090  360099  364222   777710   -56680.0   205950.0   
...            ...     ...     ...     ...      ...        ...        ...   
2021-07-08  123500  123500  121000  121500  3165552  -172724.0  1062210.0   
2021-07-09  120500  120500  118000  119500  4823577  -439187.0  1576301.0   
2021-07-12  121000  122000  119500  120000  2477201   -65203.0   174530.0   
2021-07-13  121000  123500  121000  123000  2879072   116372.0 -1093033.0   
2021-07-14  122500  124000  121500  123500  2443087  -106894.0  -393989.0   

In [28]:
# Persist the joined frame; the BOM-prefixed UTF-8 encoding is kept as in the
# original call (the column headers are non-ASCII).
df.to_csv(path_or_buf='Sample_2.csv', encoding='utf-8-sig')

In [29]:
# Min-max scale the first 8 columns of `df` to [0, 1], one column at a time.
# Result: `test_array` with shape (8, n_days) -- one row per feature.
#
# The number of days is taken from `df` instead of being hard-coded, and the
# scaling is vectorised rather than looping over every cell with
# `df.iloc[j][i]` (slow, and positional Series[int] access is deprecated).
feature_values = df.iloc[:, :8].to_numpy(dtype=float)  # (n_days, 8)
assert feature_values.shape[1] == 8, "expected at least 8 feature columns in df"

col_max = feature_values.max(axis=0)
col_min = feature_values.min(axis=0)
for max_tmp, min_tmp in zip(col_max, col_min):
    print(max_tmp, min_tmp)

# A constant column would give a zero range; scale it to 0 instead of
# dividing by zero.
col_range = col_max - col_min
col_range[col_range == 0] = 1.0

test_array = ((feature_values - col_min) / col_range).T

754502.0 2650.0
770480.0 2944.0
713670.0 2454.0
718996.0 2650.0
1832794266.0 494550.0
9929240.0 -208355414.0
207103064.0 -27931553.0
30005613.0 -38321590.0


In [31]:
# Turn the (n_features, n_days) matrix into samples of 8 consecutive days x
# 8 features with a trailing channel axis: (n_samples, 8, 8, 1).
fin_array = test_array.T  # (n_days, 8)

# reshape() requires the day count to be a multiple of 8. Drop the trailing
# remainder (if any) so the cell keeps working when the date range changes;
# trimming at the end keeps row i aligned with row i of `df`.
usable_days = fin_array.shape[0] - fin_array.shape[0] % 8
fin_array = fin_array[:usable_days].reshape(-1, 8, 8, 1)
print(fin_array.shape)

(694, 8, 8, 1)


In [33]:
# Encoder: four Conv2D -> BatchNormalization -> LeakyReLU stages over an
# (8, 8, 1) input, followed by Flatten and a 2-unit Dense bottleneck.
encoder_input = tf.keras.Input(shape=(8, 8, 1))

x = encoder_input
# (filters, strides) per stage; the two stride-2 stages each halve the
# spatial size (padding='same').
for filters, strides in ((32, 1), (64, 2), (64, 2), (64, 1)):
    x = Conv2D(filters, 2, strides=strides, padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU()(x)

x = Flatten()(x)

# 2-dimensional latent code.
encoder_output = Dense(2)(x)

encoder = Model(encoder_input, encoder_output)

In [34]:
# Decoder: expand the 2-dimensional code to a (2, 2, 64) tensor, then apply
# four Conv2DTranspose -> BatchNormalization -> LeakyReLU stages and a final
# single-channel Conv2DTranspose.
decoder_input = tf.keras.Input(shape=(2, ))

x = Dense(2*2*64)(decoder_input)
x = Reshape((2, 2, 64))(x)

# (filters, strides) per stage; the two stride-2 stages each double the
# spatial size (padding='same').
for filters, strides in ((64, 1), (64, 1), (64, 2), (32, 2)):
    x = Conv2DTranspose(filters, 2, strides=strides, padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU()(x)

# NOTE(review): tanh emits values in [-1, 1]; if the training inputs are
# min-max scaled to [0, 1], a sigmoid output may be a better fit -- confirm.
decoder_output = Conv2DTranspose(1, 2, strides=1, padding='same', activation='tanh')(x)

decoder = Model(decoder_input, decoder_output)

In [35]:
# Training hyper-parameters.
LEARNING_RATE = 0.0005
BATCH_SIZE = 32

# Chain encoder and decoder into one end-to-end model on (8, 8, 1) inputs.
encoder_in = tf.keras.Input(shape=(8, 8, 1))
x = encoder(encoder_in)
decoder_out = decoder(x)
auto_encoder = Model(inputs=encoder_in, outputs=decoder_out)

# Adam optimiser with mean-squared-error loss.
optimizer = tf.keras.optimizers.Adam(LEARNING_RATE)
loss_fn = tf.keras.losses.MeanSquaredError()
auto_encoder.compile(optimizer=optimizer, loss=loss_fn)

In [37]:
# Keep only the weights of the epoch with the lowest training loss (no
# validation data is passed to fit, so 'loss' is the monitored metric).
checkpoint_path = 'tmp/result_2.ckpt'
checkpoint = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
)

# Autoencoder training: the input is also the reconstruction target.
auto_encoder.fit(
    x=fin_array,
    y=fin_array,
    batch_size=BATCH_SIZE,
    epochs=30,
    callbacks=[checkpoint],
)
# Restore the best checkpoint rather than keeping the last epoch's weights.
auto_encoder.load_weights(checkpoint_path)

Epoch 1/30

Epoch 00001: loss improved from inf to 0.00056, saving model to tmp\result_2.ckpt
Epoch 2/30

Epoch 00002: loss improved from 0.00056 to 0.00052, saving model to tmp\result_2.ckpt
Epoch 3/30

Epoch 00003: loss did not improve from 0.00052
Epoch 4/30

Epoch 00004: loss did not improve from 0.00052
Epoch 5/30

Epoch 00005: loss improved from 0.00052 to 0.00048, saving model to tmp\result_2.ckpt
Epoch 6/30

Epoch 00006: loss improved from 0.00048 to 0.00046, saving model to tmp\result_2.ckpt
Epoch 7/30

Epoch 00007: loss did not improve from 0.00046
Epoch 8/30

Epoch 00008: loss improved from 0.00046 to 0.00046, saving model to tmp\result_2.ckpt
Epoch 9/30

Epoch 00009: loss did not improve from 0.00046
Epoch 10/30

Epoch 00010: loss did not improve from 0.00046
Epoch 11/30

Epoch 00011: loss did not improve from 0.00046
Epoch 12/30

Epoch 00012: loss did not improve from 0.00046
Epoch 13/30

Epoch 00013: loss did not improve from 0.00046
Epoch 14/30

Epoch 00014: loss did not

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x221875586a0>

In [38]:
# Reconstruct every training sample with the restored weights.
decoded_arr = auto_encoder.predict(x=fin_array)

In [40]:
# Flatten inputs and reconstructions back to one row per day and one column
# per feature. The row count is inferred from the data rather than being
# hard-coded, so this keeps working when the date range changes.
a_array = fin_array.reshape(-1, fin_array.shape[2])
# Raises if the reconstruction does not hold the same number of elements.
b_array = decoded_arr.reshape(a_array.shape)

In [43]:
# Absolute reconstruction error per (day, feature) cell, bucketed by size:
#   flag_1: error < 0.1
#   flag_2: 0.1 <= error < 0.3
#   flag_3: 0.3 <= error < 0.5
#   flag_4: 0.5 <= error < 0.7
#   flag_5: everything else (error >= 0.7, or NaN)
# Vectorised so that no array dimensions are hard-coded.
num = np.abs(a_array - b_array)

# Cumulative counts below each threshold; consecutive differences give the
# per-bucket counts of the original if/elif chain.
below = [int(np.count_nonzero(num < limit)) for limit in (0.1, 0.3, 0.5, 0.7)]
flag_1 = below[0]
flag_2 = below[1] - below[0]
flag_3 = below[2] - below[1]
flag_4 = below[3] - below[2]
flag_5 = int(num.size) - below[3]
print(flag_1, flag_2, flag_3, flag_4, flag_5)

44188 219 5 2 2


In [45]:
# Write the per-cell absolute reconstruction errors as comma-separated text.
np.savetxt(fname='AE_abs_2.csv', X=num, delimiter=',')