In [18]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pykrx import stock
from tensorflow.keras.layers import Conv2D, Conv2DTranspose, Dense, Flatten, Dropout, BatchNormalization, Reshape, LeakyReLU
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
import tensorflow as tf
from datetime import datetime, timedelta

In [19]:
# Query window (YYYYMMDD) and KRX ticker shared by every pykrx request below,
# kept in one place so the five downloads can never drift apart.
START_DATE = "20160701"
END_DATE = "20210714"
TICKER = "000660"

# Daily series fetched per date: OHLCV, market cap, trading volume by
# investor type, fundamentals and short-selling status.
df = stock.get_market_ohlcv_by_date(START_DATE, END_DATE, TICKER)
cap_df = stock.get_market_cap_by_date(START_DATE, END_DATE, TICKER)
volume_df = stock.get_market_trading_volume_by_date(START_DATE, END_DATE, TICKER)
fm_df = stock.get_market_fundamental_by_date(START_DATE, END_DATE, TICKER)
short_df = stock.get_shorting_status_by_date(START_DATE, END_DATE, TICKER)

# Prefix the short-selling volume/value columns so they do not collide with
# the plain volume/value columns that are already present when joining.
short_df = short_df.rename(columns={'거래량': '공매도거래량', '거래대금': '공매도거래대금'})

# Drop the columns that are not wanted in the merged frame: the volume column
# of cap_df repeats the one in the OHLCV frame, and the listed-share count and
# the all-investor total are discarded as well.
cap_df = cap_df.drop(columns=['거래량', '상장주식수'])
volume_df = volume_df.drop(columns=['전체'])

# Index-aligned left joins onto the OHLCV frame, in the original order.
df = df.join(cap_df).join(volume_df).join(fm_df).join(short_df)
print(df)

                시가      고가      저가      종가      거래량            시가총액  \
날짜                                                                    
2016-07-01   31800   32700   31600   32150  5289586  23405276034750   
2016-07-04   32250   32900   32050   32750  3192033  23842077453750   
2016-07-05   32650   32650   31850   31850  2577231  23186875325250   
2016-07-06   31100   31350   30300   30600  6314347  22276872369000   
2016-07-07   30750   31350   30600   30650  2715183  22313272487250   
...            ...     ...     ...     ...      ...             ...   
2021-07-08  123500  123500  121000  121500  3165552  88452287347500   
2021-07-09  120500  120500  118000  119500  4823577  86996282617500   
2021-07-12  121000  122000  119500  120000  2477201  87360283800000   
2021-07-13  121000  123500  121000  123000  2879072  89544290895000   
2021-07-14  122500  124000  121500  123500  2443087  89908292077500   

                    거래대금     기관합계   기타법인       개인  ...    BPS    PER   PBR  

In [20]:
# Write the merged frame to disk. "utf-8-sig" prepends a UTF-8 byte-order mark;
# presumably chosen so spreadsheet tools detect the encoding of the Korean
# column headers -- confirm with the consumer of this file.
df.to_csv('Sample.csv', encoding = "utf-8-sig")

In [21]:
# Min-max scale each of the first 21 columns of df to [0, 1] independently and
# store the result feature-major: test_array[i] is column i over all rows.
# The row count is taken from df instead of being hard-coded, so a different
# date range no longer breaks this cell.
n_features = 21  # the model cells below are built for exactly 21 features

feature_values = df.iloc[:, :n_features].to_numpy(dtype=float)  # (rows, features)
col_min = feature_values.min(axis=0)
col_range = feature_values.max(axis=0) - col_min

# A constant column has zero range; dividing by 1 instead maps it to all
# zeros rather than producing NaN/inf from a division by zero.
safe_range = np.where(col_range == 0, 1.0, col_range)

test_array = ((feature_values - col_min) / safe_range).T  # (features, rows)

In [22]:
# Flip to row-major (rows, 21) and cut the rows into consecutive groups of 21,
# giving single-channel 21x21 samples for the convolutional autoencoder.
# reshape raises if the row count is not a multiple of 21.
fin_array = np.transpose(test_array).reshape((-1, 21, 21, 1))
print(fin_array.shape)

(59, 21, 21, 1)


In [23]:
# Encoder: four Conv2D -> BatchNormalization -> LeakyReLU stages followed by a
# dense projection to a 2-dimensional latent code.
encoder_input = tf.keras.Input(shape=(21, 21, 1))

# (filters, stride) per stage; kernel size is 3 and padding is 'same'
# throughout. The strides of 3 and 7 shrink the 21x21 grid to 7x7 and 1x1.
encoder_stages = ((27, 1), (81, 3), (81, 7), (81, 1))

x = encoder_input
for n_filters, stride in encoder_stages:
    x = Conv2D(n_filters, 3, strides=stride, padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU()(x)

x = Flatten()(x)

encoder_output = Dense(2)(x)

encoder = Model(encoder_input, encoder_output)

In [25]:
# Decoder: expands the 2-dimensional latent code to a 1x1x81 tensor, then
# applies four Conv2DTranspose -> BatchNormalization -> LeakyReLU stages and a
# final single-channel Conv2DTranspose.
decoder_input = tf.keras.Input(shape=(2, ))

x = Dense(1*1*81)(decoder_input)
x = Reshape((1, 1, 81))(x)

# (filters, stride) per stage; kernel size is 3 and padding is 'same'
# throughout. The strides of 7 and 3 grow the 1x1 grid to 7x7 and 21x21.
decoder_stages = ((81, 1), (81, 7), (81, 3), (27, 1))

for n_filters, stride in decoder_stages:
    x = Conv2DTranspose(n_filters, 3, strides=stride, padding='same')(x)
    x = BatchNormalization()(x)
    x = LeakyReLU()(x)

# NOTE(review): tanh spans [-1, 1] while the training targets are min-max
# scaled; consider whether a [0, 1]-bounded activation would fit better.
decoder_output = Conv2DTranspose(1, 3, strides=1, padding='same', activation='tanh')(x)

decoder = Model(decoder_input, decoder_output)

In [27]:
# Training hyper-parameters.
LEARNING_RATE = 0.0005
BATCH_SIZE = 32

# Chain encoder and decoder into one end-to-end model: a 21x21x1 sample goes
# in, is compressed to the latent code, and is reconstructed by the decoder.
encoder_in = tf.keras.Input(shape=(21, 21, 1))
x = encoder(encoder_in)
decoder_out = decoder(x)
auto_encoder = Model(inputs=encoder_in, outputs=decoder_out)

# Optimise the mean squared reconstruction error with Adam.
adam = tf.keras.optimizers.Adam(LEARNING_RATE)
mse = tf.keras.losses.MeanSquaredError()
auto_encoder.compile(optimizer=adam, loss=mse)

In [28]:
# Save weights only, and only when the training loss improves, so the file at
# checkpoint_path always holds the best epoch seen so far.
checkpoint_path = 'tmp/result.ckpt'
checkpoint = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='loss',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

# Autoencoder training: the input is also the reconstruction target.
auto_encoder.fit(
    x=fin_array,
    y=fin_array,
    batch_size=BATCH_SIZE,
    epochs=100,
    callbacks=[checkpoint],
)
# Restore the best checkpoint rather than keeping the last epoch's weights.
auto_encoder.load_weights(checkpoint_path)

Epoch 1/100

  "Even though the `tf.config.experimental_run_functions_eagerly` "



Epoch 00001: loss improved from inf to 0.29631, saving model to tmp\01-basic-auto-encoder.ckpt
Epoch 2/100

Epoch 00002: loss improved from 0.29631 to 0.20368, saving model to tmp\01-basic-auto-encoder.ckpt
Epoch 3/100

Epoch 00003: loss improved from 0.20368 to 0.15313, saving model to tmp\01-basic-auto-encoder.ckpt
Epoch 4/100

Epoch 00004: loss improved from 0.15313 to 0.12523, saving model to tmp\01-basic-auto-encoder.ckpt
Epoch 5/100

Epoch 00005: loss improved from 0.12523 to 0.11684, saving model to tmp\01-basic-auto-encoder.ckpt
Epoch 6/100

Epoch 00006: loss improved from 0.11684 to 0.10516, saving model to tmp\01-basic-auto-encoder.ckpt
Epoch 7/100

Epoch 00007: loss improved from 0.10516 to 0.09560, saving model to tmp\01-basic-auto-encoder.ckpt
Epoch 8/100

Epoch 00008: loss improved from 0.09560 to 0.08915, saving model to tmp\01-basic-auto-encoder.ckpt
Epoch 9/100

Epoch 00009: loss did not improve from 0.08915
Epoch 10/100

Epoch 00010: loss improved from 0.08915 to 0.0

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x1f56440efd0>

In [29]:
# Reconstruct every training sample with the trained autoencoder; the result
# is compared element-wise against fin_array in the cells below.
decoded_arr = auto_encoder.predict(fin_array)

In [30]:
# Flatten the original samples (a_array) and their reconstructions (b_array)
# back to one 21-value row each. The row count is inferred with -1 instead of
# being hard-coded, so this keeps working when the number of samples changes.
a_array = fin_array.reshape(-1, 21)
b_array = decoded_arr.reshape(-1, 21)

In [57]:
# Absolute reconstruction error per element, same shape as a_array.
num = np.abs(a_array - b_array)

# Record the "row col" position of every element whose error is below 0.1.
# The with-block guarantees the file is flushed and closed; np.nonzero yields
# positions in row-major order, i.e. the order a nested row/column loop visits.
with open("flag_1.txt", 'w') as f:
    for i, j in zip(*np.nonzero(num < 0.1)):
        f.write("{} {}\n".format(i, j))

# Bucket the errors into [0, 0.1), [0.1, 0.3), [0.3, 0.5), [0.5, 0.7) and the
# rest. Counts are differences of cumulative "< threshold" counts; anything
# not below 0.7 (including NaN, which fails every comparison) lands in flag_5.
below_01, below_03, below_05, below_07 = (
    int(np.count_nonzero(num < threshold)) for threshold in (0.1, 0.3, 0.5, 0.7)
)
flag_1 = below_01
flag_2 = below_03 - below_01
flag_3 = below_05 - below_03
flag_4 = below_07 - below_05
flag_5 = int(num.size) - below_07
print(flag_1, flag_2, flag_3, flag_4, flag_5)

6452 14766 2972 1789 40


In [62]:
# Dump the per-element absolute reconstruction errors as comma-separated text.
np.savetxt('AE_abs.csv', num, delimiter=',')