In [1]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Wed Jul 26 03:38:15 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0    25W / 300W |      0MiB / 16384MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
from psutil import virtual_memory
# Total memory reported by psutil, converted from bytes to decimal gigabytes.
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# 20 GB is the threshold this notebook uses to call a runtime "high-RAM".
is_high_ram = not (ram_gb < 20)
print('You are using a high-RAM runtime!' if is_high_ram else 'Not using a high-RAM runtime')

Your runtime has 27.3 gigabytes of available RAM

You are using a high-RAM runtime!


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pandas_datareader as pdr
from datetime import datetime,timedelta
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense ,LSTM, Dropout,Bidirectional,TimeDistributed
from sklearn.model_selection import train_test_split, KFold
from tensorflow import expand_dims
from tensorflow.keras.losses import Huber
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from os.path import join
from tensorflow import constant
from time import sleep
from prophet import Prophet
#import FinanceDataReader as fdr

In [None]:
# Load the raw training table from Google Drive into `data`.
# NOTE(review): the path presumably requires Drive to be mounted at /content/drive — confirm.
data=pd.read_csv('/content/drive/MyDrive/2023_1st_vac/KRX_DATA/train.csv')

In [None]:
def convert_date(date_str):
  """Reformat a ``YYYYMMDD`` date value as a ``YYYY-MM-DD`` string.

  Args:
    date_str: Date value that ``pd.to_datetime`` can parse with the
      ``%Y%m%d`` format.

  Returns:
    The same date rendered as ``YYYY-MM-DD`` text.
  """
  parsed = pd.to_datetime(date_str, format="%Y%m%d")
  return parsed.strftime("%Y-%m-%d")
# Rewrite every value of the "일자" (date) column from YYYYMMDD to YYYY-MM-DD text.
data["일자"] = data["일자"].map(convert_date)

In [None]:
# prescaler
def getAOC(data):
  """Return the day-over-day percentage change of the closing price.

  aoc = (today's close - previous close) / previous close * 100

  The previous close is taken positionally (the preceding row), so the result
  does not depend on the index labels. When the index is a MultiIndex with a
  '종목코드' level, the previous close is looked up within each stock code, so
  the first row of one stock is never compared against the last row of the
  stock listed before it.

  Args:
    data: DataFrame with a '종가' (close) column, ordered oldest to newest
      (within each stock code when a '종목코드' index level is present).

  Returns:
    A list with one value per row of ``data``. Rows without a previous close
    (the first row overall, or the first row of each stock code) are 0.
    An empty frame yields an empty list.
  """
  close = data['종가']
  if len(close) == 0:
    return []

  index = close.index
  if isinstance(index, pd.MultiIndex) and '종목코드' in index.names:
    # Shift inside each stock so stock boundaries do not leak prices.
    close_pre = close.groupby(level='종목코드').shift(1)
    # First occurrence of each code marks the row that has no previous close.
    is_first = ~index.get_level_values('종목코드').duplicated()
  else:
    close_pre = close.shift(1)
    is_first = np.zeros(len(close), dtype=bool)
    is_first[0] = True

  # Vectorised arithmetic aligns on position, unlike close[i], which is
  # label-based on an integer index.
  aoc = ((close - close_pre) / close_pre * 100).to_numpy(dtype=float, copy=True)
  aoc[is_first] = 0
  return aoc.tolist()

# Order the raw rows by stock code ("종목코드") before pivoting.
subject_Data = data.sort_values(by="종목코드", inplace=False)

# One row per (stock code, date) holding volume, open, high, low and close,
# plus the AOC column computed by getAOC.
value_columns = ['거래량', '시가', '고가', '저가', '종가']
pv_s_data = pd.pivot_table(subject_Data, values=value_columns, index=['종목코드', '일자'])
pv_s_data["AOC"] = getAOC(pv_s_data)

# Distinct stock codes, in the order they appear in the pivot index.
divided = pv_s_data.index.get_level_values('종목코드').unique()

# One date-indexed frame per stock code.
sliced_dataframes = [
    pv_s_data.loc[code].reset_index().set_index('일자')
    for code in divided
]

In [None]:
# Display the first per-stock frame as a visual check of the slicing step.
sliced_dataframes[0]

In [None]:
# Build a one-column frame of distinct stock codes ("종목코드"), sorted by code.
labels = (
    data[['종목코드', '일자']]
    .sort_values(by=['종목코드', '일자'])
    .drop('일자', axis=1)
    .drop_duplicates('종목코드')
)
# Reorder the raw table itself, in place, by stock code and then date.
data.sort_values(by=['종목코드', "일자"], inplace=True)

In [None]:
# Stock codes as a plain list, one entry per stock, in the order of `labels`.
# The values are read from the "종목코드" column explicitly: iterating a
# DataFrame directly yields its column names, which would leave this list with
# the single entry '종목코드' instead of the codes.
label_lis = labels['종목코드'].tolist()

## Model shells

In [None]:
# Show floats in displayed pandas objects with five decimal places.
pd.options.display.float_format = '{:.5f}'.format

class LSTM2():
  """Scale one stock's frame, window it, train a bidirectional LSTM and forecast.

  Every column of ``data`` is used both as a model input and as a target.

  Args:
    data: DataFrame for a single stock; all columns are fed to the model.
    idx: Identifier formatted into the checkpoint file name.
    time_steps: Number of rows per window. Defaults to 60.
  """

  def __init__(self, data, idx, time_steps=60):
    self.data = data
    self.idx = idx
    self.time_steps = time_steps

  def create_sequences(self, data):
    """Cut a 2-D array into overlapping windows of ``time_steps`` rows.

    Args:
      data: Array of shape (num_samples, num_features).

    Returns:
      Array of shape (num_samples - time_steps + 1, time_steps, num_features).
      When ``data`` has fewer than ``time_steps`` rows the result is an empty
      array that still has three dimensions, so callers can unpack its shape.
    """
    num_samples, num_features = data.shape
    num_windows = num_samples - self.time_steps + 1
    if num_windows <= 0:
      return np.empty((0, self.time_steps, num_features))
    return np.array([data[i:i + self.time_steps, :] for i in range(num_windows)])

  def inverse_sequences(self, data_sequences):
    """Collapse overlapping windows back into rows by averaging the overlaps.

    Window ``i`` is treated as covering rows ``i .. i + num_steps - 1``. Each
    row is the mean of every window value that covers it, so the divisor is
    the number of windows actually covering that row (rows near the start are
    covered by fewer windows than ``num_steps``).

    Args:
      data_sequences: Array of shape (num_samples, num_steps, num_features).

    Returns:
      Array of shape (num_samples, num_features): the first ``num_samples``
      reconstructed rows.
    """
    data_sequences = np.asarray(data_sequences)
    num_samples, num_steps, num_features = data_sequences.shape
    if num_samples == 0:
      return np.zeros((0, num_features))

    total_rows = num_samples + num_steps - 1
    data = np.zeros((total_rows, num_features))
    coverage = np.zeros((total_rows, 1))
    for i in range(num_samples):
      data[i:i + num_steps, :] += data_sequences[i, :, :]
      coverage[i:i + num_steps] += 1
    # Every row is covered by at least one window, so coverage is never zero.
    data /= coverage
    return data[:num_samples]

  def slicing_data(self):
    """Scale the frame to [0, 1], split it chronologically and window each split.

    Sets ``x_scaler``, ``y_scaler``, ``num_features``, ``num_samples`` and the
    windowed arrays ``x_train``/``x_val``/``x_test``/``x_final`` and
    ``y_train``/``y_val``/``y_test``.

    Raises:
      ValueError: If any split has fewer than ``time_steps`` rows and so
        cannot produce a single window.
    """
    self.x_scaler = MinMaxScaler()
    self.y_scaler = MinMaxScaler()
    # Inputs and targets are built from the same (all) columns.
    # NOTE(review): this makes every target window identical to its input
    # window — confirm that targets are not meant to be shifted forward in time.
    x_data = self.x_scaler.fit_transform(self.data)
    y_data = self.y_scaler.fit_transform(self.data)

    # Chronological splits (shuffle=False): the last 80% of rows become the
    # test split and the first 20% are divided 75/25 into train/validation.
    # NOTE(review): confirm that test_size=0.8 (rather than 0.2) is intended.
    x_train_p, x_test, y_train_p, y_test = train_test_split(x_data, y_data, test_size=0.8, shuffle=False)
    x_train, x_val, y_train, y_val = train_test_split(x_train_p, y_train_p, test_size=0.25, shuffle=False)
    # The most recent window, used as the seed for forecasting. Plain slicing
    # gives exactly time_steps rows; a float test_size can round up by one.
    x_final = x_data[-self.time_steps:]

    for split_name, split in (('train', x_train), ('validation', x_val),
                              ('test', x_test), ('final', x_final)):
      if len(split) < self.time_steps:
        raise ValueError(
            "{} split has {} rows but at least {} are needed to build one window".format(
                split_name, len(split), self.time_steps))

    self.num_features = x_data.shape[1]
    self.num_samples = x_data.shape[0]

    self.x_train = self.create_sequences(x_train)
    self.x_test = self.create_sequences(x_test)
    self.x_val = self.create_sequences(x_val)
    self.x_final = self.create_sequences(x_final)

    self.y_train = self.create_sequences(y_train)
    self.y_test = self.create_sequences(y_test)
    self.y_val = self.create_sequences(y_val)

  def model_struct(self):
    """Build the network: a bidirectional LSTM followed by a stack of Dense layers.

    The LSTM returns the full sequence, so the model emits one output vector
    (one value per data column) for every time step of the input window.
    """
    self.model = Sequential()
    # layers
    self.model.add(Bidirectional(LSTM(1024, return_sequences=True, input_shape=(self.time_steps, self.num_features))))
    self.model.add(Dense(256, activation='relu'))
    self.model.add(Dense(128, activation='relu'))
    self.model.add(Dense(64, activation='relu'))
    self.model.add(Dense(32, activation='relu'))
    self.model.add(Dense(16, activation='relu'))
    # output layer (many-to-many with TimeDistributed)
    self.model.add(TimeDistributed(Dense(len(self.data.columns), activation='relu')))

  def learning(self):
    """Compile and fit the model, checkpointing the weights with the best val_loss.

    Sets ``filename`` (checkpoint path) and ``history`` (the fit history).
    """
    model_save_path = '/content/drive/MyDrive/2023_1st_vac/KRX_modelings/best_model/LSTM2/'

    # NOTE(review): the two adjacent string literals are concatenated into one
    # file name; kept as-is so existing checkpoints on disk still match.
    self.filename = join(model_save_path, 'ckeckpointer.ckpt''cke ckpointer_0726_{}.ckpt'.format(self.idx))
    checkpoint = ModelCheckpoint(self.filename, save_weights_only=True, save_best_only=True, monitor='val_loss', verbose=0)
    earlystopping = EarlyStopping(monitor='val_loss', patience=100)
    self.model.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.001))
    self.history = self.model.fit(self.x_train, self.y_train, epochs=1, batch_size=128, validation_data=(self.x_val, self.y_val), shuffle=False, callbacks=[checkpoint, earlystopping], verbose=0)

  def get_gap_by_test(self):
    """Return the mean absolute error on the test split, in original units.

    Loads the checkpointed weights, predicts the test windows, collapses both
    predictions and targets back to rows, undoes the scaling and averages the
    absolute differences over all rows and columns.
    """
    self.model.load_weights(self.filename)
    pred = self.model.predict(self.x_test)
    rescaled_pred = self.y_scaler.inverse_transform(self.inverse_sequences(pred))
    rescaled_real = self.y_scaler.inverse_transform(self.inverse_sequences(self.y_test))
    return np.mean(np.abs(rescaled_pred - rescaled_real))

  def return_val(self):
    """Run the whole pipeline and return the forecast closes and the test gap.

    Returns:
      A tuple ``(prices, gap)``: ``prices`` is a list of 15 values taken from
      the '종가' (close) column of the rescaled forecast, and ``gap`` is the
      value returned by ``get_gap_by_test``.

    Raises:
      ValueError: Propagated from ``slicing_data`` when the frame is too short.
    """
    horizon = 15

    self.slicing_data()
    self.model_struct()
    self.learning()
    self.model.load_weights(self.filename)

    # Roll forward: feed the newest window to the model and append its output
    # as the next window, `horizon` times.
    next_input_data = np.copy(self.x_final)
    for _ in range(horizon):
      pred_day = self.model.predict(np.expand_dims(next_input_data[-1], axis=0))
      next_input_data = np.concatenate((next_input_data, pred_day), axis=0)

    next_15_days_data = next_input_data[-horizon:]
    next_15_days_data = self.y_scaler.inverse_transform(self.inverse_sequences(next_15_days_data))

    columns = self.data.columns.tolist()
    ret_df = pd.DataFrame(next_15_days_data, columns=columns)

    # Select the close price by name; fall back to the original positional
    # choice (second-to-last column) for frames without a '종가' column.
    if '종가' in ret_df.columns:
      close_series = ret_df['종가']
    else:
      close_series = ret_df.iloc[:, -2]
    prices = close_series.tolist()

    gap = self.get_gap_by_test()

    return prices, gap

## TEST **Shell**

In [None]:
# Test run: train and forecast for the first two stocks only.
m = LSTM2(sliced_dataframes[0], 0)
pred, gap1 = m.return_val()

m = LSTM2(sliced_dataframes[1], 1)
pred1, gap = m.return_val()

# Forecast closes and test gaps, in stock order.
l1 = [pred, pred1]
l2 = [gap1, gap]

df = pd.DataFrame({'종가': l1, 'gap': l2})
df.to_csv('/content/drive/MyDrive/2023_1st_vac/KRX_modelings/predicitions/{}.csv'.format("230725_v0.1"),index=True)

## Predict **Shell**

In [None]:
# Starting position and empty accumulators for the full prediction run.
idx = 0
pred_list, gap_acc, out_labels = [], [], []

In [None]:
# Train one model per stock and collect its forecast closes and its test gap.
# The accumulators are rebuilt here and the position comes from enumerate(), so
# re-running this cell cannot append duplicate results or let idx run past the
# end of `labels`.
pred_list = []
gap_acc = []
for idx, c in enumerate(sliced_dataframes):
  print(labels.iloc[idx])
  print("{}번째 종목코드".format(idx))
  LSTM_model = LSTM2(c, idx=idx)
  pred, gap = LSTM_model.return_val()
  # Short pause between models (original note: "to prevent model corrupting").
  sleep(0.5)
  pred_list.append(pred)
  gap_acc.append(gap)

In [None]:
# Assemble the result table: stock code, forecast closes and test gap per stock.
pred_columns = {
    '종목코드': label_lis,
    '종가': pred_list,
    'gap': gap_acc,
}
pred_csv_df = pd.DataFrame(pred_columns)

In [None]:
# Write the result table to Drive as CSV; index=True also writes the row index.
# NOTE(review): the file name is the same one the test cell writes to, so this overwrites that file.
pred_csv_df.to_csv('/content/drive/MyDrive/2023_1st_vac/KRX_modelings/predicitions/{}.csv'.format("230725_v0.1"),index=True)