In [1]:
import pandas as pd
import numpy as np
import random
import os

from tqdm import tqdm
import warnings

import pandas_ta as ta
warnings.filterwarnings("ignore")

In [31]:
def calculate_technical_indicators(df: pd.DataFrame) -> pd.DataFrame:
    """Append a fixed set of pandas_ta indicator columns to ``df``.

    Every indicator is requested with ``append=True``, so the new columns are
    written onto ``df`` itself; the same object is also returned for
    convenience.

    The indicators are computed over the rows of ``df`` in their current
    order, with no grouping. NOTE(review): if ``df`` stacks several tickers,
    the rolling windows will span ticker boundaries -- pass one ticker at a
    time if that is not intended.

    Column names are passed as ``'Close'``, ``'Volume'``, ``'High'`` and
    ``'Low'``. NOTE(review): the frame used in this notebook has lower-case
    column names and the indicators are still produced, so pandas_ta
    presumably resolves them case-insensitively -- confirm.

    Args:
        df: Price/volume frame to extend.

    Returns:
        ``df``, with the indicator columns appended.
    """
    window_lengths = (10, 20, 50, 100, 200)

    # Moving averages: simple first, then exponential, one column per window.
    for window in window_lengths:
        df.ta.sma(close='Close', length=window, append=True)
    for window in window_lengths:
        df.ta.ema(close='Close', length=window, append=True)

    # Momentum: RSI and rate of change (ROC). MACD is not computed here.
    df.ta.rsi(close='Close', length=14, append=True)
    df.ta.roc(close='Close', append=True)

    # Volume: on-balance volume.
    df.ta.obv(close='Close', volume='Volume', append=True)

    # Volatility: average true range and Bollinger bands.
    df.ta.atr(close='Close', append=True)
    df.ta.bbands(close='Close', append=True)

    # Trend strength: ADX.
    df.ta.adx(close='Close', append=True)

    # Elder's Force Index (EFI): developed by Alexander Elder; combines price
    # movement with volume to gauge the "force" behind a move.
    df.ta.efi(length=13, append=True)

    # Kaufman's Adaptive Moving Average (KAMA): a moving average that adapts
    # to volatility.
    df.ta.kama(length=10, append=True)

    # Vortex Indicator (VI): tracks recent upward and downward price movement
    # to identify up-trends and down-trends.
    df.ta.vortex(high='High', low='Low', close='Close', length=14, append=True)

    # Indicators that were tried and left disabled: vp, ht_trendline, mfi, tii.
    return df


In [32]:
# Load the adjusted price data, drop the leftover CSV index column, round to
# two decimals, order rows by ticker then date, and index the frame by date.
train = (
    pd.read_csv("./train_adj.csv")
    .drop(columns='Unnamed: 0')
    .round(2)
    .sort_values(['ticker', 'date'], ascending=True)
    .set_index('date')
)
train

Unnamed: 0_level_0,ticker,firm,volume,open,high,low,close,adjustTrue
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-06-01,A000020,동화약품,114966.0,14700.0,14700.0,14450.0,14600.0,1
2021-06-02,A000020,동화약품,109559.0,14700.0,14700.0,14450.0,14500.0,1
2021-06-03,A000020,동화약품,96158.0,14550.0,14650.0,14450.0,14600.0,1
2021-06-04,A000020,동화약품,133900.0,14600.0,14800.0,14550.0,14700.0,1
2021-06-07,A000020,동화약품,511140.0,14800.0,15550.0,14750.0,15150.0,1
...,...,...,...,...,...,...,...,...
2023-05-23,A383800,LX홀딩스,150364.0,8390.0,8390.0,8310.0,8330.0,1
2023-05-24,A383800,LX홀딩스,122457.0,8310.0,8340.0,8280.0,8300.0,1
2023-05-25,A383800,LX홀딩스,84241.0,8300.0,8310.0,8270.0,8310.0,1
2023-05-26,A383800,LX홀딩스,126681.0,8300.0,8310.0,8270.0,8280.0,1


In [33]:
# Compute the indicators one ticker at a time. `train` stacks every ticker in
# a single frame, so running the rolling indicators over the whole frame lets
# each ticker's warm-up window read the previous ticker's prices.
# Groups are iterated explicitly (rather than via groupby.apply) so the
# `ticker` column is kept in each piece; sort=False keeps the existing order.
train = pd.concat(
    [
        calculate_technical_indicators(ticker_rows.copy())
        for _, ticker_rows in train.groupby('ticker', sort=False)
    ]
)
train

Unnamed: 0_level_0,ticker,firm,volume,open,high,low,close,adjustTrue,SMA_10,SMA_20,...,BBU_5_2.0,BBB_5_2.0,BBP_5_2.0,ADX_14,DMP_14,DMN_14,EFI_13,KAMA_10_2_30,VTXP_14,VTXM_14
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-06-01,A000020,동화약품,114966.0,14700.0,14700.0,14450.0,14600.0,1,,,...,,,,,,,,,,
2021-06-02,A000020,동화약품,109559.0,14700.0,14700.0,14450.0,14500.0,1,,,...,,,,,,,,,,
2021-06-03,A000020,동화약품,96158.0,14550.0,14650.0,14450.0,14600.0,1,,,...,,,,,,,,,,
2021-06-04,A000020,동화약품,133900.0,14600.0,14800.0,14550.0,14700.0,1,,,...,,,,,,,,,,
2021-06-07,A000020,동화약품,511140.0,14800.0,15550.0,14750.0,15150.0,1,,,...,15167.820926,6.224622,0.980537,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-05-23,A383800,LX홀딩스,150364.0,8390.0,8390.0,8310.0,8330.0,1,8394.0,8476.0,...,8386.496154,0.777978,0.130726,24.830786,12.066227,21.651061,-2.119785e+06,8402.067450,0.830769,1.130769
2023-05-24,A383800,LX홀딩스,122457.0,8310.0,8340.0,8280.0,8300.0,1,8370.0,8465.5,...,8380.708314,1.024426,0.055122,25.443523,11.486704,23.012618,-2.341775e+06,8374.877786,0.801587,1.214286
2023-05-25,A383800,LX홀딩스,84241.0,8300.0,8310.0,8270.0,8310.0,1,8351.0,8455.0,...,8375.607017,1.095006,0.280735,26.128448,11.103822,23.078862,-1.886891e+06,8361.207569,0.758065,1.241935
2023-05-26,A383800,LX홀딩스,126681.0,8300.0,8310.0,8270.0,8280.0,1,8337.0,8441.5,...,8370.552727,1.311994,0.170044,26.764450,10.719044,22.279117,-2.160254e+06,8347.169606,0.717949,1.333333


In [34]:
# Keep only the rows in which every column has a value (drops any row that
# contains at least one NaN, e.g. rows where an indicator is not yet defined).
test = train.dropna()
test

Unnamed: 0_level_0,ticker,firm,volume,open,high,low,close,adjustTrue,SMA_10,SMA_20,...,BBU_5_2.0,BBB_5_2.0,BBP_5_2.0,ADX_14,DMP_14,DMN_14,EFI_13,KAMA_10_2_30,VTXP_14,VTXM_14
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-03-23,A000020,동화약품,396150.0,13800.0,14100.0,13600.0,13650.0,1,13335.0,13020.0,...,14384.108004,8.314381,0.360653,20.440277,30.358104,13.516124,2.075979e+08,13031.963084,1.109091,0.842424
2022-03-24,A000020,동화약품,164839.0,13600.0,13700.0,13500.0,13600.0,1,13405.0,13062.5,...,14377.135531,8.062743,0.302562,21.452135,29.627900,14.393671,1.767636e+08,13056.980792,1.086957,0.913043
2022-03-25,A000020,동화약품,248995.0,13700.0,13950.0,13500.0,13900.0,1,13510.0,13125.0,...,13971.660919,3.520188,0.851733,22.711743,31.055802,13.600975,1.621829e+08,13116.285955,1.078788,0.872727
2022-03-28,A000020,동화약품,160036.0,13900.0,13900.0,13600.0,13750.0,1,13630.0,13205.0,...,13923.541565,3.115121,0.593659,23.881378,29.874583,13.083657,1.355845e+08,13175.382776,1.092593,0.901235
2022-03-29,A000020,동화약품,160334.0,13850.0,14000.0,13650.0,13750.0,1,13720.0,13270.0,...,13935.912603,2.999455,0.548564,25.101498,29.815148,12.486931,1.162153e+08,13215.806253,1.118750,0.868750
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-05-23,A383800,LX홀딩스,150364.0,8390.0,8390.0,8310.0,8330.0,1,8394.0,8476.0,...,8386.496154,0.777978,0.130726,24.830786,12.066227,21.651061,-2.119785e+06,8402.067450,0.830769,1.130769
2023-05-24,A383800,LX홀딩스,122457.0,8310.0,8340.0,8280.0,8300.0,1,8370.0,8465.5,...,8380.708314,1.024426,0.055122,25.443523,11.486704,23.012618,-2.341775e+06,8374.877786,0.801587,1.214286
2023-05-25,A383800,LX홀딩스,84241.0,8300.0,8310.0,8270.0,8310.0,1,8351.0,8455.0,...,8375.607017,1.095006,0.280735,26.128448,11.103822,23.078862,-1.886891e+06,8361.207569,0.758065,1.241935
2023-05-26,A383800,LX홀딩스,126681.0,8300.0,8310.0,8270.0,8280.0,1,8337.0,8441.5,...,8370.552727,1.311994,0.170044,26.764450,10.719044,22.279117,-2.160254e+06,8347.169606,0.717949,1.333333


In [None]:
# Forecast each ticker's closing price with ARIMA and record the return
# implied by the forecast path.
# NOTE(review): ARIMA is not imported in the visible cells; presumably
# statsmodels.tsa.arima.model.ARIMA -- confirm and add it to the import cell.

# One record per ticker; the result frame is built once after the loop
# instead of being grown row by row (DataFrame.append was removed in pandas 2.0).
records = []

# Distinct tickers present in the training data.
unique_codes = train['ticker'].unique()

# Fit and forecast once per ticker.
for code in tqdm(unique_codes):

    # Closing prices for this ticker. `train` is indexed by date, so the
    # index is converted to datetimes to give the model a time index.
    tc = train.loc[train['ticker'] == code, 'close'].copy()
    tc.index = pd.to_datetime(tc.index)

    # Declare, fit and forecast.
    model = ARIMA(tc, order=(2, 1, 2))
    model_fit = model.fit()
    predictions = model_fit.forecast(steps=15)  # next 15 trading days

    # Return from the first forecast value to the last forecast value.
    final_return = (predictions.iloc[-1] - predictions.iloc[0]) / predictions.iloc[0]

    records.append({'종목코드': code, 'final_return': final_return})

# Output column names are kept as-is for any downstream consumer.
results_df = pd.DataFrame(records, columns=['종목코드', 'final_return'])