In [1]:
import pandas as pd
import numpy as np
import random
import os

from tqdm import tqdm
import warnings

import pandas_ta as ta
warnings.filterwarnings("ignore")

In [2]:
def calculate_technical_indicators(df: pd.DataFrame) -> pd.DataFrame:
    # Moving Averages
    df.ta.sma(close='Close', length=10, append=True)
    df.ta.sma(close='Close', length=20, append=True)
    df.ta.sma(close='Close', length=50, append=True)
    df.ta.sma(close='Close', length=100, append=True)
    df.ta.sma(close='Close', length=200, append=True)

    df.ta.ema(close='Close', length=10, append=True)
    df.ta.ema(close='Close', length=20, append=True)
    df.ta.ema(close='Close', length=50, append=True)
    df.ta.ema(close='Close', length=100, append=True)
    df.ta.ema(close='Close', length=200, append=True)

    # Momentum Indicators
    df.ta.rsi(close='Close', length=14, append=True)
    df.ta.macd(close='Close', fast=12, slow=26, signal=9, append=True)
    df.ta.stoch(close='Close', append=True)
    df.ta.roc(close='Close', append=True)
    # MACD는 단기 EMA (12일)가 장기 EMA (26일)에 비해 얼마나 빠르게 움직이는지를 보여줍니다.

    # Volume Indicators
    df.ta.vp(close='Close', volume='Volume', append=True)
    df.ta.obv(close='Close', volume='Volume', append=True)

    # Volatility Indicators
    df.ta.atr(close='Close', append=True)
    df.ta.bbands(close='Close', append=True)

    # Trend Strength Indicators
    df.ta.adx(close='Close', append=True)

    df.ta.efi(length=13, append=True) # Elder's Force Index (EFI): 알렉산더 엘더가 개발한 이 지표는 가격의 변동성과 거래량을 결합하여 주식의 '힘'을 측정합니다.
    df.ta.kama(length=10, append=True) # Kaufman's Adaptive Moving Average (KAMA): 이 지표는 변동성을 고려하여 보다 유연한 이동 평균을 제공합니다.
    df.ta.mfi(high='High', low='Low', close='Close', volume='Volume', length=14, append=True) # Money Flow Index (MFI): 이 지표는 가격과 거래량을 결합하여 주식이 과매수 또는 과매도 상태인지 판단합니다.
    df.ta.vortex(high='High', low='Low', close='Close', length=14, append=True) # Vortex Indicator (VI): 이 지표는 최근 가격의 상승과 하락을 추적하여 상승 추세와 하락 추세를 식별합니다.

    return df


In [3]:
train = pd.read_csv("./data/train_adj.csv")
train = train.drop('Unnamed: 0', axis=1)
train = train.round(2)
train = train.sort_values(['ticker', 'date'], ascending=True)
train.set_index('date', inplace=True)
train

Unnamed: 0_level_0,ticker,firm,volume,open,high,low,close,adjustTrue
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-06-01,A000020,동화약품,114966.0,14700.0,14700.0,14450.0,14600.0,1
2021-06-02,A000020,동화약품,109559.0,14700.0,14700.0,14450.0,14500.0,1
2021-06-03,A000020,동화약품,96158.0,14550.0,14650.0,14450.0,14600.0,1
2021-06-04,A000020,동화약품,133900.0,14600.0,14800.0,14550.0,14700.0,1
2021-06-07,A000020,동화약품,511140.0,14800.0,15550.0,14750.0,15150.0,1
...,...,...,...,...,...,...,...,...
2023-05-23,A383800,LX홀딩스,150364.0,8390.0,8390.0,8310.0,8330.0,1
2023-05-24,A383800,LX홀딩스,122457.0,8310.0,8340.0,8280.0,8300.0,1
2023-05-25,A383800,LX홀딩스,84241.0,8300.0,8310.0,8270.0,8310.0,1
2023-05-26,A383800,LX홀딩스,126681.0,8300.0,8310.0,8270.0,8280.0,1


In [4]:
ticker_list = train['ticker'].unique()
data_frames = []  # store DataFrames here

for ticker in tqdm(ticker_list):
    temp = calculate_technical_indicators(train[train['ticker'] == ticker])
    data_frames.append(temp)

# concat all at once
data = pd.concat(data_frames, axis=0)

100%|██████████| 2000/2000 [01:41<00:00, 19.79it/s]


In [7]:
data_ta = data.dropna(axis=1, how='all')
data_ta = data_ta.dropna(axis=0)
ticker_list = data_ta['ticker'].unique()
train_frames = []  # store DataFrames here
submission_frames = []

for ticker in tqdm(ticker_list):
    temp = data_ta[data_ta['ticker'] == ticker]
    temp['target'] = temp['close'].shift(-15)
    train_frames.append(temp.dropna(axis=0))
    submission_frames.append(temp[temp['target'].isna()])

# concat all at once
train = pd.concat(train_frames, axis=0)
submission = pd.concat(submission_frames, axis=0)

100%|██████████| 1985/1985 [00:31<00:00, 63.55it/s]


In [6]:
train.to_pickle("train_기술적_분석_14.pkl")
submission.to_pickle("prediction_기술적_분석_14.pkl")