In [1]:
import pandas as pd
import numpy as np
import random
import os
import pickle
from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore")

In [47]:
# Read in price data
train = pd.read_csv("./train.csv", parse_dates=True, index_col="일자")
train = train.reset_index()
train.columns = ['date', 'ticker', 'firm', 'volume', 'open', 'high', 'low', 'close']
df = train.sort_values(by=['ticker', 'date'], ascending=True)

df['adjustTrue'] = 1
df.loc[df['volume'] == 0, 'adjustTrue'] = -1
df = df.sort_values(['ticker','date'], ascending=[True,False])
df = df.reset_index(drop=True)
df.tail()

Unnamed: 0,date,ticker,firm,volume,open,high,low,close,adjustTrue
987995,2021-06-07,A383800,LX홀딩스,2714980,10550,11150,10500,10800,1
987996,2021-06-04,A383800,LX홀딩스,1737593,10450,10650,10350,10450,1
987997,2021-06-03,A383800,LX홀딩스,2709800,10650,10700,10300,10400,1
987998,2021-06-02,A383800,LX홀딩스,2426922,10700,10850,10600,10700,1
987999,2021-06-01,A383800,LX홀딩스,1879288,11000,11300,10900,11000,1


In [48]:
# Convert 'date' to datetime and sort the data by date
data = df
data

Unnamed: 0,date,ticker,firm,volume,open,high,low,close,adjustTrue
0,2023-05-30,A000020,동화약품,201361,9960,10040,9640,9700,1
1,2023-05-26,A000020,동화약품,196257,10050,10150,9850,9850,1
2,2023-05-25,A000020,동화약품,398326,9660,10180,9660,10040,1
3,2023-05-24,A000020,동화약품,205243,9770,9820,9550,9740,1
4,2023-05-23,A000020,동화약품,641524,9160,9900,9160,9770,1
...,...,...,...,...,...,...,...,...,...
987995,2021-06-07,A383800,LX홀딩스,2714980,10550,11150,10500,10800,1
987996,2021-06-04,A383800,LX홀딩스,1737593,10450,10650,10350,10450,1
987997,2021-06-03,A383800,LX홀딩스,2709800,10650,10700,10300,10400,1
987998,2021-06-02,A383800,LX홀딩스,2426922,10700,10850,10600,10700,1


In [117]:
result = pd.DataFrame()
ticker_list = data['ticker'].unique()

for ticker in tqdm(ticker_list, leave=True):
    temp = data[data['ticker'] == ticker]
    temp = temp.reset_index(drop=True)
    # Find the index where trading was suspended
    suspension_indices = temp[temp['adjustTrue'] == -1].index

    if len(suspension_indices) == 0: # 거래정지가 없는 경우 패스
        result = pd.concat([result, temp], axis=0)
        continue
    else:
        for index in tqdm(suspension_indices, leave=True):
            # Get the split ratio from the close price at the suspension date and the open price at the date following the suspension
            close_price_at_suspension = temp.loc[index, 'close']
            try: # 23-05-30에 거래정지인 경우 + 다른 거래정지도 고려
                open_price_after_suspension = temp.loc[index-1, 'open']  # 미래 데이터
            except: # 23-05-30 하루만 거래정지면 for문 탈출
                continue
            split_ratio = close_price_at_suspension / open_price_after_suspension if open_price_after_suspension != 0 else 1
            # Adjust the volume, open, high, low, and close prices for all previous dates (because the data is in descending order)
            # 거래 정지 이후 값은 액면분할을 반영하여 덮어쓰기
            temp.loc[index+1:, ['open', 'high', 'low', 'close']] /= split_ratio
            temp.loc[index+1:, 'volume'] *= split_ratio
        
        # Sort the data in ascending order of date
        # 다시 과거-현재 순으로 재정렬
        temp = temp.sort_values('date', ascending=True)

        # Interpolate zero values in the data using 'pad' method
        # 아직도 0이 남았다 == 거래 정지일이 끝 날짜에 하루밖에 없었다
        # 과거, 미래 값으로 채우기
        temp.replace(0, pd.NA, inplace=True)
        temp.interpolate(method='ffill', inplace=True)
        temp.interpolate(method='bfill', inplace=True)

        # 액면분할 시 최초 거래정지일 기준 변경되지 않은 기준 close값 변경
        try:
            temp.loc[suspension_indices[0], 'close'] = temp.loc[suspension_indices[0] - 1, 'close']
        except:
            pass

        result = pd.concat([result, temp], axis=0)

100%|██████████| 5/5 [00:00<00:00, 398.81it/s]/s]
100%|██████████| 14/14 [00:00<00:00, 429.46it/s]]
100%|██████████| 1/1 [00:00<00:00, 1000.55it/s]s]
100%|██████████| 17/17 [00:00<00:00, 446.76it/s]]
100%|██████████| 3/3 [00:00<00:00, 374.22it/s]/s]
100%|██████████| 5/5 [00:00<00:00, 399.52it/s]/s]
100%|██████████| 1/1 [00:00<00:00, 285.35it/s]t/s]
100%|██████████| 1/1 [00:00<00:00, 333.70it/s]t/s]
100%|██████████| 15/15 [00:00<00:00, 450.95it/s]s]
100%|██████████| 21/21 [00:00<00:00, 450.34it/s]s]
100%|██████████| 3/3 [00:00<00:00, 399.77it/s]t/s]
100%|██████████| 6/6 [00:00<00:00, 428.30it/s]
100%|██████████| 75/75 [00:00<00:00, 450.35it/s]s]
100%|██████████| 36/36 [00:00<00:00, 463.94it/s]s]
100%|██████████| 474/474 [00:01<00:00, 451.74it/s]
100%|██████████| 1/1 [00:00<00:00, 334.07it/s]t/s]
100%|██████████| 8/8 [00:00<00:00, 499.54it/s]t/s]
100%|██████████| 13/13 [00:00<00:00, 405.50it/s]s]
100%|██████████| 29/29 [00:00<00:00, 441.23it/s]s]
100%|██████████| 5/5 [00:00<00:00, 381.04

In [120]:
result.to_csv("train_adj.csv")