In [1]:
import pandas as pd
import numpy as np
import random
import os
import pickle
from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore")

In [47]:
# Read in price data
train = pd.read_csv("./train.csv", parse_dates=True, index_col="일자")
train = train.reset_index()
train.columns = ['date', 'ticker', 'firm', 'volume', 'open', 'high', 'low', 'close']
df = train.sort_values(by=['ticker', 'date'], ascending=True)

df['adjustTrue'] = 1
df.loc[df['volume'] == 0, 'adjustTrue'] = -1
df = df.sort_values(['ticker','date'], ascending=[True,False])
df = df.reset_index(drop=True)
df.tail()

Unnamed: 0,date,ticker,firm,volume,open,high,low,close,adjustTrue
987995,2021-06-07,A383800,LX홀딩스,2714980,10550,11150,10500,10800,1
987996,2021-06-04,A383800,LX홀딩스,1737593,10450,10650,10350,10450,1
987997,2021-06-03,A383800,LX홀딩스,2709800,10650,10700,10300,10400,1
987998,2021-06-02,A383800,LX홀딩스,2426922,10700,10850,10600,10700,1
987999,2021-06-01,A383800,LX홀딩스,1879288,11000,11300,10900,11000,1


In [80]:
df[df['adjustTrue'] == -1].head(500)

Unnamed: 0,date,ticker,firm,volume,open,high,low,close,adjustTrue
11393,2023-04-12,A000480,조선내화,0,0,0,0,100400,-1
11394,2023-04-11,A000480,조선내화,0,0,0,0,100400,-1
11395,2023-04-10,A000480,조선내화,0,0,0,0,100400,-1
11396,2023-04-07,A000480,조선내화,0,0,0,0,100400,-1
11397,2023-04-06,A000480,조선내화,0,0,0,0,100400,-1
...,...,...,...,...,...,...,...,...,...
81328,2022-02-23,A003620,KG모빌리티,0,0,0,0,2770,-1
81329,2022-02-22,A003620,KG모빌리티,0,0,0,0,2770,-1
81330,2022-02-21,A003620,KG모빌리티,0,0,0,0,2770,-1
81331,2022-02-18,A003620,KG모빌리티,0,0,0,0,2770,-1


In [48]:
# Convert 'date' to datetime and sort the data by date
data = df
data

Unnamed: 0,date,ticker,firm,volume,open,high,low,close,adjustTrue
0,2023-05-30,A000020,동화약품,201361,9960,10040,9640,9700,1
1,2023-05-26,A000020,동화약품,196257,10050,10150,9850,9850,1
2,2023-05-25,A000020,동화약품,398326,9660,10180,9660,10040,1
3,2023-05-24,A000020,동화약품,205243,9770,9820,9550,9740,1
4,2023-05-23,A000020,동화약품,641524,9160,9900,9160,9770,1
...,...,...,...,...,...,...,...,...,...
987995,2021-06-07,A383800,LX홀딩스,2714980,10550,11150,10500,10800,1
987996,2021-06-04,A383800,LX홀딩스,1737593,10450,10650,10350,10450,1
987997,2021-06-03,A383800,LX홀딩스,2709800,10650,10700,10300,10400,1
987998,2021-06-02,A383800,LX홀딩스,2426922,10700,10850,10600,10700,1


In [49]:
result = pd.DataFrame()
ticker_list = data['ticker'].unique()

for ticker in tqdm(ticker_list, leave=True):
    temp = data[data['ticker'] == ticker]
    temp = temp.reset_index(drop=True)
    # Find the index where trading was suspended
    suspension_indices = temp[temp['adjustTrue'] == -1].index

    if len(suspension_indices) == 0: # 거래정지가 없는 경우 패스
        result = pd.concat([result, temp], axis=0)
        continue
    else:
        for index in tqdm(suspension_indices, leave=True):
            # Get the split ratio from the close price at the suspension date and the open price at the date following the suspension
            close_price_at_suspension = temp.loc[index, 'close']
            try: # 23-05-30에 거래정지인 경우 + 다른 거래정지도 고려
                open_price_after_suspension = temp.loc[index-1, 'open']  # 미래 데이터
            except: # 하루만 거래정지면 for문 탈출
                continue
            split_ratio = close_price_at_suspension / open_price_after_suspension if open_price_after_suspension != 0 else 1

            # Adjust the volume, open, high, low, and close prices for all previous dates (because the data is in descending order)
            # 거래 정지 이후 값은 액면분할을 반영하여 덮어쓰기
            temp.loc[index+1:, ['open', 'high', 'low', 'close']] /= split_ratio
            temp.loc[index+1:, 'volume'] *= split_ratio

        # Sort the data in ascending order of date
        # 다시 과거-현재 순으로 재정렬
        temp = temp.sort_values('date', ascending=True)

        # Interpolate zero values in the data using 'pad' method
        # 아직도 0이 남았다 == 거래 정지일이 끝 날짜에 하루밖에 없었다
        # 과거, 미래 값으로 채우기
        temp.replace(0, pd.NA, inplace=True)
        temp.interpolate(method='ffill', inplace=True)
        temp.interpolate(method='bfill', inplace=True)

        result = pd.concat([result, temp], axis=0)

100%|██████████| 5/5 [00:00<00:00, 768.61it/s]/s]
100%|██████████| 14/14 [00:00<00:00, 998.92it/s]]
100%|██████████| 1/1 [00:00<?, ?it/s], 33.90it/s]
100%|██████████| 17/17 [00:00<00:00, 1096.70it/s]
100%|██████████| 3/3 [00:00<00:00, 854.24it/s]/s]
100%|██████████| 5/5 [00:00<00:00, 908.72it/s]/s]
100%|██████████| 1/1 [00:00<00:00, 1000.79it/s]/s]
100%|██████████| 1/1 [00:00<00:00, 665.66it/s]t/s]
100%|██████████| 15/15 [00:00<00:00, 936.76it/s]s]
100%|██████████| 21/21 [00:00<00:00, 1052.67it/s]]
100%|██████████| 3/3 [00:00<00:00, 665.76it/s]t/s]
100%|██████████| 6/6 [00:00<00:00, 922.26it/s]
100%|██████████| 75/75 [00:00<00:00, 1060.47it/s]]
100%|██████████| 36/36 [00:00<00:00, 1074.01it/s]]
100%|██████████| 474/474 [00:00<00:00, 1060.73it/s]
100%|██████████| 1/1 [00:00<00:00, 498.08it/s]t/s]
100%|██████████| 8/8 [00:00<00:00, 1065.96it/s]/s]
100%|██████████| 13/13 [00:00<00:00, 999.01it/s]s]
100%|██████████| 29/29 [00:00<00:00, 1073.49it/s]]
100%|██████████| 5/5 [00:00<00:00, 833.3

In [87]:
data

Unnamed: 0,date,ticker,firm,volume,open,high,low,close,adjustTrue
0,2023-05-30,A000020,동화약품,201361,9960,10040,9640,9700,1
1,2023-05-26,A000020,동화약품,196257,10050,10150,9850,9850,1
2,2023-05-25,A000020,동화약품,398326,9660,10180,9660,10040,1
3,2023-05-24,A000020,동화약품,205243,9770,9820,9550,9740,1
4,2023-05-23,A000020,동화약품,641524,9160,9900,9160,9770,1
...,...,...,...,...,...,...,...,...,...
987995,2021-06-07,A383800,LX홀딩스,2714980,10550,11150,10500,10800,1
987996,2021-06-04,A383800,LX홀딩스,1737593,10450,10650,10350,10450,1
987997,2021-06-03,A383800,LX홀딩스,2709800,10650,10700,10300,10400,1
987998,2021-06-02,A383800,LX홀딩스,2426922,10700,10850,10600,10700,1


In [98]:
result = pd.DataFrame()
ticker_list = ['A001530']

for ticker in tqdm(ticker_list, leave=True):
    temp = data[data['ticker'] == ticker]
    temp = temp.reset_index(drop=True)
    # Find the index where trading was suspended
    suspension_indices = temp[temp['adjustTrue'] == -1].index

    if len(suspension_indices) == 0: # 거래정지가 없는 경우 패스
        result = pd.concat([result, temp], axis=0)
        continue
    else:
        for index in tqdm(suspension_indices, leave=True):
            # Get the split ratio from the close price at the suspension date and the open price at the date following the suspension
            close_price_at_suspension = temp.loc[index, 'close']
            try: # 23-05-30에 거래정지인 경우 + 다른 거래정지도 고려
                open_price_after_suspension = temp.loc[index-1, 'open']  # 미래 데이터
            except: # 하루만 거래정지면 for문 탈출
                continue
            split_ratio = close_price_at_suspension / open_price_after_suspension if open_price_after_suspension != 0 else 1
            # Adjust the volume, open, high, low, and close prices for all previous dates (because the data is in descending order)
            # 거래 정지 이후 값은 액면분할을 반영하여 덮어쓰기
            temp.loc[index+1:, ['open', 'high', 'low', 'close']] /= split_ratio
            temp.loc[index+1:, 'volume'] *= split_ratio

        # Sort the data in ascending order of date
        # 다시 과거-현재 순으로 재정렬
        temp = temp.sort_values('date', ascending=True)

        # Interpolate zero values in the data using 'pad' method
        # 아직도 0이 남았다 == 거래 정지일이 끝 날짜에 하루밖에 없었다
        # 과거, 미래 값으로 채우기
        temp.replace(0, pd.NA, inplace=True)
        temp.interpolate(method='ffill', inplace=True)
        temp.interpolate(method='bfill', inplace=True)

        result = pd.concat([result, temp], axis=0)

100%|██████████| 3/3 [00:00<00:00, 271.58it/s]
100%|██████████| 1/1 [00:00<00:00, 21.87it/s]


In [99]:
result

Unnamed: 0,date,ticker,firm,volume,open,high,low,close,adjustTrue
493,2021-06-01,A001530,DI동일,120150.907441,17430.579965,17914.762742,17285.325132,17624.253076,1
492,2021-06-02,A001530,DI동일,288093.684211,17672.671353,18641.036907,17672.671353,18253.690685,1
491,2021-06-03,A001530,DI동일,183587.695100,18253.690685,18495.782074,17963.181019,18398.945518,1
490,2021-06-04,A001530,DI동일,64407.495463,18398.945518,18398.945518,18108.435852,18302.108963,1
489,2021-06-07,A001530,DI동일,80114.373866,18302.108963,18302.108963,17914.762742,18205.272408,1
...,...,...,...,...,...,...,...,...,...
4,2023-05-23,A001530,DI동일,147745.000000,20450.000000,21100.000000,20200.000000,20600.000000,1
3,2023-05-24,A001530,DI동일,71790.000000,20650.000000,21000.000000,20500.000000,20800.000000,1
2,2023-05-25,A001530,DI동일,77340.000000,20800.000000,20800.000000,20050.000000,20250.000000,1
1,2023-05-26,A001530,DI동일,71356.000000,20250.000000,20300.000000,19800.000000,20000.000000,1


In [71]:
test3 = result[result.isna( ).any(axis=1)]

In [81]:
result2 = result[:82000]