In [38]:
import pandas as pd
import numpy as np
import random
import os
import pickle
from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore")

In [217]:
# Read in price data
train = pd.read_csv("./train.csv", parse_dates=True, index_col="일자")
train = train.reset_index()
train.columns = ['date', 'ticker', 'firm', 'volume', 'open', 'high', 'low', 'close']
df = train.sort_values(by=['ticker', 'date'], ascending=True)

df['adjustTrue'] = 0
df.loc[df['volume'] == 0, 'adjustTrue'] = 1
df = df.reset_index(drop=True)
df.tail()

Unnamed: 0,date,ticker,firm,volume,open,high,low,close,adjustTrue
987995,2023-05-23,A383800,LX홀딩스,150364,8390,8390,8310,8330,0
987996,2023-05-24,A383800,LX홀딩스,122457,8310,8340,8280,8300,0
987997,2023-05-25,A383800,LX홀딩스,84241,8300,8310,8270,8310,0
987998,2023-05-26,A383800,LX홀딩스,126681,8300,8310,8270,8280,0
987999,2023-05-30,A383800,LX홀딩스,70489,8300,8300,8270,8290,0


In [248]:
# Convert 'date' to datetime and sort the data by date
data = df

# Sort the data in descending order of date
data = data.sort_values(['ticker','date'], ascending=[True,False])
data = data.reset_index(drop=True)
data

Unnamed: 0,date,ticker,firm,volume,open,high,low,close,adjustTrue
0,2023-05-30,A000020,동화약품,201361,9960,10040,9640,9700,0
1,2023-05-26,A000020,동화약품,196257,10050,10150,9850,9850,0
2,2023-05-25,A000020,동화약품,398326,9660,10180,9660,10040,0
3,2023-05-24,A000020,동화약품,205243,9770,9820,9550,9740,0
4,2023-05-23,A000020,동화약품,641524,9160,9900,9160,9770,0
...,...,...,...,...,...,...,...,...,...
987995,2021-06-07,A383800,LX홀딩스,2714980,10550,11150,10500,10800,0
987996,2021-06-04,A383800,LX홀딩스,1737593,10450,10650,10350,10450,0
987997,2021-06-03,A383800,LX홀딩스,2709800,10650,10700,10300,10400,0
987998,2021-06-02,A383800,LX홀딩스,2426922,10700,10850,10600,10700,0


In [256]:
result = pd.DataFrame()
ticker_list = data['ticker'].unique()

for ticker in tqdm(ticker_list):
    temp = data[data['ticker'] == ticker]
    # Find the index where trading was suspended
    suspension_indices = temp[temp['adjustTrue'] == 1].index
    if len(suspension_indices) == 0:
        continue
    elif len(suspension_indices) == 1:
        temp.loc[suspension_indices, ['volume', 'open', 'high', 'low']] = temp.loc[suspension_indices+1, ['volume', 'open', 'high', 'low']]
        continue
    else:
        for index in tqdm(suspension_indices):
            # Get the split ratio from the close price at the suspension date and the open price at the date following the suspension
            close_price_at_suspension = temp.loc[index, 'close']
            open_price_after_suspension = temp.loc[index-1, 'open']  # because the data is in descending order
            split_ratio = close_price_at_suspension / open_price_after_suspension if open_price_after_suspension != 0 else 1

            # Adjust the volume, open, high, low, and close prices for all previous dates (because the data is in descending order)
            temp.loc[index+1:, ['open', 'high', 'low', 'close']] /= split_ratio
            temp.loc[index+1:, 'volume'] *= split_ratio

        # Sort the data in ascending order of date
        temp = temp.sort_values('date', ascending=True)

        # Interpolate zero values in the data using 'pad' method
        temp.replace(0, pd.NA, inplace=True)
        temp.interpolate(method='pad', limit_direction='forward', inplace=True)
    result = pd.concat([result, temp], axis=0)

  0%|          | 0/2000 [00:00<?, ?it/s]

100%|██████████| 5/5 [00:00<00:00, 397.76it/s]/s]
100%|██████████| 14/14 [00:00<00:00, 505.41it/s]]
100%|██████████| 17/17 [00:00<00:00, 474.09it/s]]
100%|██████████| 3/3 [00:00<00:00, 435.08it/s]/s]
100%|██████████| 5/5 [00:00<00:00, 468.20it/s]/s]
100%|██████████| 15/15 [00:00<00:00, 481.66it/s]s]
100%|██████████| 21/21 [00:00<00:00, 471.89it/s]s]
100%|██████████| 3/3 [00:00<00:00, 373.40it/s]t/s]
100%|██████████| 6/6 [00:00<00:00, 502.76it/s]
100%|██████████| 75/75 [00:00<00:00, 495.81it/s]s]
  0%|          | 0/36 [00:00<?, ?it/s], 21.48it/s]
  8%|▊         | 160/2000 [00:06<01:19, 23.20it/s]


KeyError: 79039

In [216]:
suspension_indices

Index([  1268,   1269,   1270,   2879,   2880,   2881,   2882,   2883,   2884,
         2885,
       ...
       960844, 960845, 960846, 960847, 960848, 976175, 976176, 976177, 976178,
       976179],
      dtype='int64', length=21220)

In [215]:
temp

Unnamed: 0,date,ticker,firm,volume,open,high,low,close,adjustTrue
0,2023-05-30,A383800,LX홀딩스,70489,8300,8300,8270,8290,0
1,2023-05-26,A383800,LX홀딩스,126681,8300,8310,8270,8280,0
2,2023-05-25,A383800,LX홀딩스,84241,8300,8310,8270,8310,0
3,2023-05-24,A383800,LX홀딩스,122457,8310,8340,8280,8300,0
4,2023-05-23,A383800,LX홀딩스,150364,8390,8390,8310,8330,0
...,...,...,...,...,...,...,...,...,...
489,2021-06-07,A383800,LX홀딩스,2714980,10550,11150,10500,10800,0
490,2021-06-04,A383800,LX홀딩스,1737593,10450,10650,10350,10450,0
491,2021-06-03,A383800,LX홀딩스,2709800,10650,10700,10300,10400,0
492,2021-06-02,A383800,LX홀딩스,2426922,10700,10850,10600,10700,0
