In [1]:
import pandas as pd
import numpy as np

from tqdm import tqdm
import numba

In [2]:
# 데이터를 불러옵니다
raw_data = pd.read_csv("Data-QuantTrading.csv", index_col="KST")
raw_data = raw_data.drop("Unnamed: 0", axis=1)
raw_data.index = pd.to_datetime(raw_data.index)

# 추가적으로 하나하나 제거해주어야 하는 문제가 있는 데이터들을 찾습니다
# 문제 있는 데이터들의 경우 15:30 종가가 존재하지 않습니다
tmp_data = raw_data.resample('d').last()
for time in raw_data.index:
    if (time.hour == 15) and (time.minute//10 == 2):
        tmp_data = tmp_data.drop(str(time.year)+"-"+str(time.month)+"-"+str(time.day))


for time in tmp_data.index:
    day = time.weekday() 
    if day == 5: # 토요일 제거
        tmp_data = tmp_data.drop(str(time.year)+"-"+str(time.month)+"-"+str(time.day))
    elif day == 6: # 일요일 제거
        tmp_data = tmp_data.drop(str(time.year)+"-"+str(time.month)+"-"+str(time.day))

problematic_days = tmp_data.index

# 토요일과 일요일 및
# 위에서 찾은 문제가 있는 데이터들을 모두 drop 합니다.
# 단순 반복문 + 조건문을 사용하기 때문에 꽤 시간이 걸립니다
pp_data = raw_data.copy()

for idx in pp_data.index:
    day = idx.weekday()
    if day == 5:
        pp_data = pp_data.drop(idx, axis=0)
    elif day == 6:
        pp_data = pp_data.drop(idx, axis=0)

    for problem in problematic_days:
        if (idx.year) == problem.year and (idx.month == problem.month) and (idx.day == problem.day):
             pp_data = pp_data.drop(idx, axis=0)

In [3]:
def easy_pack(current_time, data, win_size):
    if current_time >= win_size:
        return {key:value[current_time-win_size:current_time] for key,value in zip(data.keys(),data.values())}

In [28]:
def put_data(
    input_data,
    base_index,
    base_columns
):
    if not np.array_equal(input_data.index, base_index):
        raise ValueError("Index of input data are wrong")
    if not np.array_equal(input_data.columns, base_columns):
        raise ValueError("Columns of input data are wrong")
    
    return input_data.values

In [None]:
def cal_pf_value(current_cash:float, position:list, bid_price:list, ask_price:list):
    long_position_value = np.sum(np.where(position>0, position*bid_price, 0))
    short_position_value = np.sum(np.where(position<0, position*ask_price, 0))
    return current_cash + long_position_value + short_position_value

In [183]:
#@numba.njit
def execution(bid_price, ask_price, bid_size, ask_size, cash, order, position):

    long_order_adj = np.where(order>0, order, 0)
    long_order_adj = np.where(ask_size-long_order_adj>=0, long_order_adj, ask_size)
    short_order_adj = np.where(order<0, order, 0)
    short_order_adj = np.where(bid_size+short_order_adj>=0, short_order_adj, -bid_size)

    long_order_cashflow = np.where(long_order_adj>0, long_order_adj*ask_price, 0)
    short_order_cashflow = np.where(short_order_adj<0, short_order_adj*bid_price, 0)
    
    res_cashflow = -(np.sum(long_order_cashflow) + np.sum(short_order_cashflow))
    res_cash = cash + np.sum(np.array([res_cashflow]))
    res_order = long_order_adj + short_order_adj
    res_postion = position + res_order
    
    return res_cash, order-res_order, res_order, res_postion, res_cashflow

In [238]:
"""
      | 종목1 | 종목2 | 종목3 | 
날짜1 |
날짜2 |
날짜3 |


Daily Engine Flow

1. Excution
2. pf_value Calculation
3. Signal Generation

"""
class Packtesting:

    def __init__(self, init_cash, bid_price, ask_price=None, bid_size=None, ask_size=None):
        self.__data = {} # dictionary
        self.__variable = {} # dictionary

        self.__base_index = bid_price.index # list
        self.__base_columns = bid_price.columns # list 
        self.__bid_price = bid_price.values # # array

        if type(ask_price) == type(None):
            self.__ask_price = bid_price.values # array
        else:
            self.__ask_price = put_data(ask_price, self.__base_index, self.__base_columns) # array
        if type(bid_size) == type(None):
            self.__bid_size = np.zeros_like(self.__bid_price) + np.inf # array
        else:
            self.__bid_size = put_data(bid_size, self.__base_index, self.__base_columns) # array
        if type(ask_size) == type(None):
            self.__ask_size = np.zeros_like(self.__bid_price) + np.inf # array
        else:
            self.__ask_size = put_data(ask_size, self.__base_index, self.__base_columns) # array

        self.__cash = np.zeros(len(self.__bid_price)) # list
        self.__cash[0] = init_cash
        self.__pf_value = np.zeros(len(self.__bid_price)) # list

        self.__order = np.zeros_like(self.__bid_price) # array
        self.__order_adjusted = np.zeros_like(self.__bid_price) # array
        self.__position = np.zeros_like(self.__bid_price) # array
        self.__cashflow = np.zeros_like(self.__bid_price) # array

    @property
    def _time_index(self):
        return self.__base_index
    
    @property
    def _security_columns(self):
        return self.__base_columns
    
    @property
    def _ask_price(self):
        return self.__ask_price

    @property
    def _bid_size(self):
        return self.__bid_size
    
    @property
    def _ask_size(self):
        return self.__ask_size
    
    @property
    def _cash(self):
        return self.__cash
    
    @property
    def _pf_value(self):
        return self.__pf_value
    
    @property
    def _order(self):
        return self.__order
    
    @property
    def _adjusted_order(self):
        return self.__order_adjusted

    @property
    def _position(self):
        return self.__position
    
    @property
    def _cashflow(self):
        return self.__cashflow 

    def post_data(self, key, value):
        self.__data[key] = put_data(value, self.__base_index, self.__base_columns)
    
    ### For Data Packet ###
    def pack_get_now(self, current_time):
        return self._time_index[current_time]

    def pack_get_data_rolling(self, current_time, name, win_size=0):
        return self.__data[name][current_time-win_size+1:current_time+1]

    def pack_get_data_expanding(self, current_time, name):
        return self.__data[name][:current_time+1]
    
    def pack_get_variable(self, name):
        return self.__variable[name]
    
    def pack_post_variable(self, name, value):
        self.__variable[name] = value

    def pack_get_account_rolling(self, current_time, name, win_size=0):
        return getattr(self, "_"+name)[current_time-win_size+1:current_time+1]
    
    def pack_get_account_expanding(self, current_time, name):
        return getattr(self, "_"+name)[:current_time+1]

    ### User - Defined Methods ###
    def create_packet(
        self,
        current_time
    ):
        return None

    # @staticmethod
    def create_continue(
        packet
    ):
        return False

    # @staticmethod
    def create_signal(
        packet
    ):
        return None

    @staticmethod
    def execution(*args):
        return execution(*args)

    ### Backtesting Engine ###
    def run(
        self,
    ):
        signal = np.zeros_like(self.__order[0])
        zero_order = np.zeros_like(self.__order[0])
        tmp_order_adjust = np.zeros_like(self.__order[0])
        for time in tqdm(range(len(self.__base_index))):
        # for time in range(1,200):    
            self.__order[time] = signal + tmp_order_adjust # 주문 = 직전 signal + 저번 execution에서 못하고 밀렸던 것들

            if np.array_equal(zero_order, self.__order[time]): # 주문이 없을 경우 Execution skip
                self.__cash[time] = self.__cash[time-1]
                self.__position[time] = self.__position[time-1]
            else:
                self.__cash[time], tmp_order_adjust, self.__order_adjusted[time], self.__position[time], self.__cashflow[time] = self.execution(
                                                                                                                                        self.__bid_price[time], 
                                                                                                                                        self.__ask_price[time],
                                                                                                                                        self.__bid_size[time], 
                                                                                                                                        self.__ask_size[time],
                                                                                                                                        self.__cash[time-1], 
                                                                                                                                        self.__order[time],
                                                                                                                                        self.__position[time-1]
                                                                                                                                        )  # Execution 먼저
                                                                                                                                    
            self.__pf_value[time] = cal_pf_value(self.__cash[time], self.__position[time], self.__bid_price[time], self.__ask_price[time]) # pf_value 계산

            packet = self.create_packet(time) # 뒤에 보내줄 데이터 패킷 생성

            if self.create_continue(packet): # Signal 연산 skip 조건
                signal = zero_order.copy()
            else:
                signal = self.create_signal(packet) # 다음 Signal 생성

        print("Done")

    @property
    def result(self):
        res = {}
        res["cash"] = pd.DataFrame(self.__cash, self.__base_index, ["cash"])
        res["cashflow"] = pd.DataFrame(self.__cashflow, self.__base_index, self.__base_columns)
        res["order"] = pd.DataFrame(self.__order, self.__base_index, self.__base_columns)
        res["order_adjusted"] = pd.DataFrame(self.__order_adjusted, self.__base_index, self.__base_columns)
        res["pf_value"] = pd.DataFrame(self.__pf_value, self.__base_index, ["pf_value"])
        res["position"] = pd.DataFrame(self.__position, self.__base_index, self.__base_columns)
        
        return res
        

    def performance(self):
        pass

In [3]:
vwap = pd.DataFrame(pp_data["vwap"])
open = pd.DataFrame(pp_data["open"])
high = pd.DataFrame(pp_data["high"])
low = pd.DataFrame(pp_data["low"])
close = pd.DataFrame(pp_data["close"])
volume = pd.DataFrame(pp_data["volume"])
ticks = pd.DataFrame(pp_data["ticks"])

bid = pd.DataFrame(pp_data["bid"])
bid_size = pd.DataFrame(pp_data["bid_size"])
ask = pd.DataFrame(pp_data["ask"])
ask_size = pd.DataFrame(pp_data["ask_size"])
bid_ask_premium = pd.DataFrame(pp_data["ask"] - pp_data['bid'])

daily_close = []
for time in pp_data.index:
    if (time.hour == 15) and (time.minute//10 == 2):
        daily_close.append(pp_data.loc[time,"close"])
daily_close = pd.DataFrame(daily_close, index=close.resample('d').last().dropna(axis=0).index, columns=["daily_close"])

daily_open = []
for time in pp_data.index:
    if (time.hour == 9) and (time.minute//10 == 0):
        daily_open.append(pp_data.loc[time,"open"])
daily_open = pd.DataFrame(daily_open, index=close.resample('d').last().dropna(axis=0).index, columns=["daily_open"])

daily_high = []
for time in daily_close.index:
    start = "{year}-{month}-{day} 09:00".format(year=time.year, month=time.month, day=time.day)
    end = "{year}-{month}-{day} 15:30".format(year=time.year, month=time.month, day=time.day)
    daily_high.append(pp_data.loc[start:end]["high"].max())
daily_high = pd.DataFrame(daily_high, index=daily_close.index, columns=["daily_high"])

daily_low = []
for time in daily_close.index:
    start = "{year}-{month}-{day} 09:00".format(year=time.year, month=time.month, day=time.day)
    end = "{year}-{month}-{day} 15:30".format(year=time.year, month=time.month, day=time.day)
    daily_low.append(pp_data.loc[start:end]["low"].min())
daily_low = pd.DataFrame(daily_low, index=daily_close.index, columns=["daily_low"])

In [21]:
class MyTest(Packtesting):
    
    def create_packet(self, current_time):
        packet = {}
        # if current_time >= 20:
        packet["close"] = self.pack_get_data_expanding(current_time, "close")
        packet["date"] = self.pack_get_now(current_time)
        return packet
    
    @staticmethod
    def create_signal(packet):
        ma20 = np.array([np.mean(packet["close"][:,stock]) for stock in range(packet["close"].shape[1])])

        signal = np.where(packet["close"][-1,:]>ma20, 100, -1)
        return signal

    @staticmethod
    def create_continue(packet):
        return False

In [22]:
bid.columns = ["close"]
ask.columns = ["close"]
bid_size.columns = ["close"]
ask_size.columns = ["close"]
packtest = MyTest(init_cash=100000000, bid_price=bid, ask_price=ask, bid_size=bid_size, ask_size=ask_size)
packtest.post_data("close", close)

In [23]:
packtest.run()

100%|██████████| 91989/91989 [00:08<00:00, 11300.08it/s]

Done





In [24]:
packtest.result["order"]

Unnamed: 0_level_0,close
KST,Unnamed: 1_level_1
2020-01-02 08:09:59.541837412+09:00,0.0
2020-01-02 08:19:58.181620251+09:00,-1.0
2020-01-02 08:29:58.750208471+09:00,100.0
2020-01-02 08:39:57.647607341+09:00,164.0
2020-01-02 08:49:59.071013731+09:00,119.0
...,...
2022-09-30 23:19:59.878906438+09:00,5329369.0
2022-09-30 23:29:59.998691451+09:00,5329365.0
2022-09-30 23:39:59.542683668+09:00,5329344.0
2022-09-30 23:49:59.826558214+09:00,5329302.0


In [263]:
packtest.result["order"]

Unnamed: 0_level_0,close
KST,Unnamed: 1_level_1
2020-01-02 08:09:59.541837412+09:00,0.0
2020-01-02 08:19:58.181620251+09:00,0.0
2020-01-02 08:29:58.750208471+09:00,-1.0
2020-01-02 08:39:57.647607341+09:00,100.0
2020-01-02 08:49:59.071013731+09:00,156.0
...,...
2022-09-30 23:19:59.878906438+09:00,5329391.0
2022-09-30 23:29:59.998691451+09:00,5329387.0
2022-09-30 23:39:59.542683668+09:00,5329366.0
2022-09-30 23:49:59.826558214+09:00,5329324.0


In [264]:
packtest.result["order_adjusted"]

Unnamed: 0_level_0,close
KST,Unnamed: 1_level_1
2020-01-02 08:09:59.541837412+09:00,0.0
2020-01-02 08:19:58.181620251+09:00,0.0
2020-01-02 08:29:58.750208471+09:00,-1.0
2020-01-02 08:39:57.647607341+09:00,44.0
2020-01-02 08:49:59.071013731+09:00,47.0
...,...
2022-09-30 23:19:59.878906438+09:00,3.0
2022-09-30 23:29:59.998691451+09:00,20.0
2022-09-30 23:39:59.542683668+09:00,41.0
2022-09-30 23:49:59.826558214+09:00,31.0


In [265]:
packtest.result["cash"]

Unnamed: 0_level_0,cash
KST,Unnamed: 1_level_1
2020-01-02 08:09:59.541837412+09:00,0.000000e+00
2020-01-02 08:19:58.181620251+09:00,0.000000e+00
2020-01-02 08:29:58.750208471+09:00,3.240250e+03
2020-01-02 08:39:57.647607341+09:00,-1.392758e+05
2020-01-02 08:49:59.071013731+09:00,-2.915088e+05
...,...
2022-09-30 23:19:59.878906438+09:00,-1.009585e+10
2022-09-30 23:29:59.998691451+09:00,-1.009592e+10
2022-09-30 23:39:59.542683668+09:00,-1.009608e+10
2022-09-30 23:49:59.826558214+09:00,-1.009619e+10


In [266]:
packtest.result["position"]

Unnamed: 0_level_0,close
KST,Unnamed: 1_level_1
2020-01-02 08:09:59.541837412+09:00,0.0
2020-01-02 08:19:58.181620251+09:00,0.0
2020-01-02 08:29:58.750208471+09:00,-1.0
2020-01-02 08:39:57.647607341+09:00,43.0
2020-01-02 08:49:59.071013731+09:00,90.0
...,...
2022-09-30 23:19:59.878906438+09:00,2574092.0
2022-09-30 23:29:59.998691451+09:00,2574112.0
2022-09-30 23:39:59.542683668+09:00,2574153.0
2022-09-30 23:49:59.826558214+09:00,2574184.0


In [267]:
packtest.result["pf_value"]

Unnamed: 0_level_0,pf_value
KST,Unnamed: 1_level_1
2020-01-02 08:09:59.541837412+09:00,0.000000e+00
2020-01-02 08:19:58.181620251+09:00,0.000000e+00
2020-01-02 08:29:58.750208471+09:00,-2.500000e-01
2020-01-02 08:39:57.647607341+09:00,-9.500000e+00
2020-01-02 08:49:59.071013731+09:00,-2.125000e+01
...,...
2022-09-30 23:19:59.878906438+09:00,-6.984851e+08
2022-09-30 23:29:59.998691451+09:00,-6.869017e+08
2022-09-30 23:39:59.542683668+09:00,-6.547253e+08
2022-09-30 23:49:59.826558214+09:00,-6.643784e+08
