# Tick 转 Bar （完整版）

## 目前进度

综合之前的研究，我们有了：

In [1]:
from typing import List, Dict
from pathlib import Path
import csv
import datetime as dt

import pandas as pd

from src.utility import DATA_PATH


# Time zone definitions.
# Beijing time (UTC+8).
tz_beijing: dt.timezone = dt.timezone(dt.timedelta(hours=8))
# "Trading" time zone (UTC+12), i.e. Beijing time shifted forward by 4 hours.
# NOTE(review): presumably chosen so that a night session starting at 21:00
# Beijing time lands at 01:00 and a whole trading day shares one calendar
# date -- confirm.
tz_trading: dt.timezone = dt.timezone(dt.timedelta(hours=12))
# The same 12-hour offset as tz_trading, kept as a plain timedelta.
tz_delta: dt.timedelta = dt.timedelta(hours=12)


# Trading hours of each product.
class ProductTradingTime:
    """
    Trading hours of a single product.

    The hours are stored as a flat list of ``datetime.time`` values that
    holds ``count`` consecutive (start, end) pairs, one pair per trading
    session.
    """
    _exchange: str  # Exchange code, stored upper-cased.
    _product: str   # Product code.
    _count: int     # Number of trading sessions.
    # Index of the optional trading session (1 when the product has a night
    # session, 0 when it has none, i.e. nothing is optional).
    _optional: int
    _sections: List[dt.time]    # Flat list of session start/end times.

    def __init__(self,
                 exchange: str,
                 product: str,
                 count: int,
                 optional: int,
                 sections: List[dt.time]
                 ) -> None:
        """
        Validate and store the trading hours of one product.

        Raises ValueError when <count> is not positive, when <optional> is
        outside [0, 1], or when <sections> does not hold exactly two items
        (start and end) per session.
        """
        self._exchange = exchange.upper()
        self._product = product

        if count <= 0:
            raise ValueError(f'Parameter <count> should be positive integer. Got {count} instead.')
        self._count = count

        if optional < 0 or optional > 1:
            raise ValueError(f'Parameter <optional> should be 0 or 1. Got {optional} instead.')
        self._optional = optional

        # Every session needs a start and an end time.
        if len(sections) != 2 * count:
            raise ValueError(f'Parameter <trading_section> should has twice number items of <count>. {product}')
        self._sections = sections

    @property
    def exchange(self) -> str:
        """Exchange code (upper case)."""
        return self._exchange

    @property
    def product(self) -> str:
        """Product code."""
        return self._product

    @property
    def symbol(self) -> str:
        """Identifier in the form '<exchange>.<product>'."""
        return f'{self._exchange}.{self._product}'

    @property
    def count(self) -> int:
        """Number of trading sessions."""
        return self._count

    @property
    def optional(self) -> int:
        """Index of the optional trading session (0 when there is none)."""
        return self._optional

    @property
    def sections(self) -> List[dt.time]:
        """Flat list of session times: start1, end1, start2, end2, ..."""
        return self._sections

    def section_at(self, n: int) -> tuple:
        """
        Return the (start, end) times of the n-th session; n is 1-based.

        Raises ValueError when n is outside [1, count].
        """
        if n < 1 or n > self._count:
            # f-string so the offending value actually shows up in the message.
            raise ValueError(f'<n> should in range [1, count], got {n} instead.')
        first = (n - 1) * 2
        return self._sections[first], self._sections[first + 1]

    def __str__(self) -> str:
        return (
            f'<ProductTradingTime('
            f'Symbol={self.symbol}, '
            f'Count={self.count}, '
            f'Optional={self.optional}, '
            f'Sections={self.sections}, '
            f')>'
        )


# Load the trading hours of every product.
def read_product_trading_time(csv_path: Path) -> Dict[str, ProductTradingTime]:
    """
    Load product trading hours from a CSV file.

    The file is read with ``csv.DictReader`` and must provide the columns
    ``exchange``, ``product``, ``count``, ``optional`` and ``section``;
    ``section`` holds ISO-format times separated by ``;``.

    Returns a dict mapping '<EXCHANGE>.<product>' to its ProductTradingTime.

    Raises FileNotFoundError when <csv_path> does not exist.
    """
    if not csv_path.exists():
        # A bare ``raise`` has no active exception here and would only
        # produce an unrelated RuntimeError; report the real problem.
        raise FileNotFoundError(f'Trading time file not found: {csv_path}')

    result: Dict[str, ProductTradingTime] = {}
    with open(csv_path, mode='r', newline='', encoding='utf-8') as csv_file:
        for row in csv.DictReader(csv_file):
            exchange = row['exchange'].upper()
            product = row['product']
            result[f'{exchange}.{product}'] = ProductTradingTime(
                exchange=exchange,
                product=product,
                count=int(row['count']),
                optional=int(row['optional']),
                sections=[dt.time.fromisoformat(item) for item in row['section'].split(';')]
            )
    return result


# Convert to the Beijing time zone.
def to_tz_beijing(t: dt.time) -> dt.time:
    """
    Return <t> expressed in the Beijing time zone.

    A naive time only gets the Beijing time zone attached. A time already in
    the Beijing time zone is returned unchanged. Any other aware time is
    anchored to today's date and converted.
    """
    zone = t.tzinfo
    if zone is None:
        return t.replace(tzinfo=tz_beijing)
    if zone == tz_beijing:
        return t

    # time objects cannot change zone on their own, so go through a datetime.
    anchored: dt.datetime = dt.datetime.combine(dt.date.today(), t)
    in_trading: dt.datetime = anchored.astimezone(tz=tz_trading)
    in_beijing: dt.datetime = in_trading.astimezone(tz=tz_beijing)
    return in_beijing.timetz()


# Convert to the trading time zone.
def to_tz_trading(t: dt.time) -> dt.time:
    """
    Return <t> expressed in the trading time zone.

    A time already in the trading time zone is returned unchanged. A naive
    time is treated as Beijing time before being converted.
    """
    if t.tzinfo == tz_trading:
        return t

    aware: dt.time = t if t.tzinfo is not None else t.replace(tzinfo=tz_beijing)

    # time objects cannot change zone on their own, so go through a datetime.
    anchored: dt.datetime = dt.datetime.combine(dt.date.today(), aware)
    in_beijing: dt.datetime = anchored.astimezone(tz=tz_beijing)
    return in_beijing.astimezone(tz=tz_trading).timetz()


# Filter out rows outside trading hours.
def drop_non_trading_data(
    df: pd.DataFrame,
    trading_time: ProductTradingTime
) -> pd.DataFrame:
    """
    Drop data in non-trading time.

    A row is kept when its time of day lies inside at least one trading
    session of <trading_time>, both ends inclusive. Session bounds are
    converted with to_tz_trading() before the comparison. The time of day is
    read from the index when the index is a datetime index, otherwise from
    the ``datetime`` column.

    Supported session counts:
      2 -- financial futures (CFFEX products);
      3 -- commodity futures without a night session;
      4 -- commodity futures with a night session.

    Raises RuntimeError for any other session count.
    """
    session_count = trading_time.count
    if session_count not in (2, 3, 4):
        raise RuntimeError(f'Unknown product type. <count> < 2 or > 4. {trading_time.product}')

    if df.index.inferred_type == 'datetime64':
        times = df.index.timetz
    else:
        # A Series needs the .dt accessor to expose timetz.
        times = df.datetime.dt.timetz

    sections = trading_time.sections
    mask = None
    for i in range(session_count):
        start = to_tz_trading(sections[2 * i])
        end = to_tz_trading(sections[2 * i + 1])
        in_session = (times >= start) & (times <= end)
        mask = in_session if mask is None else (mask | in_session)

    return df[mask]

## 过滤 tick

In [2]:
# Load the trading hours of every product.
product_trading_time: Dict[str, ProductTradingTime] = read_product_trading_time(
    DATA_PATH.joinpath('trading_time.csv')
)
print(product_trading_time['SHFE.al'])

<ProductTradingTime(Symbol=SHFE.al, Count=4, Optional=1, Sections=[datetime.time(21, 0), datetime.time(1, 0), datetime.time(9, 0), datetime.time(10, 15), datetime.time(10, 30), datetime.time(11, 30), datetime.time(13, 30), datetime.time(15, 0)], )>


In [3]:
# Name of the tick data file.
data_file: str = 'SHFE.al2111_Tick.csv'

# Resolve it to a Path under the data directory.
data_path: Path = DATA_PATH.joinpath(data_file)

# Load the tick data into a DataFrame indexed by the parsed 'datetime' column.
df_origin: pd.DataFrame = pd.read_csv(
    data_path,
    index_col=['datetime'],
    parse_dates=['datetime'],
)

print(df_origin.head(n=5))

                               datetime_nano  last_price  highest   lowest  \
datetime                                                                     
2021-06-30 18:51:42.300  1625050302300000000         NaN      NaN      NaN   
2021-06-30 20:59:00.500  1625057940500000000     18780.0  18780.0  18780.0   
2021-06-30 21:00:00.500  1625058000500000000     18780.0  18780.0  18780.0   
2021-06-30 21:00:01.000  1625058001000000000     18780.0  18780.0  18780.0   
2021-06-30 21:00:01.500  1625058001500000000     18780.0  18785.0  18780.0   

                         volume     amount  open_interest  bid_price1  \
datetime                                                                
2021-06-30 18:51:42.300       0        0.0          10879         NaN   
2021-06-30 20:59:00.500       4   375600.0          10880     18780.0   
2021-06-30 21:00:00.500       8   751200.0          10880     18780.0   
2021-06-30 21:00:01.000      10   939000.0          10880     18780.0   
2021-06-30 21:0

In [4]:
# Work on a copy so the raw ticks in df_origin stay untouched.
df = df_origin.copy()

# The parsed timestamps carry no time zone: label them as Beijing time, then
# express them in the trading time zone.
df.index = df.index.tz_localize(tz_beijing).tz_convert(tz_trading)
print(df.head(n=5))

                                        datetime_nano  last_price  highest  \
datetime                                                                     
2021-06-30 22:51:42.300000+12:00  1625050302300000000         NaN      NaN   
2021-07-01 00:59:00.500000+12:00  1625057940500000000     18780.0  18780.0   
2021-07-01 01:00:00.500000+12:00  1625058000500000000     18780.0  18780.0   
2021-07-01 01:00:01+12:00         1625058001000000000     18780.0  18780.0   
2021-07-01 01:00:01.500000+12:00  1625058001500000000     18780.0  18785.0   

                                   lowest  volume     amount  open_interest  \
datetime                                                                      
2021-06-30 22:51:42.300000+12:00      NaN       0        0.0          10879   
2021-07-01 00:59:00.500000+12:00  18780.0       4   375600.0          10880   
2021-07-01 01:00:00.500000+12:00  18780.0       8   751200.0          10880   
2021-07-01 01:00:01+12:00         18780.0      10   939000

In [5]:
# Keep only the ticks that fall inside the SHFE.al trading sessions.
df = drop_non_trading_data(df=df, trading_time=product_trading_time['SHFE.al'])
print(df)

                                        datetime_nano  last_price  highest  \
datetime                                                                     
2021-07-01 01:00:00.500000+12:00  1625058000500000000     18780.0  18780.0   
2021-07-01 01:00:01+12:00         1625058001000000000     18780.0  18780.0   
2021-07-01 01:00:01.500000+12:00  1625058001500000000     18780.0  18785.0   
2021-07-01 01:00:02+12:00         1625058002000000000     18780.0  18795.0   
2021-07-01 01:00:02.500000+12:00  1625058002500000000     18780.0  18795.0   
...                                               ...         ...      ...   
2021-09-25 04:59:58+12:00         1632502798000000000     22945.0  23150.0   
2021-09-25 04:59:58.500000+12:00  1632502798500000000     22945.0  23150.0   
2021-09-25 04:59:59+12:00         1632502799000000000     22945.0  23150.0   
2021-09-25 04:59:59.500000+12:00  1632502799500000000     22945.0  23150.0   
2021-09-25 04:59:59.500001+12:00  1632502799500001000     22945.

## 过滤 bar

In [6]:
# Start again from the raw ticks and move them into the trading time zone.
df = df_origin.copy()
df.index = df.index.tz_localize(tz_beijing).tz_convert(tz_trading)

# Build 1-minute OHLC bars from the last traded price, then drop the bars
# that fall outside the trading sessions.
# NOTE(review): 'MIN' is a non-canonical spelling of the minute alias (pandas
# documents 'min') -- confirm the installed pandas still accepts it.
# NOTE(review): resample() also emits bins that contain no ticks; those inside
# session hours on non-trading days survive the filter -- confirm whether
# they should be dropped as well.
df_1min = drop_non_trading_data(
    df['last_price'].resample('1MIN').ohlc(),
    product_trading_time['SHFE.al'],
)

print(df_1min)

                              open     high      low    close
datetime                                                     
2021-07-01 01:00:00+12:00  18780.0  18800.0  18780.0  18790.0
2021-07-01 01:01:00+12:00  18790.0  18820.0  18790.0  18815.0
2021-07-01 01:02:00+12:00  18815.0  18835.0  18815.0  18815.0
2021-07-01 01:03:00+12:00  18815.0  18820.0  18815.0  18820.0
2021-07-01 01:04:00+12:00  18820.0  18835.0  18820.0  18835.0
...                            ...      ...      ...      ...
2021-09-25 04:55:00+12:00  22955.0  22960.0  22945.0  22950.0
2021-09-25 04:56:00+12:00  22950.0  22960.0  22945.0  22950.0
2021-09-25 04:57:00+12:00  22950.0  22955.0  22950.0  22950.0
2021-09-25 04:58:00+12:00  22955.0  22960.0  22945.0  22955.0
2021-09-25 04:59:00+12:00  22950.0  22960.0  22945.0  22945.0

[40574 rows x 4 columns]


In [7]:
# Build 5-minute OHLC bars from the last traded price, then drop the bars
# that fall outside the trading sessions.
df_5min = drop_non_trading_data(
    df['last_price'].resample('5MIN').ohlc(),
    product_trading_time['SHFE.al'],
)

print(df_5min)

                              open     high      low    close
datetime                                                     
2021-07-01 01:00:00+12:00  18780.0  18835.0  18780.0  18835.0
2021-07-01 01:05:00+12:00  18835.0  18915.0  18835.0  18905.0
2021-07-01 01:10:00+12:00  18910.0  18945.0  18910.0  18945.0
2021-07-01 01:15:00+12:00  18945.0  18950.0  18915.0  18930.0
2021-07-01 01:20:00+12:00  18930.0  18950.0  18925.0  18925.0
...                            ...      ...      ...      ...
2021-09-25 04:35:00+12:00  22995.0  23020.0  22980.0  22990.0
2021-09-25 04:40:00+12:00  22990.0  23005.0  22960.0  22970.0
2021-09-25 04:45:00+12:00  22970.0  22975.0  22955.0  22960.0
2021-09-25 04:50:00+12:00  22960.0  22980.0  22935.0  22955.0
2021-09-25 04:55:00+12:00  22955.0  22960.0  22945.0  22945.0

[8390 rows x 4 columns]
