# Tick2Bar

Convert tick to Bar (1 minute).

In [1]:
# This allows multiple outputs from a single jupyter notebook cell:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
from typing import List, Dict
import os
from pathlib import Path
import datetime as dt

import matplotlib.pyplot as plt
import mplfinance as mpf
import pandas as pd
import numpy as np
import math

In [3]:
# Definations.

# Package path.
PACKAGE_PATH: Path = Path(os.path.abspath(''))

# Timezone.
tz_beijing: dt.timezone = dt.timezone(dt.timedelta(hours=8))
tz_trading: dt.timezone = dt.timezone(dt.timedelta(hours=12))

tz_delta: dt.timedelta = dt.timedelta(hours=4)

In [4]:
def filter_non_trading_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Drop data in non-trading time.
    """
    if df.index.inferred_type == 'datetime64':
        return df[
            (
                ((df.index.time >= dt.datetime.strptime('00:59', '%H:%M').time()) & (df.index.time <= dt.datetime.strptime('05:00', '%H:%M').time())) |
                ((df.index.time >= dt.datetime.strptime('12:59', '%H:%M').time()) & (df.index.time <= dt.datetime.strptime('14:15', '%H:%M').time())) |
                ((df.index.time >= dt.datetime.strptime('14:30', '%H:%M').time()) & (df.index.time <= dt.datetime.strptime('15:30', '%H:%M').time())) |
                ((df.index.time >= dt.datetime.strptime('17:30', '%H:%M').time()) & (df.index.time <= dt.datetime.strptime('19:00', '%H:%M').time()))
            )
        ]
    else:
        return df[
            (
                ((df.datetime.dt.time >= dt.datetime.strptime('00:59', '%H:%M').time()) & (df.datetime.dt.time <= dt.datetime.strptime('05:00', '%H:%M').time())) |
                ((df.datetime.dt.time >= dt.datetime.strptime('12:59', '%H:%M').time()) & (df.datetime.dt.time <= dt.datetime.strptime('14:15', '%H:%M').time())) |
                ((df.datetime.dt.time >= dt.datetime.strptime('14:30', '%H:%M').time()) & (df.datetime.dt.time <= dt.datetime.strptime('15:30', '%H:%M').time())) |
                ((df.datetime.dt.time >= dt.datetime.strptime('17:30', '%H:%M').time()) & (df.datetime.dt.time <= dt.datetime.strptime('19:00', '%H:%M').time()))
            )
        ]

def pbx(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add PBX indicator.
    """
    df['PBX4'] = (df['close'].ewm(span=4, adjust=False).mean()+df['close'].rolling(4*2).mean()+df['close'].rolling(4*4).mean())/3
    df['PBX6'] = (df['close'].ewm(span=6, adjust=False).mean()+df['close'].rolling(6*2).mean()+df['close'].rolling(6*4).mean())/3
    df['PBX24'] = (df['close'].ewm(span=24, adjust=False).mean()+df['close'].rolling(24*2).mean()+df['close'].rolling(24*4).mean())/3
    return df

In [5]:
# data file.
tick_path: Path = PACKAGE_PATH.joinpath('SHFE.al2111_Tick.csv')

In [6]:
df_origin: pd.DataFrame = pd.read_csv(tick_path)
df_origin['datetime'] = pd.to_datetime(df_origin['datetime'])
print(f'Length of <df_origin>: {len(df_origin)}')

Length of <df_origin>: 1468862


In [7]:
# Change df timezone from Beijing to Trading.
df_trading = df_origin
df_trading['datetime'] = df_origin['datetime'] + dt.timedelta(hours=4)
print(f'Length of <df_trading>: {len(df_trading)}')

Length of <df_trading>: 1468862


In [8]:
df_trading = filter_non_trading_data(df_trading)
print(f'Length of <df_trading>: {len(df_trading)}')

Length of <df_trading>: 1468801


In [9]:
# Set df index to DateTimeIndex.
df_trading = df_trading.set_index('datetime')
print(df_trading.index)

DatetimeIndex(['2021-07-01 00:59:00.500000', '2021-07-01 01:00:00.500000',
                      '2021-07-01 01:00:01', '2021-07-01 01:00:01.500000',
                      '2021-07-01 01:00:02', '2021-07-01 01:00:02.500000',
                      '2021-07-01 01:00:03', '2021-07-01 01:00:03.500000',
                      '2021-07-01 01:00:04', '2021-07-01 01:00:04.500000',
               ...
               '2021-09-25 04:59:55.500000',        '2021-09-25 04:59:56',
               '2021-09-25 04:59:56.500000',        '2021-09-25 04:59:57',
               '2021-09-25 04:59:57.500000',        '2021-09-25 04:59:58',
               '2021-09-25 04:59:58.500000',        '2021-09-25 04:59:59',
               '2021-09-25 04:59:59.500000', '2021-09-25 04:59:59.500001'],
              dtype='datetime64[ns]', name='datetime', length=1468801, freq=None)


In [10]:
df_1min = df_trading['last_price'].resample('1MIN').ohlc()
print(df_1min)

                        open     high      low    close
datetime                                               
2021-07-01 00:59:00  18780.0  18780.0  18780.0  18780.0
2021-07-01 01:00:00  18780.0  18800.0  18780.0  18790.0
2021-07-01 01:01:00  18790.0  18820.0  18790.0  18815.0
2021-07-01 01:02:00  18815.0  18835.0  18815.0  18815.0
2021-07-01 01:03:00  18815.0  18820.0  18815.0  18820.0
...                      ...      ...      ...      ...
2021-09-25 04:55:00  22955.0  22960.0  22945.0  22950.0
2021-09-25 04:56:00  22950.0  22960.0  22945.0  22950.0
2021-09-25 04:57:00  22950.0  22955.0  22950.0  22950.0
2021-09-25 04:58:00  22955.0  22960.0  22945.0  22955.0
2021-09-25 04:59:00  22950.0  22960.0  22945.0  22945.0

[124081 rows x 4 columns]


In [11]:
df_1min = df_1min[
    (
        ((df_1min.index.time >= dt.datetime.strptime('00:59', '%H:%M').time()) & (df_1min.index.time <= dt.datetime.strptime('05:00', '%H:%M').time())) |
        ((df_1min.index.time >= dt.datetime.strptime('12:59', '%H:%M').time()) & (df_1min.index.time <= dt.datetime.strptime('14:15', '%H:%M').time())) |
        ((df_1min.index.time >= dt.datetime.strptime('14:30', '%H:%M').time()) & (df_1min.index.time <= dt.datetime.strptime('15:30', '%H:%M').time())) |
        ((df_1min.index.time >= dt.datetime.strptime('17:30', '%H:%M').time()) & (df_1min.index.time <= dt.datetime.strptime('19:00', '%H:%M').time()))
    )
]
print(df_1min)

                        open     high      low    close
datetime                                               
2021-07-01 00:59:00  18780.0  18780.0  18780.0  18780.0
2021-07-01 01:00:00  18780.0  18800.0  18780.0  18790.0
2021-07-01 01:01:00  18790.0  18820.0  18790.0  18815.0
2021-07-01 01:02:00  18815.0  18835.0  18815.0  18815.0
2021-07-01 01:03:00  18815.0  18820.0  18815.0  18820.0
...                      ...      ...      ...      ...
2021-09-25 04:55:00  22955.0  22960.0  22945.0  22950.0
2021-09-25 04:56:00  22950.0  22960.0  22945.0  22950.0
2021-09-25 04:57:00  22950.0  22955.0  22950.0  22950.0
2021-09-25 04:58:00  22955.0  22960.0  22945.0  22955.0
2021-09-25 04:59:00  22950.0  22960.0  22945.0  22945.0

[40747 rows x 4 columns]


In [12]:
# df_min1: pd.DataFrame = df_test['2021-09-24 20:59:00.000000':'2021-09-24 21:01:00.000000']
# price_open: float = df_min1.iloc[1].at ['last_price']
# price_close: float = df_min1.iloc[-1].at ['last_price']
# price_high: float = df_min1['last_price'].max()
# price_low: float = df_min1['last_price'].min()
# print(f'Open: {price_open}, High: {price_high}, Low: {price_low}, Close: {price_close}')

In [13]:
df_5min = df_1min.resample('5MIN').ohlc()
print(df_5min)

                        open                                high           \
                        open     high      low    close     open     high   
datetime                                                                    
2021-07-01 00:55:00  18780.0  18780.0  18780.0  18780.0  18780.0  18780.0   
2021-07-01 01:00:00  18780.0  18820.0  18780.0  18820.0  18800.0  18835.0   
2021-07-01 01:05:00  18835.0  18870.0  18835.0  18870.0  18855.0  18915.0   
2021-07-01 01:10:00  18910.0  18930.0  18910.0  18920.0  18925.0  18945.0   
2021-07-01 01:15:00  18945.0  18945.0  18920.0  18935.0  18950.0  18950.0   
...                      ...      ...      ...      ...      ...      ...   
2021-09-25 04:35:00  22995.0  23005.0  22985.0  22985.0  23005.0  23020.0   
2021-09-25 04:40:00  22990.0  22995.0  22965.0  22965.0  23005.0  23005.0   
2021-09-25 04:45:00  22970.0  22970.0  22960.0  22970.0  22975.0  22975.0   
2021-09-25 04:50:00  22960.0  22970.0  22960.0  22970.0  22965.0  22980.0   

In [14]:
# 计算瀑布线
df_1min['PBX4'] = (df_1min['close'].ewm(span=4, adjust=False).mean()+df_1min['close'].rolling(4*2).mean()+df_1min['close'].rolling(4*4).mean())/3
df_1min['PBX6'] = (df_1min['close'].ewm(span=6, adjust=False).mean()+df_1min['close'].rolling(6*2).mean()+df_1min['close'].rolling(6*4).mean())/3
df_1min['PBX24'] = (df_1min['close'].ewm(span=24, adjust=False).mean()+df_1min['close'].rolling(24*2).mean()+df_1min['close'].rolling(24*4).mean())/3

In [15]:
print(df_1min.loc['2021-09-24 00:59:00.000000':, :].head(500))

                        open     high      low    close          PBX4  \
datetime                                                                
2021-09-24 00:59:00  23565.0  23565.0  23565.0  23565.0           NaN   
2021-09-24 01:00:00  23590.0  23635.0  23575.0  23590.0           NaN   
2021-09-24 01:01:00  23590.0  23600.0  23520.0  23525.0           NaN   
2021-09-24 01:02:00  23525.0  23560.0  23525.0  23535.0           NaN   
2021-09-24 01:03:00  23535.0  23540.0  23450.0  23495.0           NaN   
...                      ...      ...      ...      ...           ...   
2021-09-25 01:23:00  22950.0  22970.0  22940.0  22965.0  22944.423412   
2021-09-25 01:24:00  22960.0  22965.0  22930.0  22930.0  22939.674880   
2021-09-25 01:25:00  22930.0  22965.0  22925.0  22960.0  22943.075762   
2021-09-25 01:26:00  22965.0  22975.0  22945.0  22965.0  22947.720457   
2021-09-25 01:27:00  22960.0  22965.0  22940.0  22940.0  22946.611441   

                             PBX6  PBX24  
datetim