In [1]:
import ipywidgets as widgets
# import logging

class OutputWidgetHandler:
    """Minimal logger-like object that shows its messages in a ``Textarea`` widget.

    Despite the name it does not derive from ``logging.Handler``; it only offers
    ``info`` / ``warn`` / ``error`` convenience methods that funnel into ``emit``.
    The newest message is always displayed on top.

    Attributes (all set in ``__init__``):
        out      -- the ``widgets.Textarea`` that displays the log text.
        lines    -- formatted messages, oldest first.
        maxlines -- maximum number of messages kept in ``lines``.
    """

    def __init__(self, *args, **kwargs):
        # *args / **kwargs are accepted but ignored.
        layout = {
            'width': '100%',
            'height': '160px',
            'border': '1px solid black',
            'overflow_y':'scroll',
        }
        self.out = widgets.Textarea(layout=layout)
        self.lines = list()
        self.maxlines = 999

    def info(self,msg_str):
        """Log ``msg_str`` with level tag INFO."""
        self.emit(msg_str,'INFO')

    def warn(self,msg_str):
        """Log ``msg_str`` with level tag WARN."""
        self.emit(msg_str,'WARN')

    def error(self,msg_str):
        """Log ``msg_str`` with level tag ERROR."""
        self.emit(msg_str,'ERROR')

    def emit(self, record,level):
        """Timestamp ``record``, store it and refresh the widget text.

        Only the most recent ``maxlines`` messages are kept; the widget shows
        them newest first, one per line.
        """
        # Imported locally so the class does not depend on a later notebook
        # cell having already imported ``datetime``.
        from datetime import datetime

        formatted_record = f'{datetime.now()} - [{level}] {record}'
        self.lines.append(formatted_record)
        self.lines = self.lines[-self.maxlines:]
        self.out.value = '\n'.join(reversed(self.lines))

    def show_logs(self):
        """No-op placeholder; the widget is displayed by embedding ``self.out`` elsewhere."""
        pass

    def clear_logs(self):
        """Drop all stored messages and blank the widget."""
        # Reset the backing list as well, otherwise the next emit() would
        # re-render every old message.
        self.lines = list()
        self.out.value = ''



# Notebook-wide log sink; later cells call logger.info / logger.warn / logger.error
# and embed logger.out in the UI.
logger = OutputWidgetHandler()
# Leftovers from an earlier logging.Handler based setup (kept for reference):
# handler.setFormatter(logging.Formatter('%(asctime)s  - [%(levelname)s] %(message)s'))
# logger.addHandler(handler)
# logger.setLevel(logging.INFO)

In [2]:
%matplotlib widget

In [3]:
import pandas as pd
# import feather
import zmq
import orjson
from copy import deepcopy
import ipywidgets as widgets
from threading import Thread
from datetime import datetime,timedelta
import time

from sqlalchemy import create_engine
import pandas as pd
import numpy as np
import os,sys,glob

import matplotlib.pyplot as plt

from pylab import mpl
    
# Font used for all figure text; presumably chosen so CJK labels render —
# TODO confirm 'kaiti' is installed on the target machine.
mpl.rcParams['font.family'] = 'kaiti'
# Draw negative tick labels with an ASCII hyphen instead of the Unicode minus sign.
mpl.rcParams['axes.unicode_minus'] = False

# pandas display: do not wrap wide frames across lines, show at most 20 rows.
pd.set_option('expand_frame_repr', False)
pd.set_option('display.max_rows', 20)

from progressbar import ProgressBar
from datetime import datetime,timedelta

from concurrent.futures import ProcessPoolExecutor, as_completed
from functools import lru_cache


from dateutil.relativedelta import relativedelta

from matplotlib.widgets import SpanSelector
from collections import Counter
from ipywidgets import GridspecLayout



In [4]:
# Collapsible panel that hosts the Textarea owned by `logger`.
error_acc = widgets.Accordion(children=[logger.out], )
error_acc.set_title(0,'LogViewer')



# Asset class selector; on_reload and on_select_inst branch on its value.
asset_category = widgets.Dropdown(options=['stock','index','FUT','OPT','FX',],value='stock',layout=widgets.Layout(width='120px',))
# Instrument list, filled by on_reload; selecting an entry triggers on_select_inst.
underlying_list = widgets.Select(options=list(),layout=widgets.Layout(width='120px',height='900px'))
# Segment keys of the loaded instrument, filled by on_select_inst.
range_list = widgets.SelectMultiple(options=list(),layout=widgets.Layout(width='120px',height='900px'))
# comb_list = widgets.Select(options=list(),layout=widgets.Layout(width='200px',height='300px'))

# 'Reset' button: reloads the instrument list (handler: on_reload).
reload_but = widgets.Button(description='Reset',layout=widgets.Layout(width='80px',))

# 'Combine' merges the histograms inside the current span selection (handler: on_comb).
comb_but =  widgets.Button(description='Combine',layout=widgets.Layout(width='80px',))
# 'Redo' steps back to the previous snapshot (see on_redo); refresh_plots rewrites its label.
redo_but =  widgets.Button(description='Redo',layout=widgets.Layout(width='80px',))

# Divisor applied to the price range when refresh_plots picks the histogram bin count.
sample_trades_slider = widgets.IntSlider(description='Trds',value=5,min=1,max=240,step=1,layout=widgets.Layout(width='240px',))
# Number of distinct minutes grouped into one sample key (passed to dtsampler as comb_range).
sample_min_slider = widgets.IntSlider(description='Mins',value=1,min=1,max=240,step=1,layout=widgets.Layout(width='240px',))
# Merge threshold in per-mille: on_select_inst compares a price ratio against value/1000.
comb_range_slider = widgets.IntSlider(description='Rng(‰)',value=5,min=1,max=100,step=1,layout=widgets.Layout(width='240px',))

# 'Resample' re-runs on_select_inst for the currently selected instrument.
resample_but = widgets.Button(description='Resample',layout=widgets.Layout(width='80px',))
# Inclusive date window for loading; defaults to the last five days up to today.
start_date = widgets.DatePicker(value=datetime.now().date() - relativedelta(days=5),layout=widgets.Layout(width='160px',))
end_date = widgets.DatePicker(value=datetime.now().date(),layout=widgets.Layout(width='160px',))
# underlying_tab.datamodel = rt.DataFrameDataModel(pd.DataFrame())

# abandon jcplot Output cause capture not work, using canvas
# jcplot = widgets.Output() #layout=widgets.Layout(width='1200px',height='600px')) # rt.RegularTableWidget(pd.DataFrame(),)
# 1x1 grid whose single cell receives the matplotlib canvas in refresh_plots.
gs = GridspecLayout(1, 1, 
                width='1200px',
                   )

# Top row: controls; bottom row: instrument list, segment list and the plot area.
main_tabs = widgets.VBox([widgets.HBox([asset_category,reload_but,sample_trades_slider,sample_min_slider,comb_range_slider,
                                        start_date, end_date,
                                        resample_but,  comb_but, redo_but, ]), 
                          widgets.HBox([underlying_list, range_list,
                                        gs,
                                       ])])
# Root widget of the dashboard: log viewer above the main panel.
main_form = widgets.VBox([error_acc,main_tabs])


In [6]:
def get_productID(code,):
    """Return the part of ``code`` that precedes its first digit.

    An empty string is returned when ``code`` contains no digit at all
    (and also when it starts with one).
    """
    first_digit_at = next((pos for pos, ch in enumerate(code) if ch.isdigit()), None)
    if first_digit_at is None:
        return ''
    return code[:first_digit_at]

    

def get_opt_underlying_strike(opt_code, callputchar):
    """Split an option code into ``(underlying, strike)``.

    The first two characters of ``opt_code`` are kept as they are; the rest is
    upper-cased and searched for ``callputchar``. Everything before that marker
    belongs to the underlying code, everything after it is the integer strike.
    An underlying starting with ``'IO'`` is rewritten to start with ``'IF'``.

    Raises:
        Exception: if ``callputchar`` does not occur after the first two characters.
        ValueError: if the text after the marker is not an integer.
    """
    head, tail = opt_code[:2], opt_code[2:].upper()
    marker_at = tail.find(callputchar)
    if marker_at < 0:
        raise Exception('no c or p found')

    underlying = head + tail[:marker_at]
    if underlying.startswith('IO'):
        underlying = 'IF' + underlying[2:]
    return underlying, int(tail[marker_at + 1:])

def get_contract(fp):
    """Derive contract metadata from a tick file path.

    The file name is expected to look like ``<contract>_<tday>.csv...``.
    Hyphens are stripped from the contract code before it is analysed. A code
    whose upper-cased tail (after the first two characters) contains ``C`` is
    treated as a call option, otherwise one containing ``P`` as a put option,
    anything else as a future.

    Returns a dict with the keys ``fp``, ``tday``, ``original_contract_code``,
    ``contract_class``, ``productID`` and ``underlying``; options additionally
    carry ``iscall`` and ``strike``.
    """
    file_name = os.path.basename(fp)
    raw_code, trading_day = file_name.split('.csv')[0].split('_')
    code = raw_code.replace('-', '')
    # The first two characters may hold product letters, so C/P is only looked for after them.
    tail = code[2:].upper()

    info = {
        'fp': fp,
        'tday': trading_day,
        'original_contract_code': raw_code,
    }

    is_call = 'C' in tail
    is_option = is_call or 'P' in tail
    if is_option:
        info['iscall'] = is_call
        info['contract_class'] = 'OPT'
    else:
        info['contract_class'] = 'FUT'

    info['productID'] = get_productID(code)
    if is_option:
        info['underlying'], info['strike'] = get_opt_underlying_strike(code, 'C' if is_call else 'P')
    else:
        info['underlying'] = code
    return info
        
        

In [7]:
# today_contract_df

In [8]:
#     today_contract_df = today_contract_df[today_contract_df['contract_class']=='OPT']
#     today_contract_df


In [9]:
# store data segments
# Segment store filled by on_select_inst: segment key -> array of trade data
# (a 1-D price vector in the default, non-auto mode).
time_chains_dict = dict()
# One dict per histogram currently drawn ({'last_dist_start', 'ts_list'}); rebuilt by refresh_plots.
time_chains_key_list = list()
# X offset at which each histogram starts; span_onselect searches this list.
last_dist_starts = list()
# Deep-copied snapshots of time_chains_key_list; the 'redo' path of refresh_plots pops from it.
redo_list = list()

In [10]:
# store ranges
# splitted_range_dict = dict()

In [11]:
# store list of ranges
# combined_range_dict = dict()

In [12]:
# Root of the futures/options tick files; code below expects <root>/<YYYYMMDD>/<contract>_<YYYYMMDD>.csv*
future_rootdir = '/home/jesse/DataLinks/104_mntdisk1jesse/ctpdata_reformat/'

In [13]:
# Directory holding the FX files matched as DAT_ASCII_<pair>_*.csv* by on_reload / on_select_inst.
forex_rootdir = '/home/jesse/DataLinks/104_mntdisk1jesse/histdata.com/'

In [14]:

def on_reload(b):
    """Refill ``underlying_list`` with the instruments of the selected asset class.

    Parameters
    ----------
    b : the clicked button (unused).

    * 'stock' / 'index': codes are read from the ``jq_code`` column of the
      matching parquet file.
    * 'FUT' / 'OPT': contract codes are parsed from the tick file names found
      under ``future_rootdir`` for the previous and the current year.
    * 'FX': pair names are taken from the third ``_``-separated field of the
      ``DAT_ASCII_*`` file names under ``forex_rootdir``.
    """
    logger.info("loading stock infos")
    asset_cat = asset_category.value
    if asset_cat in ['stock','index',]:
        stock_info_df = pd.read_parquet(f'/home/jesse/DataLinks/StockDB/JQData_static(allSecuritiesInfo)/{asset_cat}.parquet')
        underlying_list.options = sorted(stock_info_df['jq_code'].unique().tolist())
    elif asset_cat in ['FUT','OPT',]:
        # Read the clock once so both patterns refer to the same "this year".
        this_year = datetime.now().year
        fps = list()
        for year in (this_year - 1, this_year):
            fps.extend(glob.glob(os.path.join(future_rootdir,f'{year}*','*.csv*')))
        if not fps:
            # An empty frame has no 'original_contract_code' column, so the
            # drop_duplicates below would raise KeyError; keep the current list.
            logger.error(f'no contract files found under {future_rootdir}')
            return

        today_contract_df = pd.DataFrame([get_contract(item) for item in fps])
        today_contract_df = today_contract_df.drop_duplicates(subset=['original_contract_code'])
        sel_df = today_contract_df[today_contract_df['contract_class']==asset_cat]
        underlying_list.options = sorted(sel_df['original_contract_code'].unique().tolist())
    elif asset_cat in ['FX']:
        fps = glob.glob(os.path.join(forex_rootdir,'DAT_ASCII_*.csv*'))
        underlying_list.options = sorted(pd.Series([os.path.basename(fp).split('_')[2] for fp in fps]).unique().tolist())
    # underlying_list.value = None
    
    

In [16]:
# Clicking 'Reset' reloads the instrument list for the selected asset class.
reload_but.on_click(on_reload)

In [17]:
# inst = '000001.XSHE'

# alldf['split_key'] = alldf['timestamp'].apply(dt2int) * 100
# alldf['comb_key'] = alldf['split_key'] 

In [18]:
# alldf

In [20]:
def dt2int(x):
    """Pack a datetime-like ``x`` into the integer ``YYYYMMDDHHMM`` (seconds are dropped)."""
    return int((((x.year * 100 + x.month) * 100 + x.day) * 100 + x.hour) * 100 + x.minute)
    
def int2dt(dint):
    """Turn a ``YYYYMMDDHHMM`` number back into a ``datetime`` (seconds are zero)."""
    digits = str(int(dint))
    # Fixed-width fields: year(4) month(2) day(2) hour(2) minute(2).
    field_bounds = ((0, 4), (4, 6), (6, 8), (8, 10), (10, 12))
    year, month, day, hour, minute = (int(digits[lo:hi]) for lo, hi in field_bounds)
    return datetime(year, month, day, hour, minute)

def dtsampler(_dt_list, comb_range=1):
    """Assign every timestamp the key of the minute bucket it falls into.

    Timestamps are reduced to minute resolution with ``dt2int``. A bucket
    collects ``comb_range`` distinct consecutive minute values (as they occur
    in the input order) and is identified by the first minute it contains.

    Returns a list, as long as ``_dt_list``, holding the bucket key for each
    element.
    """
    bucket_keys = []
    previous_minute = None
    bucket_open = None
    minutes_in_bucket = 0
    for minute in (dt2int(stamp) for stamp in _dt_list):
        if minute == previous_minute:
            # Same minute as the element before: stays in the current bucket.
            bucket_keys.append(bucket_open)
            continue

        previous_minute = minute
        if bucket_open is None or minutes_in_bucket == comb_range:
            # Current bucket is full (or none exists yet): this minute opens a new one.
            bucket_open = minute
            minutes_in_bucket = 0
        minutes_in_bucket += 1
        bucket_keys.append(bucket_open)
    return bucket_keys

In [21]:
# dtsampler(pd.to_datetime(['2021-01-01 09:30:00',
#                           '2021-01-01 09:30:00',
#                           '2021-01-01 09:30:03',
#                           '2021-01-01 09:30:04',
#                           '2021-01-01 09:31:01',
#                           '2021-01-01 09:31:02',
#                           '2021-01-01 09:32:01',
#                           '2021-01-01 09:32:01',
#                           '2021-01-01 09:32:01',
#                           '2021-01-01 09:33:01',
#                           '2021-01-01 09:33:02',
#                           '2021-01-01 09:33:03',
#                           '2021-01-01 09:33:05',
#                           '2021-01-01 09:34:00',
#                          ]),2)

In [22]:
# [1,2,1,1,13].index(1)

In [23]:
# time_chains[0]
# pd.Series([1,2,3])[pd.Series([1,2,3]).mod(2).eq(0)]

In [26]:
# indicates current span selection start and end
# X offsets (values from last_dist_starts) of the first and last histogram covered by
# the current span selection; written by span_onselect, read by refresh_plots and on_comb.
cur_last_dist_st, cur_last_dist_ed = 0,0


In [27]:
def span_onselect(xmin, xmax):
    """SpanSelector callback: remember which histograms the selection covers.

    ``xmin`` / ``xmax`` are the span limits in data coordinates. Both are mapped
    onto entries of ``last_dist_starts`` (the x offsets at which the histograms
    start) and the resulting offsets are stored in the globals
    ``cur_last_dist_st`` and ``cur_last_dist_ed``.
    """
    global cur_last_dist_st, cur_last_dist_ed
    if len(last_dist_starts) == 0:
        # Nothing has been plotted yet, so there is nothing to select.
        logger.warn('span selection ignored: no histogram has been plotted')
        return

    last_idx = len(last_dist_starts) - 1
    found_min, found_max = np.searchsorted(last_dist_starts, (xmin, xmax))
    # searchsorted returns insertion points; one step back gives the histogram
    # that starts at or before the limit. Clamp BOTH ends into [0, last_idx]:
    # an unclamped -1 would silently wrap around to the last histogram.
    indmin = min(max(0, int(found_min) - 1), last_idx)
    indmax = min(max(0, int(found_max) - 1), last_idx)
    cur_last_dist_st, cur_last_dist_ed = last_dist_starts[indmin], last_dist_starts[indmax]
    logger.info(f'selected indmin,indmax:st,ed {indmin} {indmax} {last_dist_starts[indmin]} {last_dist_starts[indmax]}')


In [28]:
# Figure, SpanSelector and Axes shared across refreshes; created lazily by refresh_plots.
fig,span,ax = None,None,None
# Figure size in inches, applied with fig.set_size_inches.
_siz = (12,8)

In [29]:
def refresh_plots(b):
    """Redraw all price histograms on the shared axes and re-arm the span selector.

    Parameters
    ----------
    b : object
        The string ``'redo'`` restores the previous snapshot from ``redo_list``;
        any other value (button event, ``None``) triggers a normal refresh that
        merges the histograms lying inside the current span selection.

    Reads ``time_chains_dict``, ``range_list``, ``cur_last_dist_st`` /
    ``cur_last_dist_ed`` and ``sample_trades_slider``; rebinds the globals
    listed in the ``global`` statement below and updates ``redo_but``'s label.
    """
    global time_chains_key_list,redo_list,last_dist_starts,span,ax,gs,fig
    is_redo = False
    if isinstance(b,str):
        if b == 'redo':
            is_redo=True
            # NOTE(review): this path indexes redo_list[-1] after pop(), so it needs at
            # least two snapshots; on_redo checks len(redo_list) > 1 before calling.
            logger.info(f"== redolist bef:len {len(redo_list)} {[item['ts_list'] for item in redo_list[-1]]}")
            redo_list.pop()
            logger.info(f"== redolist aft:len {len(redo_list)} {[item['ts_list'] for item in redo_list[-1]]}")
            time_chains_key_list = deepcopy(redo_list[-1])
    logger.info(f"ooo ts_list:{[item['ts_list'] for item in time_chains_key_list]}")
    # Each bucket is a list of segment keys that will be drawn as ONE histogram.
    buckets = list()
    last_flag_in_group = False
    # ts must be sorted!!!
    if len(time_chains_key_list) > 0:
        for item in time_chains_key_list:
            if is_redo:
                # Restored snapshot: take the buckets exactly as stored.
                buckets.append(item['ts_list'])
            elif not last_flag_in_group and item['last_dist_start'] >= cur_last_dist_st and item['last_dist_start'] <= cur_last_dist_ed:
                # First histogram inside the selection: it opens the merged bucket.
                last_flag_in_group = True
                buckets.append(list())
                buckets[-1] = item['ts_list']
            elif last_flag_in_group and item['last_dist_start'] >= cur_last_dist_st and item['last_dist_start'] <= cur_last_dist_ed:
                # continue add points into current buckets
                # (extends the list object of the first selected item in place)
                buckets[-1].extend(item['ts_list'])
            elif last_flag_in_group and item['last_dist_start'] > cur_last_dist_ed:
                buckets.append(item['ts_list'])
                # jump out current buckets
                last_flag_in_group = False
            else:
                # add original range
                buckets.append(item['ts_list'])
            
    else:
        # first plot: every segment key gets its own bucket
        for ts in sorted(range_list.options):
            buckets.append([ts])

    # Create the figure once and mount its canvas into the grid cell.
    if fig is None:
        fig, ax = plt.subplots()
        gs[0,0] = fig.canvas
        
        fig.set_size_inches(_siz)
        fig.canvas.toolbar_visible = False
        fig.canvas.header_visible = False
        
    # before every refreshing
    ax.clear()
     
    # The list is rebuilt from `buckets` in the loop below.
    if len(time_chains_key_list) > 0:
        time_chains_key_list = list()
        
    # Running x offset: each histogram starts where the previous one's tallest bar ended.
    last_dist_start = 0
    for ts_list in buckets:
        # start delta
        dist = dict()
        mark_tri = False
        # NOTE(review): the original comment said "len > 1, combined range", but the
        # test is > 0, so the triangle is drawn for every non-empty bucket — confirm intent.
        if len(ts_list) > 0:
            mark_tri = True

        open_prc = None
        close_prc = None
        # Prices of all segments in this bucket, in bucket order.
        prc_vector = np.concatenate([time_chains_dict[_ts] for _ts in ts_list])
        open_prc, close_prc = prc_vector[0], prc_vector[-1]
        dist = Counter(prc_vector)
        

        # Horizontal histogram offset to the right by `bottom`; at least 10 bins.
        _bins = ax.hist(prc_vector, bins=max(10,int((prc_vector.max()-prc_vector.min())/sample_trades_slider.value)), orientation='horizontal', bottom=last_dist_start, color='b')
        # Height of the tallest bin, used as this histogram's width on the x axis.
        _bins_cnt = _bins[0].max()
        
        if mark_tri:
            # cal tri endpoints and plot
            balanced_prc = -1
            balanced_cnt = -1
            # find max counts' price level (first one wins on ties)
            for _prc,_cnt in dist.items():
                if _cnt > balanced_cnt:
                    balanced_cnt = _cnt
                    balanced_prc = _prc


            up_p = max(dist.keys())
            dn_p = min(dist.keys())
            
            # The lower corner is always the mirror image of the highest price around
            # balanced_prc; the min() computed just above is overwritten.
            dn_p = balanced_prc - (up_p - balanced_prc)
            
            markerup = 'r'
            markerdn = 'r-.'
            markerbl = 'y'

            # plot mirror triangle
            ax.plot([last_dist_start+1, last_dist_start+1+_bins_cnt],[up_p, balanced_prc],markerup)
            ax.plot([last_dist_start+1, last_dist_start+1+_bins_cnt],[dn_p, balanced_prc],markerdn)
            ax.plot([last_dist_start+1, last_dist_start+1+_bins_cnt],[balanced_prc, balanced_prc],markerbl)
            ax.text(last_dist_start+_bins_cnt/2, balanced_prc,f'{round(balanced_prc,4)}')
        # plot open close markers (red = first price, green = last price of the bucket)
        ax.plot([last_dist_start+1],[open_prc],'ro', markersize=3)
        ax.plot([last_dist_start+1],[close_prc],'go', markersize=3)
        # NOTE(review): max_counts is computed but never used below.
        max_counts = max(dist.values())
        time_chains_key_list.append({
            'last_dist_start':last_dist_start,
            'ts_list':ts_list,
        })
        
        
        # cal next X start point
        last_dist_start += _bins_cnt
    # update global starts for SpanSelector 
    last_dist_starts = [item['last_dist_start'] for item in time_chains_key_list]
    logger.info(f"DONE ploting last_dist_starts:{last_dist_starts}")
    logger.info(f"DONE ploting ts_lists:{[item['ts_list'] for item in time_chains_key_list]}")
    # add to action_hist (a restored snapshot is not pushed again)
    if not is_redo:
        redo_list.append(deepcopy(time_chains_key_list))
    redo_but.description=f'Redo({len(redo_list)})'

    # A fresh selector is bound to the (cleared) axes on every refresh; the module-level
    # `span` reference keeps it alive.
    span = SpanSelector(
                            ax,
                            span_onselect,
                            "horizontal",
                            useblit=True, # Set useblit=True on most backends for enhanced performance.
                            props=dict(alpha=0.5, facecolor="blue"),
                            interactive=True,
                            # drag_from_anywhere=True,
                            )
    fig.set_size_inches(_siz)

In [30]:
def on_comb(b):
    """'Combine' button handler: merge the histograms inside the current span.

    When the stored span start and end are identical there is nothing to merge,
    so an error is logged instead of refreshing the plot.
    """
    span_covers_range = cur_last_dist_st != cur_last_dist_ed
    if not span_covers_range:
        logger.error("MUST select a ragion > 1 triagles!!!")
        return
    refresh_plots(None)

In [31]:
# Clicking 'Combine' merges the histograms covered by the current span selection.
comb_but.on_click(on_comb)

In [34]:
def get_bp(ser):
    """Return ``(value, count)`` of the most frequent element of ``ser``.

    On ties the value that occurs first in ``ser`` wins. For an empty input
    ``(-1, -1)`` is returned.
    """
    tally = Counter(ser)
    if not tally:
        return -1, -1
    # max() keeps the first maximal item, matching a strict ">" scan in insertion order.
    top_value, top_count = max(tally.items(), key=lambda pair: pair[1])
    return top_value, top_count

In [35]:
# max({9:45,10:55}.values())
def dt2hms(dt):
    """Encode the time of day of ``dt`` as the integer ``HHMMSS``."""
    return (dt.hour * 100 + dt.minute) * 100 + dt.second

In [36]:

# Segmentation mode used by on_select_inst. Only the LAST assignment takes effect;
# the earlier lines just record the other accepted values:
#   'sym_tri'  / 'tri_done' -> automatic triangle detection branch
#   ''                      -> minute resampling + ratio-based merging (currently active)
auto_make_tri_mode = 'sym_tri'
auto_make_tri_mode = 'tri_done'
auto_make_tri_mode = ''
# assert not auto_make_tri_mode

In [44]:
def read_ctp_tick(fp):
    """Read a CTP tick CSV and return a two-column frame ``['tradp', 'timestamp']``.

    Parameters
    ----------
    fp : path of a CSV file that has ``LastPrice`` and ``local_timestamp`` columns.

    Returns
    -------
    pandas.DataFrame
        ``tradp`` holds ``LastPrice``; ``timestamp`` holds ``local_timestamp``
        parsed to ``datetime64``.

    Raises
    ------
    KeyError
        If one of the two expected columns is missing.
    """
    raw = pd.read_csv(fp).rename(columns={'LastPrice':'tradp','local_timestamp':'timestamp'})
    # Work on an explicit copy and assign the column directly: writing through
    # ``.loc[:, col]`` on a slice keeps the old object dtype on recent pandas and
    # may emit SettingWithCopy warnings.
    df = raw[['tradp','timestamp']].copy()
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    return df

In [37]:
def on_select_inst(b):
    """Load the ticks of one instrument, cut them into price segments and redraw.

    Parameters
    ----------
    b : str or change event
        Either the instrument code itself (as passed by the Resample button)
        or the change object delivered by ``underlying_list.observe``, whose
        ``new`` attribute holds the code.

    Data is read for the inclusive window ``start_date.value`` ..
    ``end_date.value`` from the source matching ``asset_category.value``.
    Segmentation depends on ``auto_make_tri_mode``: a non-empty mode runs the
    automatic triangle detection, an empty mode groups the ticks by minute
    buckets and merges neighbouring buckets whose most frequent prices differ
    by less than ``comb_range_slider.value`` per-mille.

    Side effects: rebinds the globals ``time_chains_dict``,
    ``time_chains_key_list``, ``last_dist_starts`` and ``redo_list``, refills
    ``range_list`` and finally calls ``refresh_plots(None)``. If nothing can
    be loaded an error is logged and the previous state is left untouched.
    """
    global time_chains_dict,time_chains_key_list,last_dist_starts,redo_list
    inst = b if isinstance(b,str) else getattr(b,'new',None)
    if not inst:
        # Happens when the option list is replaced or nothing is selected yet.
        logger.error('no instrument selected')
        return
    day_from, day_to = start_date.value, end_date.value
    if day_from is None or day_to is None:
        logger.error('start date and end date must both be set')
        return

    logger.info(f'loading single sym:{inst}')
    dfs = list()
    if asset_category.value in ['stock','index']:
        if asset_category.value == 'stock':
            indir = '/home/jesse/DataLinks/l2_data_combined_feather_splitted_jl/trade_data/'
            cols = ['securityid','timestamp','tradp','tradv']
        else:
            indir = '/home/jesse/DataLinks/l2_data_combined_feather_splitted_jl/index_data/'
            cols = ['securityid','timestamp','lastp','tradv']
        for tdaystr in sorted(os.listdir(indir)):
            tday = pd.to_datetime(tdaystr).date()
            if tday < day_from or tday > day_to:
                continue
            fp = os.path.join(indir,tdaystr,f'{inst}_{tdaystr}.feather')
            if not os.path.isfile(fp):
                logger.error(f'{fp} not exsist')
                continue

            try:
                df = pd.read_feather(fp,columns=cols).rename(columns={'lastp':'tradp',})
                # Keep only ticks stamped 09:30:00-11:30:00 or 13:00:00-15:00:04.
                cond = df['timestamp'].apply(lambda dt: (93000 <= dt2hms(dt) <= 113000) or (130000 <= dt2hms(dt) < 150005) )
                df = df[cond]
                logger.info(f'loading {fp}')
                dfs.append(df)
            except Exception as exc:
                logger.error(f'{inst} not in {fp}: {exc!r}')
    elif asset_category.value in ['FUT','OPT']:
        for tdaystr in sorted(os.listdir(future_rootdir)):
            subdir = os.path.join(future_rootdir,tdaystr)
            if not os.path.isdir(subdir) or not len(tdaystr) == 8:
                continue

            tday = pd.to_datetime(tdaystr).date()
            if tday < day_from or tday > day_to:
                continue

            fps = glob.glob(os.path.join(future_rootdir,tdaystr,f'{inst}_{tdaystr}.csv*'))
            if len(fps) < 1:
                logger.error(f'{inst}_{tdaystr} not exsist')
                continue
            fp = fps[0]

            try:
                df = read_ctp_tick(fp)
                logger.info(f'loading {fp}')
                dfs.append(df)
            except Exception as exc:
                logger.error(f'{inst} not in {fp}: {exc!r}')
        # add realtime tick data!!!!
        # Compare date with date: a pandas Timestamp never equals a plain
        # datetime.date on recent pandas, which silently disabled this branch.
        if day_to == datetime.now().date():
            realtime_root = '/home/jesse/data_recorder/'
            day_dirs = list()
            if os.path.isdir(realtime_root):
                day_dirs = sorted(item for item in os.listdir(realtime_root) if len(item) == 8)
            if not day_dirs:
                logger.error(f'no realtime day folder under {realtime_root}')
            else:
                __lastdaystr = day_dirs[-1]
                realtimefp = f'/home/jesse/data_recorder/{__lastdaystr}/{inst}_{__lastdaystr}.csv'
                logger.info(f'reading realtime csv:{realtimefp}')
                try:
                    dfs.append(read_ctp_tick(realtimefp))
                except Exception as exc:
                    logger.error(f'{inst} has no realtimefp:{realtimefp}: {exc!r}')

    elif asset_category.value in ['FX']:
        for fp in sorted(glob.glob(os.path.join(forex_rootdir,f'DAT_ASCII_{inst}_*.csv*'))):
            # The six characters before the 4-character extension are read as YYYYMM.
            st = os.path.basename(fp)[-10:-4]
            dt_monst = pd.to_datetime(f'{st}01').date()
            dt_moned = dt_monst + relativedelta(months=1)
            if dt_moned <= day_from or dt_monst > day_to:
                continue
            df = pd.read_csv(fp,header=None).rename(columns={0:'timestamp',1:'tradp'})[['tradp','timestamp']].copy()
            df['timestamp'] = pd.to_datetime(df['timestamp'],format='%Y%m%d %H%M%S%f')
            tick_days = df['timestamp'].dt.date
            df = df[(tick_days >= day_from) & (tick_days <= day_to)].copy()
            # Prices are scaled by 10000 before segmentation.
            df['tradp'] = 10000 * df['tradp']
            dfs.append(df)

    if not dfs:
        # pd.concat raises on an empty list; keep the previous plot instead.
        logger.error(f'no data loaded for {inst}')
        return

    alldf = pd.concat(dfs).sort_values('timestamp')
    logger.info('loading done ')

    # Only now discard the previous instrument's segments.
    time_chains_dict = dict()
    if auto_make_tri_mode:
        prices = alldf['tradp'].values
        stamps = alldf['timestamp']
        count = 0
        last_bucket_idx = 0
        # Price -> number of trades seen in the bucket that is currently open.
        dist = Counter()
        last_mid = 0
        th = 0.005
        for idx,prc in enumerate(prices):
            count +=1
            dist[prc] += 1

            # Most frequent price of the open bucket (first one wins on ties).
            balanced_prc, balanced_cnt = max(dist.items(), key=lambda kv: kv[1])

            up_p = max(dist.keys())
            dn_p = min(dist.keys())

            # Mirror images of the extremes around the most frequent price.
            dn_p_m = balanced_prc - (up_p - balanced_prc)
            up_p_m = balanced_prc + (balanced_prc - dn_p)

            up_m_gap = up_p_m - up_p
            dn_m_gap = dn_p - dn_p_m

            sigma_cnt = sum(dist.values())
            tri_area_up = balanced_cnt * (up_p-balanced_prc)
            tri_area_dn = balanced_cnt * (balanced_prc -  dn_p)
            if 'sym_tri' ==  auto_make_tri_mode:
                cond = abs(up_m_gap) <= th and abs(dn_m_gap) <= th and up_p != balanced_prc and balanced_prc != dn_p and abs(balanced_prc - last_mid) > 0.01
            elif 'tri_done' == auto_make_tri_mode:
                cond = (abs(sigma_cnt -  tri_area_up) < 5 or abs(sigma_cnt - tri_area_dn) < 5) and sigma_cnt > 5
            else:
                # Unknown mode: never close a bucket automatically.
                cond = False

            if cond:
                logger.info(f'count:{count:10d} mirror to DN:{dn_p:10.2f} Mid:{balanced_prc:10.2f} UP:{up_p:10.2f} DN:{dn_p:10.2f} '
                      f'M_UP:{up_p_m:10.2f} M_DN:{dn_p_m:10.2f} upGap:{up_m_gap:10.2f} dnGap:{dn_m_gap:10.2f}')
                # add to bucket
                _key = stamps.iloc[last_bucket_idx].strftime('%Y%m%d_%H%M%S') + '-'+stamps.iloc[idx].strftime('%Y%m%d_%H%M%S')
                logger.info(f'_key:{_key}')
                # Store the price vector only, which is what refresh_plots consumes.
                time_chains_dict[_key] = prices[last_bucket_idx:idx+1]

                last_bucket_idx = idx+1
                count = 0
                last_mid = balanced_prc
                dist = Counter()
        # add last bucket (skipped when the final trade just closed one)
        if last_bucket_idx < len(prices):
            _key = stamps.iloc[last_bucket_idx].strftime('%Y%m%d_%H%M%S') + '-'+stamps.iloc[len(prices)-1].strftime('%Y%m%d_%H%M%S')
            logger.info(f'_key:{_key}')
            time_chains_dict[_key] = prices[last_bucket_idx:]
    else:
        # resample to min: one key per tick, shared by all ticks of a bucket
        _dtr = dtsampler(alldf['timestamp'], sample_min_slider.value,)
        logger.info(f'first sample keys:{_dtr[:5]}')
        last_bp = None
        # Price vectors of the buckets merged into the segment that is currently open.
        vfs = list()
        last_k = None
        _count = 0
        for k,subdf in alldf.groupby(_dtr):
            _count+=1
            if _count % 100 ==0:
                logger.info(f'{_count} comb: k:{k} lask:{last_k}')
            balanced_prc,_ = get_bp(subdf.tradp.values)

            if last_bp is None:
                last_k = k
                last_bp = balanced_prc
                vfs.append(subdf.tradp.values)
            elif abs(balanced_prc/last_bp-1) < comb_range_slider.value/1000:
                # using ratio to combine range: close enough, extend the open
                # segment and re-derive its most frequent price
                vfs.append(subdf.tradp.values)
                last_bp,_ = get_bp(np.concatenate(vfs))
            else:
                # old combined and init new bp
                time_chains_dict[last_k] = np.concatenate(vfs)
                vfs = [subdf.tradp.values]
                last_k = k
                last_bp = balanced_prc
        # the last segments
        time_chains_dict[last_k] = np.concatenate(vfs)

    range_list.options = sorted(time_chains_dict.keys())
    range_list.value = list()

    # store current figure param
    time_chains_key_list = list()
    # store for triangle starts X, for SpanSelector
    last_dist_starts = list()
    redo_list = list()

    refresh_plots(None)

In [38]:
# Selecting an instrument loads and plots it; the handler receives the change object.
underlying_list.observe(on_select_inst,names='value')
# 'Resample' re-runs the same handler with the currently selected code passed as a string.
resample_but.on_click(lambda b:on_select_inst(underlying_list.value))

In [39]:
def on_redo(b):
    """'Redo' button handler: step back to the previous plot snapshot.

    At least two snapshots must exist in ``redo_list`` (the current one and an
    older one); otherwise an error is logged.

    NOTE(review): no ``redo_but.on_click(on_redo)`` registration is visible in
    this part of the notebook — confirm the handler is wired up elsewhere.
    """
    has_older_snapshot = len(redo_list) > 1
    if has_older_snapshot:
        refresh_plots('redo')
        return
    logger.error(f"CANNOT REDO!!!")
        
        

In [43]:
# Render the dashboard as the cell output.
main_form

VBox(children=(Accordion(children=(Textarea(value='', layout=Layout(border_bottom='1px solid black', border_le…