In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import pandas as pd
import numpy as np
from func import calc_vpin, imbalance
from data_load import load_data
from data_processing import transform_buy_sell_volume

In [3]:
# Per-symbol containers filled by later cells:
#   df         -> result of calc_vpin for each symbol
#   sec_trades -> resampled trade bars for each symbol
df = {}
sec_trades = {}

# Tickers processed by the loops below.
sym = ['STB', 'SAB', 'MWG', 'VCB', 'TCB']

In [4]:
## Load data
# Folder names handed to load_data; kept as named constants so they are easy to find.
TICK_FOLDER = "tick"
ORDERBOOK_FOLDER = "orderbook"

# Both results are later indexed by symbol (data_tick[s]) or passed on as a dict
# (data_dict=data_orderbook), so presumably each is a mapping of symbol -> DataFrame.
data_tick = load_data(folder=TICK_FOLDER)
data_orderbook = load_data(folder=ORDERBOOK_FOLDER)

In [5]:
# Transform data: turn each symbol's tick frame into one-minute bars.
for s in sym:
    data = (
        data_tick[s]
        # rename() without inplace returns a new frame, so the loaded tick data
        # is left untouched and no explicit copy() is needed.
        .rename(columns={"Gia KL": "PRICE", "KL": "SIZE"})
        .set_index("Date")
        # "min" is the one-minute alias accepted by both old and new pandas;
        # the former "T" spelling is deprecated.
        .resample("min")
        .agg({
            'SIZE': 'sum',    # volume column: total per minute
            'PRICE': 'mean',  # price column: simple average per minute
        })
    )
    sec_trades[s] = data

In [6]:
# Compute VPIN per symbol.
# The same value (50) is used as the divisor of the mean daily volume and as
# the third argument of calc_vpin.
N_BUCKETS = 50

# Second argument of calc_vpin: mean of the daily SIZE totals divided by
# N_BUCKETS, truncated to int.
# NOTE(review): resample("D") produces a bin for every calendar day in the span;
# if the data covers non-trading days their zero totals lower the mean — confirm
# this is intended.
volume = {
    key: int(val['SIZE'].resample("D").sum().mean() / N_BUCKETS)
    for key, val in sec_trades.items()
}

for s in sym:
    print('Calculating VPIN')
    df[s] = calc_vpin(sec_trades[s], volume[s], N_BUCKETS)
    print(f"{s} {df[s].shape}")

Calculating VPIN
STB (2700, 4)
Calculating VPIN
SAB (2709, 4)
Calculating VPIN
MWG (2700, 4)
Calculating VPIN
VCB (2700, 4)
Calculating VPIN
TCB (2700, 4)


In [7]:
## Combine the chosen metric of every symbol into one wide table and save it.
avg = pd.DataFrame()
print(avg.shape)
metric = 'CDF'
# Seed an (empty) column named like the metric: every merge below then collides
# on that name, so the incoming column receives the symbol as suffix
# (CDFSTB, CDFSAB, ...). The seed column itself stays all-NaN in the output.
avg[metric] = np.nan
for stock, frame in df.items():
    # Keep only the last row per 'Time' value so the index is unique before merging.
    frame = (
        frame[[metric]]
        .reset_index()
        .drop_duplicates(subset='Time', keep='last')
        .set_index('Time')
    )
    # Outer join on the index keeps every timestamp seen for any symbol.
    avg = avg.merge(frame[[metric]], left_index=True, right_index=True, how='outer', suffixes=('', stock))
    print(avg.shape)
# Drop rows with no value at all, then carry the last known value forward.
# DataFrame.ffill() is the supported replacement for fillna(method='ffill'),
# which is deprecated in recent pandas.
avg = avg.dropna(axis=0, how='all').ffill()
avg.to_csv('CDF.csv')

(0, 0)
(2032, 2)
(2995, 3)
(4106, 4)
(4784, 5)
(5276, 6)


In [8]:
# Columns read back from CDF.csv: the timestamp plus one CDF column per symbol.
fields = ['Time', 'CDFSTB', 'CDFSAB', 'CDFMWG', 'CDFVCB', 'CDFTCB']
# NOTE(review): this rebinds `df`, which earlier cells use as the dict of
# calc_vpin results; re-running those cells afterwards requires resetting df.
df = pd.read_csv('CDF.csv', parse_dates=['Time'], index_col=[0], usecols=fields)

# Rolling correlation between the CDF columns over a 50-row window.
rolling_pariwise_corr = df.rolling(window=50).corr()

# Sum every correlation entry belonging to a timestamp and divide by the
# squared number of asset columns.
# NOTE(review): the sums skip NaN, so timestamps whose window is incomplete come
# out as 0 rather than NaN, and self-correlations are part of the total —
# confirm both are intended.
n_assets = len(fields) - 1
corr_total = rolling_pariwise_corr.groupby(by=['Time']).sum().sum(axis=1)
thres = (corr_total / (n_assets ** 2)).to_frame('AvgCorrAssets')
thres.to_csv('AvgCorrAssets.csv')

In [9]:
# Build the STB quote frame from the loaded order-book data via the project helper.
STB = transform_buy_sell_volume(data_dict=data_orderbook, key1 = "STB")
# Tag every row with a constant exchange code.
# NOTE(review): presumably required by imbalance(); confirm "HNX" is the intended value for STB.
STB["EX"] = "HNX"

In [10]:
# Map the order-book column names to BID/ASK quote names
# (presumably the names imbalance() expects — verify against func.py).
quote_columns = {
    "Gia_Mua": "BID",
    "KL_mua": "BIDSIZ",
    "Gia_Ban": "ASK",
    "KL_ban": "ASKSIZ",
}
STB.rename(columns=quote_columns, inplace=True)

# Index the frame by its "Date" column. Both steps mutate STB in place, so this
# cell is meant to run once per fresh STB (a second run no longer finds "Date"
# among the columns).
STB.set_index("Date", inplace=True)

In [11]:
# Output path for the result of imbalance().
# NOTE(review): the file name says "BAC" although the input frame is STB;
# kept unchanged in case other code reads this exact path.
QUOTE_IMB_CSV = 'BACquote_imb.csv'

# Run the project's imbalance() on the STB quote frame and persist the result.
quote_imb = imbalance(STB)
quote_imb.to_csv(QUOTE_IMB_CSV)