# Volume-Synchronized Probability of Informed Trading (VPIN)

$\text{VPIN}_\tau = \frac{|V_\tau^\text{buy} - V_\tau^\text{sell}|}{V_\tau^\text{buy} + V_\tau^\text{sell}}$, where $\tau$ is the 10-minute interval.

In [134]:
%reset

Once deleted, variables cannot be recovered. Proceed (y/[n])? y


In [136]:
# import packages
import numpy as np
import pandas as pd
import sys

# Paths tricks
import os
from pathlib import Path

In [137]:
# some functions
def select_stock_book(book_path,stock_id,time_id=-1):
    
    stock_path = os.path.join(book_path, 'stock_id=' + str(stock_id))
    book = pd.read_parquet(stock_path)
    
    if time_id != -1: 
        book = book[book['time_id'] == time_id]
        
    return book

def select_stock_trades(trade_path,stock_id,time_id=-1):
    
    trade_path = os.path.join(trade_path, 'stock_id=' + str(stock_id))
    trades = pd.read_parquet(trade_path)
    
    if time_id != -1: 
        trades = trades[trades['time_id'] == time_id]
        
    return trades

In [138]:
# load the data
datapath = os.path.join(str(Path.home()), 'ownCloud', 'Data', 'Kaggle', 'optiver-realized-volatility-prediction')

book = select_stock_book(book_path=os.path.join(datapath, 'book_train.parquet'), stock_id=0, time_id=5)
trades = select_stock_trades(trade_path=os.path.join(datapath, 'trade_train.parquet'), stock_id=0, time_id=5)

# data manipulation
trades['avg_size_per_order'] = trades['size']/trades['order_count'] # add average trade size
trades['prev_trade_second'] = trades['seconds_in_bucket'].shift()

# add time length for each state of the book
book['time_length'] = book['seconds_in_bucket'].diff().shift(periods=-1)
book.loc[len(book)-1, 'time_length'] = 600 - book['seconds_in_bucket'].iloc[-1]

In [139]:
trades.head()

Unnamed: 0,time_id,seconds_in_bucket,price,size,order_count,avg_size_per_order,prev_trade_second
0,5,21,1.002301,326,12,27.166667,
1,5,46,1.002778,128,4,32.0,21.0
2,5,50,1.002818,55,1,55.0,46.0
3,5,57,1.003155,121,5,24.2,50.0
4,5,68,1.003646,4,1,4.0,57.0


In [140]:
# a function to compute buyer initiated trades probability
def buyer_prob(price, bid, ask):
    return max(0, min(1, (price - bid)/(ask - bid)))

def signed_volume(df_book, start_time, end_time, price, volume, output='buy'):
    if output not in ['buy', 'sell']:
        sys.exit("Required output = 'buy' or 'sell'")
    
    if np.isnan(start_time):
        start_time = 0
    
    # compute the weighted bid and ask prices from the book
    w_ask = np.sum(df_book[(start_time <= df_book['seconds_in_bucket']) & (df_book['seconds_in_bucket'] < end_time)]['ask_price1'] * df_book[(start_time <= df_book['seconds_in_bucket']) & (df_book['seconds_in_bucket'] < end_time)]['time_length']) / (end_time - start_time)
    w_bid = np.sum(df_book[(start_time <= df_book['seconds_in_bucket']) & (df_book['seconds_in_bucket'] < end_time)]['bid_price1'] * df_book[(start_time <= df_book['seconds_in_bucket']) & (df_book['seconds_in_bucket'] < end_time)]['time_length']) / (end_time - start_time)
    
    if output == 'buy':
        result = volume * buyer_prob(price, w_bid, w_ask)
    else:
        result = volume * (1 - buyer_prob(price, w_bid, w_ask))

    return result

In [141]:
trades['buyer_volume'] = trades.apply(lambda row: signed_volume(book, row['prev_trade_second'], row['seconds_in_bucket'], row['price'], row['size']), axis=1)
trades['seller_volume'] = trades['size'] - trades['buyer_volume']
trades['vpin'] = trades.apply(lambda row: np.abs(row['buyer_volume'] - row['seller_volume'])/row['size'], axis = 1)
trades.head()

Unnamed: 0,time_id,seconds_in_bucket,price,size,order_count,avg_size_per_order,prev_trade_second,buyer_volume,seller_volume,vpin
0,5,21,1.002301,326,12,27.166667,,325.960008,0.039992,0.999755
1,5,46,1.002778,128,4,32.0,21.0,123.43811,4.56189,0.92872
2,5,50,1.002818,55,1,55.0,46.0,0.0,55.0,1.0
3,5,57,1.003155,121,5,24.2,50.0,121.0,0.0,1.0
4,5,68,1.003646,4,1,4.0,57.0,4.0,0.0,1.0


In [142]:
# calculating VPIN for time_id by weighting by size
VPIN = np.sum(trades['vpin'] * trades['size']) / np.sum(trades['size'])
print(VPIN)

0.9234727992343469
