In [1]:
# bmll2 provides the file-transfer helpers (get_file / put_file) used in this notebook.
import bmll2 as b2
# Fetch the helper module so the import cell below can load it as `af`.
b2.get_file('modules/auxiliary_functions.py')

'auxiliary_functions.py'

In [2]:
# This plain import becomes possible once the notebooks have been converted to .py files:
# import auxiliary_functions
import auxiliary_functions as af

import random
import math
import pandas as pd
import numpy as np
from pandas import StringDtype

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib.ticker import LogFormatterSciNotation
from scipy.stats import linregress
from scipy.optimize import curve_fit
from scipy.stats import t

In [18]:
# Instrument analysed in this notebook; it selects the input file, filters the
# metaorder data on 'RIC', and is embedded in the output file name.
# NOTE(review): this rebinds `ticker`, which the import cell bound to the
# matplotlib.ticker module (`import matplotlib.ticker as ticker`). Any later use
# of that module under the name `ticker` would get this string instead.
ticker = 'GFI'

In [19]:
# Fetch this ticker's trade file, then load it with both timestamp columns
# parsed, the 'Ticker' column exposed as 'RIC', and rows ordered by
# DateTime with ExchangeSequenceNo as the tie-breaker.
b2.get_file(f'top_100(Volume)/{ticker}.csv')
stock_data = (
    pd.read_csv(f'{ticker}.csv', parse_dates = ['DateTime', 'Date'])
    .rename(columns = {'Ticker' : 'RIC'})
    .sort_values(['DateTime', 'ExchangeSequenceNo'])
)

In [20]:
# Alternative input file, kept for reference:
#b2.get_file('test_data/homo_4_immediate(100).csv')
#impact_data   = pd.read_csv('homo_4_immediate(100).csv', parse_dates = ['Date', 'Start time', 'End time'])

# Metaorder data set that carries the per-date '20 AD' columns.
b2.get_file('test_data/metaorder_data_homogenous_4.csv')
impact_data = pd.read_csv(
    'metaorder_data_homogenous_4.csv',
    parse_dates = ['Date', 'Start time', 'End time'],
)

# Keep one row per date for the selected ticker, holding only the '20 AD'
# volatility and volume columns.
stock_AD_data = (
    impact_data
    .loc[impact_data['RIC'] == ticker, ['RIC', 'Date', '20 AD volatility', '20 AD volume']]
    .drop_duplicates(subset = ['Date'])
    .reset_index(drop = True)
)

In [21]:
# Preview the first rows of the loaded trade data to check the columns and values.
stock_data.head()

Unnamed: 0,MIC,RIC,ListingId,Date,DateTime,ExchangeSequenceNo,Daily Volume,Daily Volatility,Trade Sign,Price,Volume,Mid-price before,Mid-price after(immediate),Mid-price after(delayed),Daily Volatility(alt)
664775,XJSE,GFI,418405306,2023-01-03,2023-01-03 09:02:45.429878,102497,917460.0,0.042035,1,17866.0,175.0,17838.5,17838.5,17838.5,0.041435
664776,XJSE,GFI,418405306,2023-01-03,2023-01-03 09:02:45.430104,102499,917460.0,0.042035,-1,17866.0,75.0,17838.5,17838.5,17834.0,0.041435
664777,XJSE,GFI,418405306,2023-01-03,2023-01-03 09:03:21.672469,111663,917460.0,0.042035,1,17858.0,1.0,17834.0,17834.0,17816.0,0.041435
664778,XJSE,GFI,418405306,2023-01-03,2023-01-03 09:04:21.821773,161459,917460.0,0.042035,1,17822.0,238.0,17816.0,17819.0,17813.0,0.041435
664779,XJSE,GFI,418405306,2023-01-03,2023-01-03 09:04:51.086888,175394,917460.0,0.042035,1,17816.0,192.0,17813.0,17819.0,17816.0,0.041435


In [22]:
# Settings for the simulated trader population.
# N: number of traders the day's trades are split across.
N = 20
# Name of the participation distribution; also the first part of `identifier`.
trader_distribution = 'power'
# Parameter of the 'power' distribution (presumably its exponent; confirm in auxiliary_functions).
alpha = 2
# Tag embedded in the output file name; note that it does not encode `alpha`.
identifier = '{}_{}'.format(trader_distribution, N)

In [23]:
%%time
# Build the impact profile of every synthetic metaorder of `ticker`, one trading
# day at a time, then save the combined table and upload it to 'test_data'.
# Earlier observation: the power distribution with N = 4 gave a very crude but
# maybe passable result.

# Metaorders made of fewer child trades than this are ignored.
MIN_METAORDER_TRADES = 10


def _metaorder_impact_profile(metaorder, vol_20_AD, volume_20_AD):
    """Return the impact profile of a single metaorder as a DataFrame.

    Parameters
    ----------
    metaorder : pandas.DataFrame
        Child trades of one metaorder. The columns 'Volume', 'Trade Sign',
        'Mid-price before' and 'Mid-price after(immediate)' are read.
    vol_20_AD : float
        Value of the '20 AD volatility' column for the metaorder's date.
    volume_20_AD : float
        Value of the '20 AD volume' column for the metaorder's date.

    Returns
    -------
    pandas.DataFrame
        One row per child trade with the columns 'phi', 'impact',
        'scaled impact', 'scaled impact(with volume)', '20 AD volume',
        '20 AD volatility' and 'Q', on a fresh 0..n-1 index.
    """
    # Q is the total metaorder volume; phi is the fraction executed so far.
    Q = metaorder['Volume'].sum()
    phi = metaorder['Volume'].cumsum() / Q

    # The sign of the first child trade is applied to the whole metaorder.
    sign = metaorder['Trade Sign'].iloc[0]

    # Signed log-change of the mid-price after each child trade, measured
    # against the mid-price before the first child trade.
    reference_mid = metaorder['Mid-price before'].iloc[0]
    impact = sign * np.log(metaorder['Mid-price after(immediate)'] / reference_mid)

    scaled_impact_vol = impact / (vol_20_AD * np.sqrt(Q / volume_20_AD))
    scaled_impact = impact / (vol_20_AD * np.sqrt(Q))

    profile = pd.DataFrame({
        'phi': phi,
        'impact': impact,
        'scaled impact': scaled_impact,
        'scaled impact(with volume)': scaled_impact_vol,
        '20 AD volume': volume_20_AD,
        '20 AD volatility': vol_20_AD,
        'Q': Q,
    })
    # drop must be the boolean True (the original passed the string 'True').
    return profile.reset_index(drop = True)


impact_profile_frames = []

for date, day_D in stock_data.groupby('Date', sort = True):
    print(date)

    trades = day_D.loc[day_D['Price'] != 0]

    # Check for an empty day before building the participation distribution,
    # which would otherwise be asked for f_max = 0.
    if trades.empty:
        continue

    # Take N, the distribution name and alpha from the configuration cell so the
    # simulated parameters always match `identifier` in the output file name.
    f = af.trader_participation(N = N, method = trader_distribution, alpha = alpha,
                                f_min = 1, f_max = trades.shape[0], seed = 1)
    c = af.cumulative_probs(f)
    output = af.orders(N = N, trades = trades, cumulative_probs = c)

    # All trades in this group carry `date`, so the 20 AD statistics are looked
    # up once per day instead of twice per metaorder.
    day_AD = stock_AD_data.loc[stock_AD_data['Date'] == date]
    if day_AD.empty:
        # Without the normalisation constants no scaled impact can be computed.
        print(f'{date}: no 20 AD statistics found, skipping')
        continue
    vol_20_AD = day_AD['20 AD volatility'].iloc[0]
    volume_20_AD = day_AD['20 AD volume'].iloc[0]

    for n in range(N):
        trader_n_trades = trades.iloc[output[n], ]
        if trader_n_trades.empty:
            continue

        trader_n_metaorders = af.metaorders(trader_n_trades)

        # Positional access is kept because the container type returned by
        # af.metaorders is not visible from this notebook.
        for i in range(len(trader_n_metaorders)):
            metaorder_i = trader_n_metaorders[i]

            # This also covers empty metaorders.
            if len(metaorder_i) < MIN_METAORDER_TRADES:
                continue

            impact_profile_df = _metaorder_impact_profile(metaorder_i, vol_20_AD, volume_20_AD)

            if not impact_profile_df.empty and not impact_profile_df.isna().all().all():
                impact_profile_frames.append(impact_profile_df)

# pd.concat raises an uninformative ValueError on an empty list; fail with the
# actual reason instead, and before anything is written or uploaded.
if not impact_profile_frames:
    raise ValueError(
        f'No metaorder with at least {MIN_METAORDER_TRADES} trades was found for {ticker}; '
        'no impact profile was written.'
    )

impact_profile = pd.concat(impact_profile_frames, ignore_index = True)
impact_profile.to_csv(f'{ticker}_impact_profile_{identifier}.csv', index = False)
b2.put_file(f'{ticker}_impact_profile_{identifier}.csv', 'test_data')




2023-01-03 00:00:00
2023-01-04 00:00:00
2023-01-05 00:00:00
2023-01-06 00:00:00
2023-01-09 00:00:00
2023-01-10 00:00:00
2023-01-11 00:00:00
2023-01-12 00:00:00
2023-01-13 00:00:00
2023-01-16 00:00:00
2023-01-17 00:00:00
2023-01-18 00:00:00
2023-01-19 00:00:00
2023-01-20 00:00:00
2023-01-23 00:00:00
2023-01-24 00:00:00
2023-01-25 00:00:00
2023-01-26 00:00:00
2023-01-27 00:00:00
2023-01-30 00:00:00
2023-01-31 00:00:00
2023-02-01 00:00:00
2023-02-02 00:00:00
2023-02-03 00:00:00
2023-02-06 00:00:00
2023-02-07 00:00:00
2023-02-08 00:00:00
2023-02-09 00:00:00
2023-02-10 00:00:00
2023-02-13 00:00:00
2023-02-14 00:00:00
2023-02-15 00:00:00
2023-02-16 00:00:00
2023-02-17 00:00:00
2023-02-20 00:00:00
2023-02-21 00:00:00
2023-02-22 00:00:00
2023-02-23 00:00:00
2023-02-24 00:00:00
2023-02-27 00:00:00
2023-02-28 00:00:00
2023-03-01 00:00:00
2023-03-02 00:00:00
2023-03-03 00:00:00
2023-03-06 00:00:00
2023-03-07 00:00:00
2023-03-08 00:00:00
2023-03-09 00:00:00
2023-03-10 00:00:00
2023-03-13 00:00:00
