In [4]:
pip install pandas openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-2.0.0 openpyxl-3.1.5
Note: you may need to restart the kernel to use updated packages.


In [5]:
import pandas as pd
import numpy as np
import plotly.express as px

import bmll2 as b2
from bmll2 import reference, Security, NormalisedSecurity, SparkHelper, get_market_data, get_market_data_range, VenueMarketError, save_spark_dataframe, load_spark_dataframe

# Show every column when a DataFrame is displayed (no truncation).
pd.set_option('display.max_columns', None)

In [6]:
def _move_column(frame, column, position):
    """Return ``frame`` with ``column`` relocated to index ``position``; all other columns keep their order."""
    ordered = [name for name in frame.columns if name != column]
    ordered.insert(position, column)
    return frame[ordered]


def get_data(exchange, ticker, starting_date, ending_date):
    """Build a per-trade table of lit continuous trades enriched with daily statistics.

    Trades come from the 'trades-plus' table (only rows classified as
    'LIT_CONTINUOUS' are kept) and quotes from the 'l1' table (only rows in
    'CONTINUOUS_TRADING'). Both are fetched with ``get_market_data_range``.

    Parameters
    ----------
    exchange : venue identifier forwarded to ``get_market_data_range``.
    ticker : instrument ticker forwarded to ``get_market_data_range``.
    starting_date, ending_date : bounds of the requested date range.

    Returns
    -------
    pandas.DataFrame or None
        One row per trade, sorted by ('DateTime', 'ExchangeSequenceNo'), with columns
        MIC, Ticker, ListingId, Date, DateTime, ExchangeSequenceNo, Daily Volume,
        Daily Volatility, Trade Sign, Price, Volume, Mid-price before,
        Mid-price after(immediate), Mid-price after(delayed), Daily Volatility(alt).
        ``None`` when either fetch raises ``VenueMarketError`` or when no lit
        continuous trades exist in the range.
    """
    trade_columns = ['MIC', 'Ticker', 'ListingId', 'TradeDate', 'LocalTradeTimestamp', 'ExchangeSequenceNo',
                     'AggressorSide', 'Price', 'Size', 'PreTradeMid1ms', 'PostTradeMid1ms']

    try:
        trade_plus = get_market_data_range([exchange], start_date = starting_date, end_date = ending_date,
                                           table_name = 'trades-plus', df_engine = 'polars', ticker = ticker,
                                           columns = ['Classification'] + trade_columns)
    except VenueMarketError:
        return None

    trade_plus = trade_plus.to_pandas()
    # .copy() gives an independent frame, so the column assignments below do not
    # write into a slice of the raw download (SettingWithCopyWarning).
    trade_plus = trade_plus.loc[trade_plus['Classification'] == 'LIT_CONTINUOUS', trade_columns].copy()
    if trade_plus.empty:
        return None

    # Encode the aggressor side as a signed value: 1 -> +1, 2 -> -1, 0 -> 0.
    # Any other code becomes NaN.
    trade_plus['AggressorSide'] = trade_plus['AggressorSide'].map({1 : 1, 2 : -1, 0 : 0})
    trade_plus = trade_plus.rename(columns = {'AggressorSide' : 'Trade Sign', 'PreTradeMid1ms' : 'Mid-price before',
                                              'PostTradeMid1ms' : 'Mid-price after(immediate)', 'Size' : 'Volume',
                                              'LocalTradeTimestamp' : 'DateTime', 'TradeDate' : 'Date'})

    # Sort BEFORE shifting: the "delayed" mid-price of a trade is the pre-trade
    # mid of the next trade, which is only meaningful in chronological order.
    # (The original called sort_values at the very end and discarded its result.)
    trade_plus = trade_plus.sort_values(['DateTime', 'ExchangeSequenceNo']).reset_index(drop = True)
    # NOTE(review): the shift runs over the whole range, so the last trade of a
    # day takes the first pre-trade mid of the following day - confirm intended.
    trade_plus['Mid-price after(delayed)'] = trade_plus['Mid-price before'].shift(-1)

    try:
        l1 = get_market_data_range(exchange, start_date = starting_date, end_date = ending_date, ticker = ticker,
                                   table_name = 'l1', df_engine = 'polars')
    except VenueMarketError:
        # Same policy as for the trades fetch: without quotes the volatility
        # columns cannot be computed, so report "no data" instead of crashing.
        return None

    l1 = l1.to_pandas()
    l1 = l1[l1['MarketState'] == 'CONTINUOUS_TRADING'].copy()
    l1['Mid-price'] = (l1['AskPrice1'] + l1['BidPrice1']) / 2
    l1 = l1.rename(columns = {'TradeDate' : 'Date'}).sort_values(by = ['Date', 'ExchangeSequenceNo'])

    # Two daily range-based volatility measures of the mid-price:
    #   (max - min) / first mid of the day, and log(max) - log(min).
    mid_by_day = l1.groupby('Date')['Mid-price']
    daily_vol = pd.DataFrame({
        'Daily Volatility' : mid_by_day.apply(lambda x: (x.max() - x.min()) / x.iloc[0]),
        'Daily Volatility(alt)' : mid_by_day.apply(lambda x: np.log(x.max()) - np.log(x.min())),
    }).reset_index()

    # A left merge keeps the (sorted) row order of the trades.
    trade_plus = pd.merge(trade_plus, daily_vol, on = 'Date', how = 'left')
    trade_plus = _move_column(trade_plus, 'Daily Volatility', 6)

    # Total traded volume per day, placed just before 'Daily Volatility'.
    trade_plus.insert(loc = 6, column = 'Daily Volume',
                      value = trade_plus.groupby('Date')['Volume'].transform('sum'))

    return trade_plus
    

In [7]:
# Retrieve the workbook through bmll2 (presumably into the working directory,
# since it is read back by bare file name just below).
b2.get_file('other/JSE_listed_securities.xlsx')

# Load the listing table, discard the columns that are not used afterwards and
# replace every missing value with 0.
JSE_listed_securities = (
    pd.read_excel('JSE_listed_securities.xlsx')
    .drop(columns = ['Value', 'Change', 'Unnamed: 4', 'High', 'Low'])
    .fillna(0)
)

In [8]:
def clean(x):
    """Convert a volume value such as '1.2M', '350K' or '950' into a number.

    Parameters
    ----------
    x : int, float or str (other types are tolerated)
        Numbers are returned unchanged. Strings may carry a magnitude suffix
        (K = thousand, M = million, B = billion; case-insensitive), surrounding
        whitespace and ',' thousands separators.

    Returns
    -------
    int or float
        The numeric value, or 0 when ``x`` cannot be interpreted as a finite number.
    """
    if isinstance(x, (int, float)):
        return x

    if not isinstance(x, str):
        # e.g. None or a numpy scalar: convert when possible, otherwise 0.
        try:
            return float(x)
        except (TypeError, ValueError):
            return 0

    multipliers = {'K' : 1_000, 'M' : 1_000_000, 'B' : 1_000_000_000}
    text = x.strip().replace(',', '')
    scale = multipliers.get(text[-1:].upper())
    if scale is None:
        # No suffix: a plain number such as '950' keeps its value instead of
        # being silently turned into 0.
        scale = 1
    else:
        text = text[:-1]

    try:
        value = float(text)
    except ValueError:
        return 0

    # float() also accepts 'nan' / 'inf'; those are not usable volumes.
    if value != value or value in (float('inf'), float('-inf')):
        return 0

    return value * scale

In [9]:
# Turn the 'Volume' column into plain numbers via clean(), then order the
# listing from the highest volume to the lowest.
JSE_listed_securities = (
    JSE_listed_securities
    .assign(Volume = JSE_listed_securities['Volume'].apply(clean))
    .sort_values(by = 'Volume', ascending = False)
)

In [10]:
def has_data(exchange, ticker, start, end):
    """Tell whether get_data returns a result (i.e. not None) for this exchange, ticker and date range."""
    return get_data(exchange, ticker, start, end) is not None

In [None]:
# Walk the symbols in their current row order and keep the first 100 for which
# get_data returns something in both probe weeks (early 2023 and late 2025).
selected_tickers = []

for ticker in JSE_listed_securities['Symbol']:
    if len(selected_tickers) >= 100:
        break

    ok_2023 = has_data('XJSE', ticker, '2023-01-01', '2023-01-07')
    ok_2025 = has_data('XJSE', ticker, '2025-12-01', '2025-12-07')

    if not (ok_2023 and ok_2025):
        print(f'Skipping {ticker}: 2023 = {ok_2023}, 2025 = {ok_2025}')
        continue

    selected_tickers.append(ticker)

# NOTE: top100_tickers is a one-column DataFrame (column label 0), not a list;
# read the symbols from its first column rather than iterating the frame itself.
top100_tickers = pd.DataFrame(selected_tickers)
top100_tickers.to_csv('top100_tickers_2.csv', index = False)
b2.put_file('top100_tickers_2.csv')

In [9]:
# Quarterly download windows: start_dates[i] .. end_dates[i] is one calendar
# quarter inside the overall range below.
range_start = '2023-01-01'
range_end   = '2025-12-31'

# Every third month start is the start of a quarter window.
quarter_starts = pd.date_range(start = range_start, end = range_end, freq = 'MS')[::3]

# The end of each window is derived from its start (month end three months on),
# which avoids the deprecated 'M' frequency alias.
quarter_ends = quarter_starts + pd.offsets.MonthEnd(3)

# Keep only windows that end inside the overall range, so that the two arrays
# always have the same length.
inside_range = quarter_ends <= pd.Timestamp(range_end)

start_dates = quarter_starts[inside_range].date
end_dates   = quarter_ends[inside_range].date

In [15]:
%%time
# A2XX or XJSE
exchange = 'A2XX'

# top100_tickers may be a one-column DataFrame or a plain sequence. Iterating a
# DataFrame yields its column labels, not the ticker symbols, so take the
# symbols from the first column explicitly.
if isinstance(top100_tickers, pd.DataFrame):
    tickers = top100_tickers.iloc[:, 0].tolist() if top100_tickers.shape[1] else []
else:
    tickers = list(top100_tickers)

for ticker in tickers:
    # Collect one frame per date window, skipping windows without data.
    stock = []
    for start_date, end_date in zip(start_dates, end_dates):
        data = get_data(exchange, ticker, start_date, end_date)
        if data is not None:
            stock.append(data)

    # pd.concat raises "No objects to concatenate" on an empty list.
    if not stock:
        continue

    stock_data = pd.concat(stock, ignore_index = True)
    stock_data.to_csv(f'{ticker}_{exchange}.csv', index = False)
    b2.put_file(f'{ticker}_{exchange}.csv', 'top_100(Volume)')
    print(ticker)

# takes about 5 hours to run

ValueError: No objects to concatenate