In [41]:
# %%
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt

# Load the TSX message log and order the rows by the 'TimeStamp' column
# (ascending), so later cells can rely on the row order.
df = pd.read_json('../data/TSXData.json').sort_values(by='TimeStamp')

In [2]:
# List the distinct message types present in the log.
df['MessageType'].unique()

array(['NewOrderRequest', 'NewOrderAcknowledged', 'CancelRequest',
       'CancelAcknowledged', 'Cancelled', 'Trade'], dtype=object)

In [3]:
# Count how many distinct OrderID values appear in the log.
df['OrderID'].nunique()

44768

In [4]:
# Walk the OrderIDs that have at least one 'Trade' message and stop at the
# first one that has more than one row in the log; all of that order's rows
# are kept in `matches` for inspection.
matches = []
traded_ids = df.loc[df.MessageType == 'Trade', 'OrderID'].unique()
for order_id in traded_ids:
    orderfilter = df.loc[df.OrderID == order_id]
    if len(orderfilter) <= 1:
        # Only a single row for this order: keep looking.
        continue
    print(f'FOUND with shape {orderfilter.shape}!')
    matches.append(orderfilter)
    break  # one example is enough

FOUND with shape (6, 8)!


### Anomaly detected: an OrderID with three `Trade` messages and no `NewOrderRequest`, followed by a cancel sequence

In [5]:
# Show up to 20 rows of the first matching order found by the search loop.
matches[0].head(n=20)

Unnamed: 0,TimeStamp,TimeStampEpoch,Direction,OrderID,MessageType,Symbol,OrderPrice,Exchange
53287,2023-01-06 09:30:00.238813104,2023-01-06 14:30:00.238813104,ExchangeToNBF,b96e916e-9283-11ed-ac0d-047c16291a22,Trade,MLW24,140.14,TSX
53292,2023-01-06 09:30:00.238878716,2023-01-06 14:30:00.238878716,ExchangeToNBF,b96e916e-9283-11ed-ac0d-047c16291a22,Trade,MLW24,140.14,TSX
53293,2023-01-06 09:30:00.238891893,2023-01-06 14:30:00.238891893,ExchangeToNBF,b96e916e-9283-11ed-ac0d-047c16291a22,Trade,MLW24,140.14,TSX
53730,2023-01-06 09:30:00.268049672,2023-01-06 14:30:00.268049672,NBFToExchange,b96e916e-9283-11ed-ac0d-047c16291a22,CancelRequest,MLW24,,TSX
53734,2023-01-06 09:30:00.268169562,2023-01-06 14:30:00.268169562,ExchangeToNBF,b96e916e-9283-11ed-ac0d-047c16291a22,CancelAcknowledged,MLW24,,TSX
53735,2023-01-06 09:30:00.268174478,2023-01-06 14:30:00.268174478,ExchangeToNBF,b96e916e-9283-11ed-ac0d-047c16291a22,Cancelled,MLW24,,TSX


In [6]:
# Random Order ID
# Pick a random order whose last logged message is not 'Cancelled' and show
# the first rows of its message history.
#
# A seeded RandomState makes the pick reproducible under Restart & Run All.
# The generator *object* (not an integer seed) is passed to `sample` so each
# retry advances the random stream; an integer seed would return the same
# row on every iteration and could loop forever.
order_rng = np.random.RandomState(42)
message = None
while message is None or message == 'Cancelled':
    # One row of the OrderID column is sampled, so orders with more
    # messages are proportionally more likely to be chosen.
    sampled_order_id = df.OrderID.sample(1, random_state=order_rng).values[0]
    random_transaction = df.loc[
        df.OrderID == sampled_order_id,
        ['TimeStampEpoch', 'OrderID', 'MessageType', 'Symbol'],
    ]
    # Last row of this order in df's current row order.
    message = random_transaction['MessageType'].iloc[-1]

random_transaction.head()

Unnamed: 0,TimeStampEpoch,OrderID,MessageType,Symbol
195786,2023-01-06 14:31:44.379774918,b98f5ff8-9283-11ed-a35a-047c16291a22,NewOrderRequest,WHKCN
195791,2023-01-06 14:31:44.379875296,b98f5ff8-9283-11ed-a35a-047c16291a22,NewOrderAcknowledged,WHKCN


In [56]:
# Add a column holding each message's TimeStamp rounded to the nearest whole
# second. The lowercase "s" alias is used because the uppercase "S" frequency
# alias is deprecated in pandas 2.2+.
df['RoundedTimeStamp'] = df.TimeStamp.dt.round("s")

In [57]:
# Preview the first five rows, including the new RoundedTimeStamp column.
df.head(5)

Unnamed: 0,TimeStamp,TimeStampEpoch,Direction,OrderID,MessageType,Symbol,OrderPrice,Exchange,RoundedTimeStamp
0,2023-01-06 09:28:00.011058962,2023-01-06 14:28:00.011058962,NBFToExchange,b963bbc8-9283-11ed-9ad4-047c16291a22,NewOrderRequest,OA14Y,61.56,TSX,2023-01-06 09:28:00
1,2023-01-06 09:28:00.011081372,2023-01-06 14:28:00.011081372,NBFToExchange,b963bbc9-9283-11ed-8b08-047c16291a22,NewOrderRequest,OOOTO,69.37,TSX,2023-01-06 09:28:00
2,2023-01-06 09:28:00.011117286,2023-01-06 14:28:00.011117286,ExchangeToNBF,b963bbc8-9283-11ed-9ad4-047c16291a22,NewOrderAcknowledged,OA14Y,61.56,TSX,2023-01-06 09:28:00
3,2023-01-06 09:28:00.011125452,2023-01-06 14:28:00.011125452,NBFToExchange,b963bbca-9283-11ed-b19f-047c16291a22,CancelRequest,OA14Y,,TSX,2023-01-06 09:28:00
4,2023-01-06 09:28:00.011129285,2023-01-06 14:28:00.011129285,NBFToExchange,b963bbcb-9283-11ed-aba1-047c16291a22,NewOrderRequest,NQGAH,73.2,TSX,2023-01-06 09:28:00


In [59]:
# Index labels of every message whose TimeStamp falls within one second of
# the earliest TimeStamp in the log (both endpoints inclusive).
lower_bound = df['TimeStamp'].min()
upper_bound = lower_bound + pd.Timedelta(1, unit='s')

in_first_second = df['TimeStamp'].between(lower_bound, upper_bound)
df.loc[in_first_second].index

Int64Index([   0,    1,    2,    3,    4,    5,    6,    7,    8,    9,
            ...
            1470, 1471, 1472, 1473, 1474, 1475, 1476, 1477, 1478, 1479],
           dtype='int64', length=1480)

In [55]:
# Violin plot of OrderPrice per MessageType. Rows with no OrderPrice are
# excluded before plotting; only outlier points are drawn.
priced_messages = df[df.OrderPrice.notna()]
fig = px.violin(
    priced_messages,
    x='MessageType',
    y='OrderPrice',
    color='MessageType',
    box=False,
    points='outliers',
)
fig.show()