In [1]:
# Importing the libraries.

import pandas as pd
import os

In [2]:
# List of all the files in the directory, which contains the messages that Athex transmitted.

# One feed log per trading day; names are kept one per line and split on whitespace into a list.
filenames = """
    20240131.NewDataMessage.000251.log
    20240201.NewDataMessage.000329.log
    20240202.NewDataMessage.000448.log
    20240205.NewDataMessage.000508.log
    20240206.NewDataMessage.000528.log
    20240207.NewDataMessage.000128.log
    20240208.NewDataMessage.000248.log
    20240209.NewDataMessage.000028.log
    20240212.NewDataMessage.000606.log
    20240213.NewDataMessage.000231.log
    20240214.NewDataMessage.000331.log
    20240215.NewDataMessage.000432.log
    20240216.NewDataMessage.000651.log
    20240219.NewDataMessage.000028.log
    20240220.NewDataMessage.000230.log
    20240221.NewDataMessage.000432.log
    20240222.NewDataMessage.000647.log
    20240223.NewDataMessage.000029.log
    20240226.NewDataMessage.000343.log
    20240227.NewDataMessage.000530.log
    20240228.NewDataMessage.000627.log
    20240229.NewDataMessage.000110.log
    20240301.NewDataMessage.000208.log
    20240304.NewDataMessage.000324.log
    20240305.NewDataMessage.000509.log
    20240306.NewDataMessage.000550.log
    20240307.NewDataMessage.000051.log
    20240308.NewDataMessage.000227.log
    20240311.NewDataMessage.000305.log
    20240312.NewDataMessage.000531.log
    20240313.NewDataMessage.000131.log
    20240314.NewDataMessage.000208.log
    20240315.NewDataMessage.000209.log
    20240319.NewDataMessage.000611.log
    20240320.NewDataMessage.000111.log
    20240321.NewDataMessage.000331.log
    20240322.NewDataMessage.000609.log
    20240326.NewDataMessage.000230.log
    20240327.NewDataMessage.000507.log
    20240328.NewDataMessage.000130.log
    20240402.NewDataMessage.000051.log
    20240403.NewDataMessage.000148.log
    20240404.NewDataMessage.000612.log
    20240405.NewDataMessage.000027.log
    20240408.NewDataMessage.000628.log
    20240409.NewDataMessage.000050.log
    20240410.NewDataMessage.000210.log
    20240411.NewDataMessage.000409.log
    20240412.NewDataMessage.000628.log
    20240415.NewDataMessage.000606.log
    20240416.NewDataMessage.000651.log
    20240417.NewDataMessage.000131.log
    20240418.NewDataMessage.000250.log
    20240419.NewDataMessage.000408.log
    20240422.NewDataMessage.000409.log
    20240423.NewDataMessage.000607.log
    20240424.NewDataMessage.000630.log
    20240425.NewDataMessage.000207.log
    20240426.NewDataMessage.000311.log
    20240429.NewDataMessage.000149.log
""".split()

In [3]:
# Stock symbols that we are interested in.
interested_symbols = "TPEIR MYTIL OPAP PPC HTO".split()

# Message types that we are interested in (one letter per category).
message_types = list("ABCGILMNOPQRS")

# Available trading dates (YYYYMMDD), one row per calendar week; kept as a tuple.
Dates = tuple("""
    20240131 20240201 20240202
    20240205 20240206 20240207 20240208 20240209
    20240212 20240213 20240214 20240215 20240216
    20240219 20240220 20240221 20240222 20240223
    20240226 20240227 20240228 20240229 20240301
    20240304 20240305 20240306 20240307 20240308
    20240311 20240312 20240313 20240314 20240315
    20240319 20240320 20240321 20240322
    20240326 20240327 20240328
    20240402 20240403 20240404 20240405
    20240408 20240409 20240410 20240411 20240412
    20240415 20240416 20240417 20240418 20240419
    20240422 20240423 20240424 20240425 20240426
    20240429
""".split())

In [4]:
# Empty frame, one row per trading date; its single "K" column will be filled
# with the category K (control) messages.
data_K = pd.DataFrame(
    columns=["K"],
    index=Dates,
)

In [5]:
# Empty frame, one row per symbol and one column per message type; it will be
# filled with the data of all message categories other than K.
data = pd.DataFrame(
    columns=message_types,
    index=interested_symbols,
)

In [6]:
# According to the Documentation, messages are structured by header, text, soh, etx, lrc.

# soh = the beginning of the message
# etx = the end of the message
# lrc = the message validation check for its correctness.

# The three functions below separate a message into its basic structural elements; after them, each message type has its own function that extracts the information it carries.

def extract_header(message):
    """Return the 35 characters that follow the first character of `message`.

    The first character (index 0) is skipped; a shorter message simply yields
    a shorter (possibly empty) header.
    """
    skipped = 1
    header_width = 35
    return message[skipped:skipped + header_width]

def extract_text(message):
    """Return the message text: everything from index 36 up to the ETX character.

    Args:
        message: the decoded message string.

    Returns:
        The characters between index 36 and the first "\\x03" (ETX). When the
        message contains no ETX, everything from index 36 onwards is returned.
    """
    text_start = 36
    text_end = message.find("\x03")
    if text_end == -1:
        # str.find signals "not found" with -1; slicing with it would silently
        # drop the last character of the text.
        return message[text_start:]
    return message[text_start:text_end]

def check_soh_etx_lrc(message):
    soh = message[0] == "\x01"
    etx = message[-2] == "\x03"
    lrc = message[-1] 
    return soh, etx, lrc

In [7]:
# It concerns messages such as the start and close of a trading day as well as Administrative or Line Verification.

def Category_K_Control_Message(header, text):
    """Decode a category K (control) message.

    Args:
        header: message header; its last 20 characters are the publication timestamp.
        text: message text; its first character is the subcategory and the
            remainder, if any, is free text.

    Returns:
        dict with "Publication Timestamp", "Subcategory" and "Free Text"
        (the placeholder "NAn" when the text is exactly one character long).
    """
    record = {
        "Publication Timestamp": header[-20:],
        "Subcategory": text[0],
    }
    record["Free Text"] = "NAn" if len(text) == 1 else text[1:]
    return record

# It concerns messages for implementing a transaction.

def Category_A_Trade_Message(header, text):
    """Decode a category A (trade) message into a flat dict of fields.

    Args:
        header: message header; the last 20 characters are the publication
            timestamp and the character at index 3 is the subcategory.
        text: fixed-width message text (fields are read up to index 147).

    Returns:
        dict of the trade fields; price, volumes and notional amount are
        converted to float, everything else is kept as the raw string.
    """
    def implied_decimal(begin, end, whole_digits):
        # Fixed-width number without a decimal point: the point goes after
        # the first `whole_digits` characters of the field.
        raw = text[begin:end]
        return float(raw[:whole_digits] + "." + raw[whole_digits:])

    return {
        "Publication Timestamp": header[-20:],
        "Subcategory": header[3],
        "Symbol": text[:15],
        "Board ID": text[15],
        "Trade number": text[16:22],
        "Buy order number": text[22:30],
        "Buy order date": text[30:38],
        "Sell order number": text[38:46],
        "Sell order date": text[46:54],
        "Price": implied_decimal(54, 63, 5),
        "Volume": implied_decimal(63, 80, 15),
        "Total Volume": implied_decimal(80, 97, 15),
        "Trade Type": text[97],
        "Trade Source": text[98],
        "Buy order type": text[99],
        "Sell order type": text[100],
        "Buy Waiver Indicator": text[101:105],
        "Sell Waiver Indicator": text[105:109],
        "Notional amount": implied_decimal(109, 123, 12),
        "Buy ALGO flag": text[123],
        "Sell ALGO flag": text[124],
        "SDIV flag": text[125],
        "DUPL flag": text[126],
        "TimeStamp": text[127:147],
    }

# It concerns messages canceling a transaction.

def Category_I_Cancelled_Trade_Message(header, text):
    """Decode a category I (cancelled trade) message into a flat dict of fields.

    Args:
        header: message header; the last 20 characters are the publication
            timestamp and the character at index 3 is the subcategory.
        text: fixed-width message text (fields are read up to index 147).

    Returns:
        dict of the cancelled-trade fields; the cancelled price, volumes and
        notional amount are converted to float, everything else is kept as
        the raw string.
    """
    def implied_decimal(begin, end, whole_digits):
        # Fixed-width number without a decimal point: the point goes after
        # the first `whole_digits` characters of the field.
        raw = text[begin:end]
        return float(raw[:whole_digits] + "." + raw[whole_digits:])

    return {
        "Publication Timestamp": header[-20:],
        "Subcategory": header[3],
        "Symbol": text[:15],
        "Board ID": text[15],
        "Trade number": text[16:22],
        "Buy order number": text[22:30],
        "Buy order date": text[30:38],
        "Sell order number": text[38:46],
        "Sell order date": text[46:54],
        "Cancelled Trade Price": implied_decimal(54, 63, 5),
        "Cancelled Volume": implied_decimal(63, 80, 15),
        "Total Volume": implied_decimal(80, 97, 15),
        "Trade Type": text[97],
        "Trade Source": text[98],
        "Buy order type": text[99],
        "Sell order type": text[100],
        "Buy Waiver Indicator": text[101:105],
        "Sell Waiver Indicator": text[105:109],
        "Notional amount": implied_decimal(109, 123, 12),
        "Buy ALGO flag": text[123],
        "Sell ALGO flag": text[124],
        "SDIV flag": text[125],
        "DUPL flag": text[126],
        "TimeStamp": text[127:147],
    }

# It concerns messages for placing an order on the trading platform.

def Category_Q_Order_Message(header, text):
    """Decode a category Q (order) message into a flat dict of fields.

    Args:
        header: message header; the last 20 characters are the publication
            timestamp and the character at index 3 is the subcategory.
        text: fixed-width message text (fields are read up to index 147).

    Returns:
        dict of the order fields; volume, matched volume and price are
        converted to float, everything else (including the condition volume)
        is kept as the raw string.
    """
    def implied_decimal(begin, end, whole_digits):
        # Fixed-width number without a decimal point: the point goes after
        # the first `whole_digits` characters of the field.
        raw = text[begin:end]
        return float(raw[:whole_digits] + "." + raw[whole_digits:])

    return {
        "Publication Timestamp": header[-20:],
        "Subcategory": header[3],
        "Symbol": text[:15],
        "Board ID": text[15],
        "Order number": text[16:24],
        "Order entry date": text[24:32],
        "Order status": text[32:34],
        "Side": text[34],
        "Volume": implied_decimal(35, 52, 15),
        "Matched Volume": implied_decimal(52, 69, 15),
        "Price": implied_decimal(69, 78, 5),
        "Original price type": text[78],
        "Order lifetime": text[79],
        "Special conditions": text[80],
        "Condition volume": text[81:98],
        "Order Release Date": text[98:106],
        "Order Release Time": text[106:118],
        "Last Order Update Date": text[118:126],
        "Order Type": text[126],
        "TimeStamp": text[127:147],
    }

# It concerns messages about canceling an order on the trading platform.

def Category_R_Cancelled_Order_Message(header, text):
    """Decode a category R (cancelled order) message into a flat dict of fields.

    Args:
        header: message header; the last 20 characters are the publication
            timestamp and the character at index 3 is the subcategory.
        text: fixed-width message text (fields are read up to index 117).

    Returns:
        dict of the cancelled-order fields; volume, matched volume and price
        are converted to float, everything else is kept as the raw string.
    """
    def implied_decimal(begin, end, whole_digits):
        # Fixed-width number without a decimal point: the point goes after
        # the first `whole_digits` characters of the field.
        raw = text[begin:end]
        return float(raw[:whole_digits] + "." + raw[whole_digits:])

    return {
        "Publication Timestamp": header[-20:],
        "Subcategory": header[3],
        "Symbol": text[:15],
        "Board ID": text[15],
        "Order number": text[16:24],
        "Order entry date": text[24:32],
        "Side": text[32],
        "Volume": implied_decimal(33, 50, 15),
        "Matched Volume": implied_decimal(50, 67, 15),
        "Price": implied_decimal(67, 76, 5),
        "Original price type": text[76],
        "Order lifetime": text[77],
        "Special conditions": text[78],
        "Condition volume": text[79:96],
        "Order Type": text[96],
        "TimeStamp": text[97:117],
    }

# It concerns messages that make up the LOB data, where each one signals a change in the Bid or Ask sides.

def Category_B_Quote_Message(header, text):
    """Decode a category B (quote / order-book) message into a flat dict.

    Args:
        header: message header; the last 20 characters are the publication
            timestamp and the character at index 3 is the subcategory.
        text: message text: a 15-character symbol, a 3-character number of
            quote levels, then one 66-character record per level.

    Returns:
        dict with "Publication Timestamp", "Subcategory", "Symbol" and
        "Quote Levels", followed by bid_price_n, bid_size_n, bid_orders_n,
        ask_price_n, ask_size_n and ask_orders_n for every level n (1-based).
        The four leading keys are present even when the message has no levels.

    Raises:
        ValueError: if the level count or a numeric field cannot be parsed.
    """
    def price(field):
        # 9-character price with the decimal point implied after 5 characters.
        return float(field[:5] + "." + field[5:])

    publication_timestamp = header[-20:]
    subcategory = header[3]
    symbol = text[:15]
    quote_levels = int(text[15:18])

    # The message-level fields are set once, before the level loop, so that a
    # quote with zero levels still produces a row that carries its timestamp
    # and symbol instead of an empty dict.
    quote_data = {
        'Publication Timestamp': publication_timestamp,
        'Subcategory': subcategory,
        'Symbol': symbol,
        'Quote Levels': quote_levels,
    }

    level_width = 66
    for level in range(quote_levels):
        start = 18 + level * level_width
        n = level + 1
        quote_data[f'bid_price_{n}'] = price(text[start:start+9])
        # Only the first 15 characters of the 17-character size field are used.
        quote_data[f'bid_size_{n}'] = int(text[start+9:start+26][:15])
        quote_data[f'bid_orders_{n}'] = int(text[start+26:start+33])
        quote_data[f'ask_price_{n}'] = price(text[start+33:start+42])
        quote_data[f'ask_size_{n}'] = int(text[start+42:start+59][:15])
        quote_data[f'ask_orders_{n}'] = int(text[start+59:start+66])

    return quote_data

# It concerns messages for calculating the price of stock market indices.

def Category_C_Index_Message(header, text):
    """Decode a category C (index) message.

    Args:
        header: message header; the last 20 characters are the publication
            timestamp and the character at index 3 is the subcategory.
        text: message text; the first 15 characters are the symbol and the
            last 9 characters are the index price (decimal point implied
            after 5 characters).

    Returns:
        dict with "Publication Timestamp", "Subcategory", "Symbol" and the
        float "Index Price".
    """
    raw_price = text[-9:]
    return {
        "Publication Timestamp": header[-20:],
        "Subcategory": header[3],
        "Symbol": text[:15],
        "Index Price": float(raw_price[:5] + "." + raw_price[5:]),
    }

# It concerns messages for calculating the price of shares during the duration, beginning and end of an auction.

def Category_M_ProjectedAuction_AuctionOpen_ProjectedClosePrice_Message(header, text):
    """Decode a category M (auction price) message.

    Args:
        header: message header; the last 20 characters are the publication
            timestamp and the character at index 3 is the subcategory.
        text: fixed-width message text (fields are read up to index 62).

    Returns:
        dict with the symbol, price flag, float price and volume, and the
        message's own 20-character timestamp.
    """
    raw_price = text[16:25]
    raw_volume = text[25:42]
    fields = {
        "Publication Timestamp": header[-20:],
        "Subcategory": header[3],
        "Symbol": text[:15],
        "Price Flag": text[15],
        # Decimal point is implied after 5 (price) and 15 (volume) characters.
        "Price": float(raw_price[:5] + "." + raw_price[5:]),
        "Volume": float(raw_volume[:15] + "." + raw_volume[15:]),
        "TimeStamp": text[42:62],
    }
    return fields

# It concerns messages about the daily High/Low limits of each stock.

def Category_N_High_Low_Limit_Modification_Message(header, text):
    """Decode a category N (high/low limit modification) message.

    Args:
        header: message header; the last 20 characters are the publication
            timestamp and the character at index 3 is the subcategory.
        text: fixed-width message text (fields are read up to index 56).

    Returns:
        dict with the symbol and the float ceiling price, floor price,
        start-of-day price and accrued interest.
    """
    def price_at(offset):
        # 9-character price with the decimal point implied after 5 characters.
        raw = text[offset:offset + 9]
        return float(raw[:5] + "." + raw[5:])

    # Accrued interest: 14 characters, decimal point implied after 8.
    raw_interest = text[42:56]

    return {
        "Publication Timestamp": header[-20:],
        "Subcategory": header[3],
        "Symbol": text[:15],
        "Ceiling Price": price_at(15),
        "Floor Price": price_at(24),
        "Start Of Day Price": price_at(33),
        "Accrued Interest": float(raw_interest[:8] + "." + raw_interest[8:]),
    }

# It concerns messages for changing the status of each transaction object.

def Category_O_Instrument_State_Message(header, text):
    """Decode a category O (instrument state) message.

    Args:
        header: message header; the last 20 characters are the publication
            timestamp and the character at index 3 is the subcategory.
        text: fixed-width message text (fields are read up to index 38).

    Returns:
        dict with the symbol, phase ID, instrument status, halt/suspend
        reason and the message's own 20-character timestamp, all as strings.
    """
    record = {"Publication Timestamp": header[-20:], "Subcategory": header[3]}
    record["Symbol"] = text[:15]
    # Three consecutive single-character codes follow the symbol.
    record["Phase ID"] = text[15]
    record["Instrument Status"] = text[16]
    record["Halt/Suspend Reason"] = text[17]
    record["TimeStamp"] = text[18:38]
    return record

# It concerns messages about changes in the status of a market.

def Category_P_Market_Status_Message(header, text):
    """Decode a category P (market status) message.

    Args:
        header: message header; the last 20 characters are the publication
            timestamp and the character at index 3 is the subcategory.
        text: message text; character 0 is the market ID and character 1
            the market status.

    Returns:
        dict with "Publication Timestamp", "Subcategory", "Market ID" and
        "Market Status".
    """
    keys = ("Publication Timestamp", "Subcategory", "Market ID", "Market Status")
    values = (header[-20:], header[3], text[0], text[1])
    return dict(zip(keys, values))

# It concerns summary messages of the trading day, providing information such as High, Low, etc. at the end of the day.

def Category_G_Instrument_Summary_Message(header, text):
    """Decode a category G (instrument summary) message.

    Args:
        header: message header; the last 20 characters are the publication
            timestamp and the character at index 3 is the subcategory.
        text: fixed-width message text (fields are read up to index 103).

    Returns:
        dict with the symbol and the float opening, high, low, last, closing
        and start-of-day prices plus total volume and total value.
    """
    def price_at(offset):
        # 9-character price with the decimal point implied after 5 characters.
        raw = text[offset:offset + 9]
        return float(raw[:5] + "." + raw[5:])

    def total_at(offset):
        # 17-character total with the decimal point implied after 15 characters.
        raw = text[offset:offset + 17]
        return float(raw[:15] + "." + raw[15:])

    return {
        "Publication Timestamp": header[-20:],
        "Subcategory": header[3],
        "Symbol": text[:15],
        "Opening Price": price_at(15),
        "High": price_at(24),
        "Low": price_at(33),
        "Last": price_at(42),
        "Closing Price": price_at(51),
        "Start Of Day Price": price_at(60),
        "Total Volume": total_at(69),
        "Total Value": total_at(86),
    }

# It concerns messages about the closing price of each item at the end of the day.

def Category_L_Closing_Price_Message(header, text):
    """Decode a category L (closing price) message.

    Args:
        header: message header; the last 20 characters are the publication
            timestamp and the character at index 3 is the subcategory.
        text: fixed-width message text (fields are read up to index 32).

    Returns:
        dict with the symbol, the float closing price (stored under the key
        "Closing " — note the trailing space) and the open interest as the
        raw 8-character string.
    """
    raw_closing = text[15:24]
    return {
        "Publication Timestamp": header[-20:],
        "Subcategory": header[3],
        "Symbol": text[:15],
        # Decimal point is implied after the first 5 characters.
        "Closing ": float(raw_closing[:5] + "." + raw_closing[5:]),
        "Open Interest": text[24:32],
    }

# It concerns messages for exchanging notifications.
def Category_S_Exchange_Notifications_Message(header, text):
    """Decode a category S (exchange notification) message.

    Args:
        header: message header; the last 20 characters are the publication
            timestamp and the character at index 3 is the subcategory.
        text: message text: two 72-character headlines (English, local), two
            5-character sizes (English, local), then the notification bodies.

    Returns:
        dict with both headlines, both sizes as int, the English body (cut
        according to the English size) and the local body (everything after
        the English body).
    """
    record = {
        "Publication Timestamp": header[-20:],
        "Subcategory": header[3],
        "Notification Headline in English": text[:72],
        "Notification Headline in Local": text[72:144],
    }

    english_size = int(text[144:149])
    local_size = int(text[149:154])

    # Bodies start right after the two size fields; the English body comes first.
    bodies_start = 154
    english_end = bodies_start + english_size
    record.update({
        "Text Size in English": english_size,
        "Text Size in Local": local_size,
        "English Notification Text": text[bodies_start:english_end],
        "Local Notification Text": text[english_end:],
    })
    return record

In [8]:
# Folder that holds the feed log files. Set the ATHEX_DATA_DIR environment
# variable to point at another location; when it is not set, the original
# local path is used.
directory_path = os.environ.get(
    'ATHEX_DATA_DIR',
    '/Users/lamprosganias/Downloads/datedata/2024_02_03_04/0 - data',
)

In [None]:
# For each date we keep in the variable data_K the messages of category K.

# Every log file name starts with its trading date (YYYYMMDD), so each date is
# matched to its own file. Looping over all files for every date would leave
# every row holding the control messages of the last file only.
files_by_date = {name.split(".", 1)[0]: name for name in filenames}

for trading_date in data_K.index:
    file = files_by_date.get(trading_date)
    if file is None:
        print(f"{trading_date}: no log file found, skipped")
        continue

    file_path = os.path.join(directory_path, file)
    control_messages_data = []

    # Each line of the log is one message encoded as a hex string.
    with open(file_path, 'r') as ndata:
        for hex_string in ndata:
            hex_string = hex_string.strip()
            if not hex_string:
                # A blank line would decode to an empty message and fail on header[2].
                continue
            message = bytes.fromhex(hex_string).decode('ASCII')
            header = extract_header(message)
            if header[2] == 'K':
                text = extract_text(message)
                control_messages_data.append(Category_K_Control_Message(header, text))

    control_messages = pd.DataFrame(control_messages_data)
    if len(control_messages) > 0:
        control_messages['Publication Timestamp'] = pd.to_datetime(
            control_messages['Publication Timestamp'], format='%Y%m%d%H%M%S%f'
        )
    data_K.at[trading_date, 'K'] = control_messages
    print(trading_date)

20240131
20240201
20240202
20240205
20240206
20240207
20240208
20240209
20240212
20240213
20240214
20240215
20240216
20240219
20240220
20240221
20240222
20240223
20240226
20240227
20240228
20240229
20240301
20240304
20240305
20240306
20240307
20240308
20240311
20240312
20240313
20240314
20240315
20240319
20240320
20240321
20240322
20240326
20240327
20240328
20240402
20240403
20240404
20240405
20240408
20240409
20240410
20240411
20240412
20240415
20240416
20240417
20240418
20240419
20240422
20240423
20240424
20240425
20240426
20240429


In [9]:
# Procedure for finding the stocks with the highest number of observations. 
# Through this process, we arrived at the five stocks from different sectors that we will work on.
# The stocks are: TPEIR, MYTIL, OPAP, PPC, HTO. 
# We relied on message category B concerning LOB data

# Number of category B (quote) messages seen per symbol, across all files.
symbol_counts = {}

for file in filenames:
    file_path = os.path.join(directory_path, file)

    # Each line of the log is one message encoded as a hex string; lines are
    # processed one at a time instead of decoding the whole file up front.
    with open(file_path, 'r') as ndata:
        for hex_string in ndata:
            hex_string = hex_string.strip()
            if not hex_string:
                # A blank line would decode to an empty message and fail on header[2].
                continue
            message = bytes.fromhex(hex_string).decode('ASCII')
            header = extract_header(message)
            if header[2] == 'B':
                symbol = extract_text(message)[:15]
                symbol_counts[symbol] = symbol_counts.get(symbol, 0) + 1

# dicts keep insertion order, so the keys are the distinct symbols in order of
# first appearance (no per-message list scan needed).
unique_text_fragments = list(symbol_counts)

# Most frequently quoted symbols first.
sorted_symbols = sorted(symbol_counts.items(), key=lambda item: item[1], reverse=True)
# print("Unique text fragments:", unique_text_fragments)
# print("Symbol counts (sorted):")
# for symbol, count in sorted_symbols:
#     print(f"{symbol}: {count}")


In [None]:
# Then, for the shares we have defined, we hold the data for each message type in the data variable.

full_timestamp_format = '%Y%m%d%H%M%S%f'
date_format = '%Y%m%d'
time_format = '%H%M%S%f'

# message type -> (parser, {column: datetime format applied once the rows are collected})
message_specs = {
    'A': (Category_A_Trade_Message,
          {'Publication Timestamp': full_timestamp_format, 'TimeStamp': full_timestamp_format}),
    'I': (Category_I_Cancelled_Trade_Message,
          {'Publication Timestamp': full_timestamp_format, 'TimeStamp': full_timestamp_format}),
    'Q': (Category_Q_Order_Message,
          {'Publication Timestamp': full_timestamp_format,
           'Order entry date': date_format,
           'Order Release Date': date_format,
           'Order Release Time': time_format,
           'TimeStamp': full_timestamp_format}),
    'R': (Category_R_Cancelled_Order_Message,
          {'Publication Timestamp': full_timestamp_format,
           'Order entry date': date_format,
           'TimeStamp': full_timestamp_format}),
    'B': (Category_B_Quote_Message, {'Publication Timestamp': full_timestamp_format}),
    'C': (Category_C_Index_Message, {'Publication Timestamp': full_timestamp_format}),
    'M': (Category_M_ProjectedAuction_AuctionOpen_ProjectedClosePrice_Message,
          {'Publication Timestamp': full_timestamp_format, 'TimeStamp': full_timestamp_format}),
    'N': (Category_N_High_Low_Limit_Modification_Message,
          {'Publication Timestamp': full_timestamp_format}),
    'O': (Category_O_Instrument_State_Message,
          {'Publication Timestamp': full_timestamp_format, 'TimeStamp': full_timestamp_format}),
    'P': (Category_P_Market_Status_Message, {'Publication Timestamp': full_timestamp_format}),
    'G': (Category_G_Instrument_Summary_Message, {'Publication Timestamp': full_timestamp_format}),
    'L': (Category_L_Closing_Price_Message, {'Publication Timestamp': full_timestamp_format}),
    'S': (Category_S_Exchange_Notifications_Message, {'Publication Timestamp': full_timestamp_format}),
}

# NOTE(review): only messages whose text starts with "<symbol> " are kept.
# The P and S parsers read a market ID / a headline from the start of the
# text, not a symbol, so those columns normally stay empty — confirm whether
# market-wide messages should be collected separately.

for i in data.index:
    print(i)
    symbol_prefix = i + " "
    records_by_type = {message_type: [] for message_type in message_specs}

    # The files are read again for every symbol, so only one symbol's raw
    # records are held in memory at a time.
    for file in filenames:
        file_path = os.path.join(directory_path, file)

        # Each line of the log is one message encoded as a hex string.
        with open(file_path, 'r') as ndata:
            for hex_string in ndata:
                hex_string = hex_string.strip()
                if not hex_string:
                    # A blank line would decode to an empty message.
                    continue
                message = bytes.fromhex(hex_string).decode('ASCII')
                text = extract_text(message)
                if not text.startswith(symbol_prefix):
                    continue
                header = extract_header(message)
                spec = message_specs.get(header[2])
                if spec is None:
                    # Message type we are not interested in.
                    continue
                parser = spec[0]
                records_by_type[header[2]].append(parser(header, text))

    for message_type, (_, datetime_formats) in message_specs.items():
        messages = pd.DataFrame(records_by_type[message_type])
        # An empty frame has no columns, so the conversions only run when rows exist.
        if len(messages) > 0:
            for column, column_format in datetime_formats.items():
                messages[column] = pd.to_datetime(messages[column], format=column_format)
        data.at[i, message_type] = messages


TPEIR
MYTIL
OPAP
PPC
HTO


In [None]:
# The parsed data is a DataFrame of DataFrames, as shown below: one row per
# stock, one column per message-category letter, and each cell holds the
# DataFrame of that category's messages (empty when none were collected).
data

Unnamed: 0,A,B,C,G,I,L,M,N,O,P,Q,R,S
TPEIR,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Empty DataFrame Columns: [] Index: [],Publication Timestamp Subcategory ...,Empty DataFrame Columns: [] Index: [],Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Empty DataFrame Columns: [] Index: [],Empty DataFrame Columns: [] Index: [],Empty DataFrame Columns: [] Index: [],Empty DataFrame Columns: [] Index: []
MYTIL,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Empty DataFrame Columns: [] Index: [],Publication Timestamp Subcategory ...,Empty DataFrame Columns: [] Index: [],Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Empty DataFrame Columns: [] Index: [],Empty DataFrame Columns: [] Index: [],Empty DataFrame Columns: [] Index: [],Empty DataFrame Columns: [] Index: []
OPAP,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Empty DataFrame Columns: [] Index: [],Publication Timestamp Subcategory ...,Empty DataFrame Columns: [] Index: [],Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Empty DataFrame Columns: [] Index: [],Empty DataFrame Columns: [] Index: [],Empty DataFrame Columns: [] Index: [],Empty DataFrame Columns: [] Index: []
PPC,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Empty DataFrame Columns: [] Index: [],Publication Timestamp Subcategory ...,Empty DataFrame Columns: [] Index: [],Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Empty DataFrame Columns: [] Index: [],Empty DataFrame Columns: [] Index: [],Empty DataFrame Columns: [] Index: [],Empty DataFrame Columns: [] Index: []
HTO,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Empty DataFrame Columns: [] Index: [],Publication Timestamp Subcategory ...,Empty DataFrame Columns: [] Index: [],Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Empty DataFrame Columns: [] Index: [],Empty DataFrame Columns: [] Index: [],Empty DataFrame Columns: [] Index: [],Empty DataFrame Columns: [] Index: []


In [None]:
# From here on we refer to this DataFrame as `stocks`.
# This is a plain assignment, so `stocks` is an alias of `data`, not a copy.
stocks = data

In [None]:
def remove_empty_columns(dataset):
    """Return ``dataset`` without the columns in which every cell is empty.

    A cell counts as empty when it holds an empty ``pd.DataFrame`` or any
    value that is not a ``pd.DataFrame`` at all. A column is dropped only if
    all of its cells are empty by that rule.

    Parameters
    ----------
    dataset : pd.DataFrame
        Frame whose cells are expected to hold DataFrames.

    Returns
    -------
    pd.DataFrame
        The result of ``dataset.drop`` for the all-empty columns; the input
        frame itself is not modified.
    """
    def _is_blank(cell):
        # Anything that is not a DataFrame is treated as "nothing to keep".
        return not isinstance(cell, pd.DataFrame) or cell.empty

    blank_columns = [
        name
        for name in dataset.columns
        if all(_is_blank(cell) for cell in dataset[name])
    ]
    return dataset.drop(columns=blank_columns)

# Apply the function to drop the message-category columns that are empty for
# every stock; `stocks` is rebound to the frame the function returns.
stocks = remove_empty_columns(stocks)


In [14]:
stocks

Unnamed: 0,A,B,G,L,M,N,O
TPEIR,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...
MYTIL,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...
OPAP,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...
PPC,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...
HTO,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...,Publication Timestamp Subcategory ...


In [None]:
# Export the data to pickle files. The paths are relative, so the files are
# written to the current working directory.
stocks.to_pickle('stocks.pkl')
# NOTE(review): `data_K` is not defined in this part of the notebook —
# presumably it is built in an earlier cell; confirm before running this cell alone.
data_K.to_pickle('data_K.pkl')