In [1]:
import pandas as pd

# Column names applied to the order stream file, in file order.
# NOTE(review): the meaning of each column is taken from these names only;
# confirm against the data provider's schema.
STREAM_COLUMNS = ['timestamp', 'order_id', 'price', 'quantity', 'agent_id', 'direction', 'other_metric']


def attach_labels(order_book, labels):
    """Return a copy of ``order_book`` with ``labels['Label']`` added as 'Label'.

    The assignment is index-aligned (pandas semantics): a row of
    ``order_book`` receives the label stored under the same index value in
    ``labels``. When the two indexes differ, rows without a matching label
    get NaN and labels without a matching row are dropped; a later
    ``groupby('Label')`` then silently excludes the NaN rows. To make that
    situation visible, a ``UserWarning`` is emitted whenever the indexes are
    not identical. The returned data is the same as a plain column
    assignment in every case.

    Parameters
    ----------
    order_book : pandas.DataFrame
        Frame to be labelled; it is not modified.
    labels : pandas.DataFrame
        Frame that contains a 'Label' column.

    Returns
    -------
    pandas.DataFrame
        Copy of ``order_book`` with an extra 'Label' column.
    """
    import warnings

    if not order_book.index.equals(labels.index):
        warnings.warn(
            "order book and labels do not share the same index "
            f"({len(order_book)} order book rows vs {len(labels)} label rows); "
            "unmatched rows get NaN labels and are excluded from grouped statistics",
            UserWarning,
            stacklevel=2,
        )

    labelled = order_book.copy()
    labelled['Label'] = labels['Label']
    return labelled


# The guard is true inside a Jupyter kernel and when run as a script, so the
# analysis below executes exactly as before and still defines all of its
# variables at module level; it only keeps the helper importable without I/O.
if __name__ == "__main__":
    # Load the label files
    labels_1 = pd.read_csv('ALCD_labels_800001.csv')
    labels_2 = pd.read_csv('ALCD_labels_800002.csv')

    # Load the order book files
    order_book_1 = pd.read_csv('ALCD_orderbook_20_800001.csv')
    order_book_2 = pd.read_csv('ALCD_orderbook_20_800002.csv')

    # Load the order stream file (only the 800002 stream is read here)
    order_stream_2 = pd.read_csv('ALCD_stream_800002.csv')

    # Stack the per-file frames; ignore_index=True gives each result a fresh
    # 0..n-1 index, which is what the label attachment below aligns on.
    labels = pd.concat([labels_1, labels_2], ignore_index=True)
    order_book = pd.concat([order_book_1, order_book_2], ignore_index=True)

    # Rename columns for better understanding (labels must have exactly two
    # columns, otherwise pandas raises a length-mismatch error here).
    labels.columns = ['ID', 'Label']
    order_book.columns = [f'col_{i}' for i in range(order_book.shape[1])]

    # Example analysis: count the number of each label type
    label_counts = labels['Label'].value_counts()

    # Attach labels to the order book rows. This assumes row i of the stacked
    # labels describes row i of the stacked order book; attach_labels warns
    # if the two frames are not index-identical.
    order_book_with_labels = attach_labels(order_book, labels)

    # Example analysis: describe the order book data grouped by labels
    grouped_description = order_book_with_labels.groupby('Label').describe()

    # Process order stream data
    order_stream_2.columns = STREAM_COLUMNS
    order_stream_summary = order_stream_2.describe()

    # Save the analysis results to CSV files
    label_counts.to_csv('label_counts.csv', index=True)
    grouped_description.to_csv('grouped_description.csv')
    order_stream_summary.to_csv('order_stream_summary.csv')

    # Print the analysis results
    print("Label Counts:\n", label_counts)
    print("\nGrouped Description:\n", grouped_description)
    print("\nOrder Stream Summary:\n", order_stream_summary)


Label Counts:
 Label
NoiseAgent                     2000
ValueAgent                      204
MomentumAgent                    24
AdaptivePOVMarketMakerAgent       4
ExecutionAgent                    2
Name: count, dtype: int64

Grouped Description:
                               col_0                                        \
                              count          mean           std       min   
Label                                                                       
AdaptivePOVMarketMakerAgent     4.0  1.000382e+05  2.801636e+01  100013.0   
ExecutionAgent                  2.0  1.000385e+05  3.323402e+01  100015.0   
MomentumAgent                  24.0  1.000389e+05  2.456104e+01  100014.0   
NoiseAgent                   2000.0  1.009992e+07  3.161455e+08   99937.0   
ValueAgent                    204.0  1.000327e+05  2.358997e+01  100007.0   

                                                                           \
                                   25%       50%        