In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Final

# Configure plot styling once, up front: apply seaborn's theme with its
# default arguments, then select the 'husl' colour palette.
sns.set_theme()
sns.set_palette('husl')

In [2]:
# Directory the CSV inputs are read from. The path is relative, so it is
# resolved against the kernel's current working directory. Annotated Final
# to signal that it is a constant and should not be reassigned.
DATA_DIR: Final[Path] = Path('data')

# Function to load price data for a specific day
def load_price_data(day: int) -> pd.DataFrame:
    """Read one day's round-1 price CSV into a DataFrame.

    Args:
        day: Day number interpolated into the file name (-2, -1, or 0).

    Returns:
        The parsed contents of ``prices_round_1_day_<day>.csv`` located
        under ``DATA_DIR``, read with ``;`` as the field separator.
    """
    csv_name = f'prices_round_1_day_{day}.csv'
    prices = pd.read_csv(DATA_DIR / csv_name, sep=';')
    return prices

# Function to load trade data for a specific day
def load_trade_data(day: int) -> pd.DataFrame:
    """Read one day's round-1 trade CSV into a DataFrame.

    Args:
        day: Day number interpolated into the file name (-2, -1, or 0).

    Returns:
        The parsed contents of ``trades_round_1_day_<day>.csv`` located
        under ``DATA_DIR``, read with ``;`` as the field separator.
    """
    csv_name = f'trades_round_1_day_{day}.csv'
    trades = pd.read_csv(DATA_DIR.joinpath(csv_name), sep=';')
    return trades

# Days for which CSV files are loaded. Kept in one place so the price and
# trade dictionaries are always built over the same set of days.
DAYS: Final = (-2, -1, 0)

# Load data for all days into {day: DataFrame} lookups (prices first, then trades)
price_data = {day: load_price_data(day) for day in DAYS}
trade_data = {day: load_trade_data(day) for day in DAYS}

In [3]:
# Display basic information (schema + first rows) about the price data and
# then the trade data, one day at a time.
#
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line
# after every report.
for heading, frames_by_day in (
    ("Price Data Structure:", price_data),
    ("\nTrade Data Structure:", trade_data),
):
    print(heading)
    for day, df in frames_by_day.items():
        print(f"\nDay {day}:")
        df.info()
        print("\nSample data:")
        display(df.head())

Price Data Structure:

Day -2:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30000 entries, 0 to 29999
Data columns (total 17 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   day              30000 non-null  int64  
 1   timestamp        30000 non-null  int64  
 2   product          30000 non-null  object 
 3   bid_price_1      30000 non-null  int64  
 4   bid_volume_1     30000 non-null  int64  
 5   bid_price_2      11790 non-null  float64
 6   bid_volume_2     11790 non-null  float64
 7   bid_price_3      2081 non-null   float64
 8   bid_volume_3     2081 non-null   float64
 9   ask_price_1      30000 non-null  int64  
 10  ask_volume_1     30000 non-null  int64  
 11  ask_price_2      12619 non-null  float64
 12  ask_volume_2     12619 non-null  float64
 13  ask_price_3      2193 non-null   float64
 14  ask_volume_3     2193 non-null   float64
 15  mid_price        30000 non-null  float64
 16  profit_and_loss  30000 non-

Unnamed: 0,day,timestamp,product,bid_price_1,bid_volume_1,bid_price_2,bid_volume_2,bid_price_3,bid_volume_3,ask_price_1,ask_volume_1,ask_price_2,ask_volume_2,ask_price_3,ask_volume_3,mid_price,profit_and_loss
0,-2,0,RAINFOREST_RESIN,9996,1,9995.0,25.0,,,10004,1,10005.0,25.0,,,10000.0,0.0
1,-2,0,KELP,1998,26,,,,,2002,26,,,,,2000.0,0.0
2,-2,0,SQUID_INK,1998,26,,,,,2002,26,,,,,2000.0,0.0
3,-2,100,SQUID_INK,1999,5,1998.0,26.0,,,2001,20,,,,,2000.0,0.0
4,-2,100,RAINFOREST_RESIN,10000,5,9995.0,20.0,,,10005,20,,,,,10002.5,0.0



Day -1:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30000 entries, 0 to 29999
Data columns (total 17 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   day              30000 non-null  int64  
 1   timestamp        30000 non-null  int64  
 2   product          30000 non-null  object 
 3   bid_price_1      30000 non-null  int64  
 4   bid_volume_1     30000 non-null  int64  
 5   bid_price_2      11699 non-null  float64
 6   bid_volume_2     11699 non-null  float64
 7   bid_price_3      2018 non-null   float64
 8   bid_volume_3     2018 non-null   float64
 9   ask_price_1      30000 non-null  int64  
 10  ask_volume_1     30000 non-null  int64  
 11  ask_price_2      12428 non-null  float64
 12  ask_volume_2     12428 non-null  float64
 13  ask_price_3      2107 non-null   float64
 14  ask_volume_3     2107 non-null   float64
 15  mid_price        30000 non-null  float64
 16  profit_and_loss  30000 non-null  float64
dtypes: 

Unnamed: 0,day,timestamp,product,bid_price_1,bid_volume_1,bid_price_2,bid_volume_2,bid_price_3,bid_volume_3,ask_price_1,ask_volume_1,ask_price_2,ask_volume_2,ask_price_3,ask_volume_3,mid_price,profit_and_loss
0,-1,0,SQUID_INK,2005,1,2002.0,31.0,,,2006,31,,,,,2005.5,0.0
1,-1,0,RAINFOREST_RESIN,10002,1,9996.0,2.0,9995.0,29.0,10004,2,10005.0,29.0,,,10003.0,0.0
2,-1,0,KELP,2028,1,2026.0,2.0,2025.0,29.0,2029,31,,,,,2028.5,0.0
3,-1,100,KELP,2025,24,,,,,2028,2,2029.0,22.0,,,2026.5,0.0
4,-1,100,RAINFOREST_RESIN,9996,2,9995.0,22.0,,,10004,2,10005.0,22.0,,,10000.0,0.0



Day 0:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30000 entries, 0 to 29999
Data columns (total 17 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   day              30000 non-null  int64  
 1   timestamp        30000 non-null  int64  
 2   product          30000 non-null  object 
 3   bid_price_1      30000 non-null  int64  
 4   bid_volume_1     30000 non-null  int64  
 5   bid_price_2      11733 non-null  float64
 6   bid_volume_2     11733 non-null  float64
 7   bid_price_3      2031 non-null   float64
 8   bid_volume_3     2031 non-null   float64
 9   ask_price_1      30000 non-null  int64  
 10  ask_volume_1     30000 non-null  int64  
 11  ask_price_2      12518 non-null  float64
 12  ask_volume_2     12518 non-null  float64
 13  ask_price_3      2115 non-null   float64
 14  ask_volume_3     2115 non-null   float64
 15  mid_price        30000 non-null  float64
 16  profit_and_loss  30000 non-null  float64
dtypes: f

Unnamed: 0,day,timestamp,product,bid_price_1,bid_volume_1,bid_price_2,bid_volume_2,bid_price_3,bid_volume_3,ask_price_1,ask_volume_1,ask_price_2,ask_volume_2,ask_price_3,ask_volume_3,mid_price,profit_and_loss
0,0,0,RAINFOREST_RESIN,9998,1,9995.0,30.0,,,10005,30,,,,,10001.5,0.0
1,0,0,KELP,2028,30,,,,,2032,30,,,,,2030.0,0.0
2,0,0,SQUID_INK,1968,30,,,,,1971,30,,,,,1969.5,0.0
3,0,100,SQUID_INK,1969,4,1967.0,31.0,,,1971,31,,,,,1970.0,0.0
4,0,100,KELP,2030,4,2028.0,31.0,,,2032,31,,,,,2031.0,0.0



Trade Data Structure:

Day -2:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8398 entries, 0 to 8397
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   timestamp  8398 non-null   int64  
 1   buyer      0 non-null      float64
 2   seller     0 non-null      float64
 3   symbol     8398 non-null   object 
 4   currency   8398 non-null   object 
 5   price      8398 non-null   float64
 6   quantity   8398 non-null   int64  
dtypes: float64(3), int64(2), object(2)
memory usage: 459.4+ KB
None

Sample data:


Unnamed: 0,timestamp,buyer,seller,symbol,currency,price,quantity
0,0,,,KELP,SEASHELLS,2002.0,1
1,0,,,RAINFOREST_RESIN,SEASHELLS,9996.0,1
2,0,,,RAINFOREST_RESIN,SEASHELLS,9996.0,1
3,0,,,SQUID_INK,SEASHELLS,2002.0,1
4,100,,,KELP,SEASHELLS,2002.0,6



Day -1:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8306 entries, 0 to 8305
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   timestamp  8306 non-null   int64  
 1   buyer      0 non-null      float64
 2   seller     0 non-null      float64
 3   symbol     8306 non-null   object 
 4   currency   8306 non-null   object 
 5   price      8306 non-null   float64
 6   quantity   8306 non-null   int64  
dtypes: float64(3), int64(2), object(2)
memory usage: 454.4+ KB
None

Sample data:


Unnamed: 0,timestamp,buyer,seller,symbol,currency,price,quantity
0,0,,,KELP,SEASHELLS,2029.0,13
1,0,,,KELP,SEASHELLS,2029.0,1
2,0,,,RAINFOREST_RESIN,SEASHELLS,10004.0,1
3,0,,,SQUID_INK,SEASHELLS,2006.0,13
4,0,,,SQUID_INK,SEASHELLS,2006.0,1



Day 0:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8182 entries, 0 to 8181
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   timestamp  8182 non-null   int64  
 1   buyer      0 non-null      float64
 2   seller     0 non-null      float64
 3   symbol     8182 non-null   object 
 4   currency   8182 non-null   object 
 5   price      8182 non-null   float64
 6   quantity   8182 non-null   int64  
dtypes: float64(3), int64(2), object(2)
memory usage: 447.6+ KB
None

Sample data:


Unnamed: 0,timestamp,buyer,seller,symbol,currency,price,quantity
0,100,,,RAINFOREST_RESIN,SEASHELLS,10002.0,1
1,300,,,KELP,SEASHELLS,2029.0,6
2,300,,,RAINFOREST_RESIN,SEASHELLS,9998.0,2
3,300,,,RAINFOREST_RESIN,SEASHELLS,9998.0,1
4,300,,,SQUID_INK,SEASHELLS,1965.0,6


In [4]:
def analyze_product_metrics(day: int) -> None:
    """Print mid-price and trade summaries for every product on one day.

    Products are taken from the ``product`` column of the day's price
    table; trades are matched to a product through the ``symbol`` column
    of the day's trade table. Nothing is returned, the report is printed.

    Args:
        day: Key into ``price_data`` and ``trade_data`` selecting the day
            to report on.
    """
    day_prices = price_data[day]
    day_trades = trade_data[day]

    # Walk the products in the order pandas' unique() reports them
    for product in day_prices['product'].unique():
        print(f"\nAnalysis for {product} on Day {day}:")

        # Mid-price summary over this product's rows in the price table
        product_prices = day_prices.loc[day_prices['product'] == product]
        print("\nPrice Statistics:")
        print(f"Average mid price: {product_prices['mid_price'].mean():.2f}")
        print(f"Min mid price: {product_prices['mid_price'].min():.2f}")
        print(f"Max mid price: {product_prices['mid_price'].max():.2f}")

        # Trade count, summed quantity and unweighted mean trade price
        product_trades = day_trades.loc[day_trades['symbol'] == product]
        print("\nTrade Statistics:")
        print(f"Total trades: {len(product_trades)}")
        print(f"Total volume: {product_trades['quantity'].sum()}")
        print(f"Average trade price: {product_trades['price'].mean():.2f}")

# Print the per-product report for every loaded day, in chronological order
for day in (-2, -1, 0):
    analyze_product_metrics(day)


Analysis for RAINFOREST_RESIN on Day -2:

Price Statistics:
Average mid price: 10000.00
Min mid price: 9996.50
Max mid price: 10003.50

Trade Statistics:
Total trades: 2450
Total volume: 5184
Average trade price: 10000.04

Analysis for KELP on Day -2:

Price Statistics:
Average mid price: 2011.76
Min mid price: 1997.00
Max mid price: 2034.00

Trade Statistics:
Total trades: 2957
Total volume: 8310
Average trade price: 2011.13

Analysis for SQUID_INK on Day -2:

Price Statistics:
Average mid price: 2033.95
Min mid price: 1953.00
Max mid price: 2187.50

Trade Statistics:
Total trades: 2991
Total volume: 8328
Average trade price: 2034.85

Analysis for SQUID_INK on Day -1:

Price Statistics:
Average mid price: 1971.55
Min mid price: 1832.00
Max mid price: 2045.50

Trade Statistics:
Total trades: 2947
Total volume: 8467
Average trade price: 1971.11

Analysis for RAINFOREST_RESIN on Day -1:

Price Statistics:
Average mid price: 9999.99
Min mid price: 9996.50
Max mid price: 10003.50

Trade S