In [1]:
import pandas as pd
import os
from pathlib import Path
import re

def load_trading_data(data_folder: str) -> dict[str, pd.DataFrame]:
    """
    Load trading data from CSV files into pandas DataFrames, agnostic to round and day.

    Only regular files directly inside ``data_folder`` whose names match
    ``<type>_round_<R>_day_<D>.csv`` are read, where ``<type>`` is one of
    ``prices``, ``trades`` or ``observations``. Every other entry is ignored.
    ``prices`` and ``trades`` files are parsed with ``;`` as separator,
    ``observations`` files with ``,``. Each loaded frame gets integer ``day``
    and ``round`` columns taken from the file name.

    Args:
        data_folder (str): Path to the folder containing the CSV files

    Returns:
        Dict[str, pd.DataFrame]: Dictionary containing the following DataFrames
        (a key is present only if at least one matching file was found):
            - 'prices': Price data for all days/rounds
            - 'trades': Trade data for all days/rounds
            - 'observations': Observation data for all days/rounds
        Each frame is ordered by ``day`` and, when the column exists,
        ``timestamp``.
    """
    frames_by_type = {
        'prices': [],
        'trades': [],
        'observations': []
    }

    # Regex to match files and extract type, round, and day
    pattern = re.compile(r'^(prices|trades|observations)_round_(\d+)_day_(\d+)\.csv$')

    # iterdir() yields entries in arbitrary order; sorting the paths makes the
    # concatenation order (and therefore the result) reproducible.
    for file in sorted(Path(data_folder).iterdir()):
        if not file.is_file():
            continue
        match = pattern.match(file.name)
        if match is None:
            continue

        data_type, round_num, day = match.groups()
        sep = ';' if data_type in ('prices', 'trades') else ','
        df = pd.read_csv(file, sep=sep)
        df['day'] = int(day)
        df['round'] = int(round_num)
        if 'timestamp' in df.columns:
            df = df.sort_values('timestamp', kind='stable')
        frames_by_type[data_type].append(df)

    # Concatenate all days' data
    result = {}
    for key, dfs in frames_by_type.items():
        if not dfs:  # Only add if we found any data
            continue
        combined = pd.concat(dfs, ignore_index=True)
        # Sort only on the columns that are actually present: a table without
        # a 'timestamp' column must not raise KeyError here.
        sort_keys = [col for col in ('day', 'timestamp') if col in combined.columns]
        result[key] = combined.sort_values(sort_keys, kind='stable')

    return result

def convert_timestamp(df: pd.DataFrame) -> pd.DataFrame:
    """Replace the ``day`` and ``timestamp`` columns with a single ``t`` column.

    ``t`` is computed as ``(day - 1) * 1_000_000 + timestamp``. The input
    frame is left untouched; a new frame without ``day`` and ``timestamp``
    and with ``t`` added is returned.
    """
    day_stride = 1_000_000
    combined_t = (df['day'] - 1) * day_stride + df['timestamp']
    converted = df.drop(columns=['day', 'timestamp'])
    converted['t'] = combined_t
    return converted

In [20]:
# Read every matching CSV from the data folder, then move each table onto the
# combined `t` column produced by convert_timestamp.
trading_data = load_trading_data('round-5-island-data-bottle')
price_df = convert_timestamp(trading_data['prices'])
observation_df = convert_timestamp(trading_data['observations'])
# Trades additionally lose their `round` column and are indexed by `t`.
counterparty_df = (
    convert_timestamp(trading_data['trades'])
    .drop(columns='round')
    .set_index('t')
)
# Distinct values of the `seller` column only; the `buyer` column is not consulted.
parties = counterparty_df['seller'].unique()

In [124]:
def add_price_extremes(df, window=3, center=True):
    """Attach per-product rolling extremes of ``mid_price``.

    The rows are ordered by ``product`` then ``t``. Within each product a
    rolling window of ``window`` rows (``min_periods=1``, centred when
    ``center`` is true) yields the ``rolling_min`` and ``rolling_max``
    columns. A new, sorted frame is returned; ``df`` itself is not modified.
    """
    ordered = df.sort_values(['product', 't'])
    mid_by_product = ordered.groupby('product')['mid_price']

    def _window_over(series):
        # One shared window definition so min and max cannot drift apart.
        return series.rolling(window=window, min_periods=1, center=center)

    return ordered.assign(
        rolling_min=mid_by_product.transform(lambda s: _window_over(s).min()),
        rolling_max=mid_by_product.transform(lambda s: _window_over(s).max()),
    )
def tag_informed_trades(df, threshold=0.01):
    """Flag trades priced close to the rolling extremes.

    Args:
        df: Frame with ``price``, ``rolling_min`` and ``rolling_max`` columns.
        threshold: Relative tolerance around the extremes.

    Returns:
        A new frame with two boolean columns added:
            - ``buy_at_low``: ``price <= rolling_min * (1 + threshold)``
            - ``sell_at_high``: ``price >= rolling_max * (1 - threshold)``
        The input frame is not modified.
    """
    # assign() builds a new frame, so the caller's DataFrame is not silently
    # given extra columns as a side effect of calling this function.
    return df.assign(
        buy_at_low=df['price'] <= df['rolling_min'] * (1 + threshold),
        sell_at_high=df['price'] >= df['rolling_max'] * (1 - threshold),
    )
def summarize_infomed_activity(df):
    """Aggregate the trade flags per (party, product).

    Returns a ``(buyer_summary, seller_summary)`` pair:
        - ``buyer_summary`` has columns ``buyer``, ``product``,
          ``pct_buy_at_low`` (mean of ``buy_at_low``) and ``n_buys``
          (non-null count of ``buy_at_low``).
        - ``seller_summary`` has columns ``seller``, ``product``,
          ``pct_sell_at_high`` (mean of ``sell_at_high``) and ``n_sells``
          (non-null count of ``sell_at_high``).
    """
    def _per_party(party_col, flag_col, pct_col, count_col):
        # Both statistics come from the same grouping, so they share one index
        # and line up row for row.
        flags = df.groupby([party_col, 'product'])[flag_col]
        stats = pd.DataFrame({
            pct_col: flags.mean(),
            count_col: flags.count(),
        }).reset_index()
        # Only keep relevant columns
        return stats[[party_col, 'product', pct_col, count_col]]

    buyer_summary = _per_party('buyer', 'buy_at_low', 'pct_buy_at_low', 'n_buys')
    seller_summary = _per_party('seller', 'sell_at_high', 'pct_sell_at_high', 'n_sells')
    return buyer_summary, seller_summary
def flag_very_informed(summary, column, quantile=0.95):
    """Mark rows whose ``column`` value exceeds the cutoff.

    Args:
        summary: Frame containing ``column``.
        column: Name of the column to compare.
        quantile: Cutoff applied directly to the column values
            (``summary[column] > quantile``).
            NOTE(review): despite its name, no distribution quantile is
            computed here; confirm whether an absolute cutoff is intended.

    Returns:
        A new frame with a boolean ``very_informed`` column added. The input
        frame is not modified.
    """
    # assign() returns a copy, so the summary passed in by the caller is left
    # as it was instead of gaining a column behind their back.
    return summary.assign(very_informed=summary[column] > quantile)
def find_informed_traders(price_df=None, counterparty_df=None, rolling_window=3, center=True, threshold=0.01, quantile=0.95):
    """Run the full informed-trader detection pipeline.

    Steps:
        1. Merge prices with trades on (``product``/``symbol``, ``t``).
        2. Add per-product rolling min/max of ``mid_price`` to the merged rows
           (so the window runs over merged rows, not over the raw price rows).
        3. Flag trades within ``threshold`` of those extremes.
        4. Summarise the flags per (buyer, product) and (seller, product).
        5. Mark summary rows whose percentage exceeds ``quantile``.

    Args:
        price_df: Price frame; when omitted, the notebook-level ``price_df``
            is looked up at call time.
        counterparty_df: Trade frame; when omitted, the notebook-level
            ``counterparty_df`` is looked up at call time.
        rolling_window: Window size passed to ``add_price_extremes``.
        center: Whether the rolling window is centred.
        threshold: Relative tolerance passed to ``tag_informed_trades``.
        quantile: Cutoff passed to ``flag_very_informed``.

    Returns:
        ``(buyer_summary, seller_summary)``, each with a ``very_informed``
        column.
    """
    # Default argument values are evaluated once, when the `def` runs. Binding
    # the frames there would freeze whatever the globals held at that moment,
    # so re-running the loading cell would leave this function on stale data.
    # Resolve the notebook-level frames on every call instead.
    if price_df is None:
        price_df = globals()['price_df']
    if counterparty_df is None:
        counterparty_df = globals()['counterparty_df']

    df_merged = price_df.merge(counterparty_df, left_on=['product', 't'], right_on=['symbol', 't'])
    df_with_extremes = add_price_extremes(df_merged, rolling_window, center)
    df_with_tags = tag_informed_trades(df_with_extremes, threshold)
    buyer_summary, seller_summary = summarize_infomed_activity(df_with_tags)
    buyer_summary = flag_very_informed(buyer_summary, 'pct_buy_at_low', quantile)
    seller_summary = flag_very_informed(seller_summary, 'pct_sell_at_high', quantile)
    return buyer_summary, seller_summary

In [125]:
# Run the detection, then keep only the rows flagged as very informed.
buyers, sellers = find_informed_traders(rolling_window=50, threshold=0.001, quantile=0.95)
informed_buyers = buyers.loc[buyers['very_informed']]
informed_sellers = sellers.loc[sellers['very_informed']]

In [126]:
informed_buyers

Unnamed: 0,buyer,product,pct_buy_at_low,n_buys,very_informed
7,Caesar,RAINFOREST_RESIN,1.0,230,True
16,Camilla,DJEMBES,1.0,1,True
22,Camilla,RAINFOREST_RESIN,1.0,131,True
33,Charlie,RAINFOREST_RESIN,1.0,2202,True
39,Gina,RAINFOREST_RESIN,1.0,154,True
41,Olivia,CROISSANTS,1.0,3,True
43,Olivia,SQUID_INK,1.0,3,True


In [127]:
informed_sellers

Unnamed: 0,seller,product,pct_sell_at_high,n_sells,very_informed
7,Caesar,RAINFOREST_RESIN,1.0,246,True
22,Camilla,RAINFOREST_RESIN,1.0,126,True
33,Charlie,RAINFOREST_RESIN,1.0,2217,True
39,Gina,RAINFOREST_RESIN,1.0,187,True
41,Olga,KELP,1.0,1,True
42,Olivia,CROISSANTS,1.0,3,True


In [128]:
import plotly.express as px
import plotly.graph_objects as go

def get_trader_trades(trader, product):
    """Return a copy of the trades in ``product`` that involve ``trader``.

    Rows are taken from the notebook-level ``counterparty_df`` where
    ``trader`` appears as ``seller`` or ``buyer`` and ``symbol`` equals
    ``product``. A ``side`` column is added: ``'sell'`` when ``trader`` is the
    seller of the row, ``'buy'`` otherwise.
    """
    is_seller = counterparty_df['seller'] == trader
    is_buyer = counterparty_df['buyer'] == trader
    on_product = counterparty_df['symbol'] == product

    selected = counterparty_df.loc[(is_seller | is_buyer) & on_product].copy()
    selected['side'] = selected['seller'].eq(trader).map({True: 'sell', False: 'buy'})
    return selected
# Pick the trader/product pair to inspect.
trader = 'Olivia'
product = 'CROISSANTS'
trader_trades = get_trader_trades(trader, product)
# Price rows for that product in `t` order; the left merge keeps every price
# row and attaches trade columns where a trade shares the same `t`.
product_prices = price_df.loc[price_df['product'] == product].sort_values('t')
merged_trades = product_prices.merge(trader_trades, on='t', how='left')

def plot_trader_vs_mid(merged, trader_id, product_id):
    """Show the trader's trades as markers on top of the mid-price line.

    ``merged`` must provide ``t``, ``price``, ``side``, ``quantity`` and
    ``mid_price`` columns. ``trader_id`` and ``product_id`` are used only for
    the figure title. The figure is displayed; nothing is returned.
    """
    # Mid price as a continuous line
    mid_price_line = go.Scatter(
        x=merged['t'],
        y=merged['mid_price'],
        mode='lines',
        name='mid price',
        line={'color': 'blue', 'width': 2},
    )

    # Trades as markers, coloured and shaped by side
    fig = px.scatter(
        merged,
        x='t',
        y='price',
        color='side',
        symbol='side',
        hover_data=['price', 'quantity'],
    )
    fig.add_trace(mid_price_line)
    fig.update_layout(title=f'{trader_id} trades vs {product_id} mid price')
    fig.show()

# Render the chart for the trader/product pair selected above.
plot_trader_vs_mid(merged_trades, trader, product)

# I love you Olivia :D