In [1]:
import pandas as pd
import os
from pathlib import Path
import re

def load_trading_data(data_folder: str) -> dict[str, pd.DataFrame]:
    """
    Load trading data from CSV files into pandas DataFrames, agnostic to round and day.

    Args:
        data_folder (str): Path to the folder containing the CSV files

    Returns:
        Dict[str, pd.DataFrame]: Dictionary containing the following DataFrames:
            - 'prices': Price data for all days/rounds
            - 'trades': Trade data for all days/rounds
            - 'observations': Observation data for all days/rounds
    """
    data = {
        'prices': [],
        'trades': [],
        'observations': []
    }

    # Regex to match files and extract type, round, and day
    pattern = re.compile(r'^(prices|trades|observations)_round_(\d+)_day_(\d+)\.csv$')

    for file in Path(data_folder).iterdir():
        if file.is_file():
            match = pattern.match(file.name)
            if match:
                data_type, round_num, day = match.groups()
                sep = ';' if data_type in ['prices', 'trades'] else ','
                df = pd.read_csv(file, sep=sep)
                df['day'] = int(day)
                df['round'] = int(round_num)
                if 'timestamp' in df.columns:
                    df = df.sort_values('timestamp')
                data[data_type].append(df)

    # Concatenate all days' data
    result = {}
    for key, dfs in data.items():
        if dfs:  # Only add if we found any data
            result[key] = pd.concat(dfs, ignore_index=True)
            result[key].sort_values(['day', 'timestamp'], inplace=True)

    return result

def convert_timestamp(df: pd.DataFrame) -> pd.DataFrame:
    return df.assign(t=(df['day'] - 1) * 1_000_000 + df['timestamp']).drop(columns=['day', 'timestamp'])

In [2]:
trading_data = load_trading_data('round-5-island-data-bottle')
price_df = trading_data['prices']
price_df = convert_timestamp(price_df)
observation_df = trading_data['observations']
observation_df = convert_timestamp(observation_df)
counterparty_df = trading_data['trades']
counterparty_df = convert_timestamp(counterparty_df).drop(columns='round').set_index('t')
parties = counterparty_df['seller'].unique()

In [3]:
def add_price_extremes(df, window=3, center=True):
    df = df.sort_values(['product', 't'])
    df['rolling_min'] = df.groupby('product')['mid_price'].transform(lambda x: x.rolling(window=window, min_periods=1, center=center).min())
    df['rolling_max'] = df.groupby('product')['mid_price'].transform(lambda x: x.rolling(window=window, min_periods=1, center=center).max())
    return df
def tag_informed_trades(df, threshold=0.01):
    df['buy_at_low'] = (df['price'] <= df['rolling_min'] * (1 + threshold))
    df['sell_at_high'] = (df['price'] >= df['rolling_max'] * (1 - threshold))
    return df
def summarize_informed_activity(df):
    # Calculate percentage and count of buys at low for each buyer-product
    buyer_group = df.groupby(['buyer', 'product'])
    buyer_summary = buyer_group['buy_at_low'].mean().reset_index().rename(
        columns={'buy_at_low': 'pct_buy_at_low'}
    )
    buyer_summary['n_buys'] = buyer_group['buy_at_low'].count().values  # <-- Added

    # Calculate percentage and count of sells at high for each seller-product
    seller_group = df.groupby(['seller', 'product'])
    seller_summary = seller_group['sell_at_high'].mean().reset_index().rename(
        columns={'sell_at_high': 'pct_sell_at_high'}
    )
    seller_summary['n_sells'] = seller_group['sell_at_high'].count().values  # <-- Added

    # Only keep relevant columns
    buyer_summary = buyer_summary[['buyer', 'product', 'pct_buy_at_low', 'n_buys']]
    seller_summary = seller_summary[['seller', 'product', 'pct_sell_at_high', 'n_sells']]
    return buyer_summary, seller_summary
def flag_very_informed(summary, column, quantile=0.95):
    summary['very_informed'] = summary[column] > quantile
    return summary
def find_informed_traders(price_df=price_df, counterparty_df=counterparty_df, rolling_window=3, center=True, threshold=0.01, quantile=0.95):
    df_merged = price_df.merge(counterparty_df, left_on=['product', 't'], right_on=['symbol', 't'])
    df_with_extremes = add_price_extremes(df_merged, rolling_window, center)
    df_with_tags = tag_informed_trades(df_with_extremes, threshold)
    buyer_summary, seller_summary = summarize_informed_activity(df_with_tags)
    buyer_summary = flag_very_informed(buyer_summary, 'pct_buy_at_low', quantile)
    seller_summary = flag_very_informed(seller_summary, 'pct_sell_at_high', quantile)
    return buyer_summary, seller_summary

In [4]:
buyers, sellers = find_informed_traders(rolling_window=50, threshold=0.001, quantile=0.95)
informed_buyers = buyers[buyers['very_informed']]
informed_sellers = sellers[sellers['very_informed']]

In [5]:
informed_buyers

Unnamed: 0,buyer,product,pct_buy_at_low,n_buys,very_informed
7,Caesar,RAINFOREST_RESIN,1.0,230,True
16,Camilla,DJEMBES,1.0,1,True
22,Camilla,RAINFOREST_RESIN,1.0,131,True
33,Charlie,RAINFOREST_RESIN,1.0,2202,True
39,Gina,RAINFOREST_RESIN,1.0,154,True
41,Olivia,CROISSANTS,1.0,3,True
43,Olivia,SQUID_INK,1.0,3,True


In [6]:
informed_sellers

Unnamed: 0,seller,product,pct_sell_at_high,n_sells,very_informed
7,Caesar,RAINFOREST_RESIN,1.0,246,True
22,Camilla,RAINFOREST_RESIN,1.0,126,True
33,Charlie,RAINFOREST_RESIN,1.0,2217,True
39,Gina,RAINFOREST_RESIN,1.0,187,True
41,Olga,KELP,1.0,1,True
42,Olivia,CROISSANTS,1.0,3,True


Rainforest resin doesn't really matter because it fluctuates so much

In [7]:
import plotly.express as px
import plotly.graph_objects as go

def get_trader_trades(trader, product):
    trader_trades = counterparty_df[((counterparty_df['seller'] == trader) | (counterparty_df['buyer'] == trader)) & (counterparty_df['symbol'] == product)].copy()
    trader_trades['side'] = (trader_trades['seller'] == trader).map({True: 'sell', False: 'buy'})
    return trader_trades

def plot_trader_vs_mid(trader, product):
    """Plots the trader's trades against the mid price."""
    trader_trades = get_trader_trades(trader, product)
    product_prices = price_df.query('product == @product').sort_values('t')
    merged_trades = pd.merge(product_prices, trader_trades, on='t', how='left')
    # Scatter plot for trades
    fig = px.scatter(
        merged_trades, x='t', y='price', color='side', symbol='side',
        hover_data=['price', 'quantity']
    )
    # Add mid price line using go.Scatter
    fig.add_trace(
        go.Scatter(
            x=merged_trades['t'],
            y=merged_trades['mid_price'],
            mode='lines',
            name='mid price',
            line=dict(color='blue', width=2)
        )
    )
    fig.update_layout(title=f'{trader} trades vs {product} mid price')
    fig.show()

In [8]:
plot_trader_vs_mid('Olivia', 'CROISSANTS')

# I love you Olivia :D

In [11]:
counterparty_df.query('seller == "Olivia"')

Unnamed: 0_level_0,buyer,seller,symbol,currency,price,quantity
t,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1102400,Charlie,Olivia,KELP,SEASHELLS,2050.0,15
1201600,Caesar,Olivia,CROISSANTS,SEASHELLS,4278.0,3
1580200,Paris,Olivia,SQUID_INK,SEASHELLS,1926.0,1
1580200,Charlie,Olivia,SQUID_INK,SEASHELLS,1924.0,14
2158200,Charlie,Olivia,KELP,SEASHELLS,2044.0,15
2928000,Charlie,Olivia,SQUID_INK,SEASHELLS,1986.0,15
2971000,Caesar,Olivia,CROISSANTS,SEASHELLS,4314.0,3
3051100,Charlie,Olivia,SQUID_INK,SEASHELLS,2047.0,15
3077700,Charlie,Olivia,KELP,SEASHELLS,2037.0,15
3903700,Caesar,Olivia,CROISSANTS,SEASHELLS,4307.0,3


In [12]:
plot_trader_vs_mid('Olivia', 'SQUID_INK')

# Wow olivia might be a Congresswoman

In [13]:
def summarize_informed_pair_activity(df):
    group = df.groupby(['buyer', 'seller', 'product'])
    pct_buy_at_low = group['buy_at_low'].mean().reset_index(name='pct_buy_at_low')
    n_buys = group['buy_at_low'].count().reset_index(name='n_buys')
    pct_sell_at_high = group['sell_at_high'].mean().reset_index(name='pct_sell_at_high')
    n_sells = group['sell_at_high'].count().reset_index(name='n_sells')
    summary = pct_buy_at_low.merge(n_buys, on=['buyer', 'seller', 'product'])
    summary = summary.merge(pct_sell_at_high, on=['buyer', 'seller', 'product'])
    summary = summary.merge(n_sells, on=['buyer', 'seller', 'product'])
    return summary
def find_informed_trader_pairs(price_df=price_df, counterparty_df=counterparty_df, rolling_window=3, center=True, threshold=0.01, quantile=0.95):
    df_merged = price_df.merge(counterparty_df, left_on=['product', 't'], right_on=['symbol', 't'])
    df_with_extremes = add_price_extremes(df_merged, rolling_window, center)
    df_with_tags = tag_informed_trades(df_with_extremes, threshold)
    summary = summarize_informed_pair_activity(df_with_tags)
    summary = flag_very_informed(summary, 'pct_buy_at_low', quantile)
    summary = flag_very_informed(summary, 'pct_sell_at_high', quantile)
    return summary
summary = find_informed_trader_pairs(rolling_window=50, threshold=0.001, quantile=0.95)
summary.query('very_informed and product != "RAINFOREST_RESIN"') # Again, ignore rainforest resin



Unnamed: 0,buyer,seller,product,pct_buy_at_low,n_buys,pct_sell_at_high,n_sells,very_informed
19,Caesar,Olivia,CROISSANTS,0.0,3,1.0,3,True
77,Camilla,Paris,DJEMBES,1.0,1,1.0,1,True
124,Gary,Charlie,KELP,0.033019,212,0.957547,212,True
129,Gary,Olga,KELP,0.0,1,1.0,1,True
138,Gina,Caesar,KELP,0.333333,6,1.0,6,True
147,Gina,Gary,KELP,0.0,2,1.0,2,True
156,Gina,Penelope,KELP,0.4,5,1.0,5,True
187,Pablo,Gina,KELP,0.4,5,1.0,5,True
225,Paris,Olivia,SQUID_INK,0.0,1,1.0,1,True
256,Penelope,Charlie,KELP,0.022923,349,0.977077,349,True


In [14]:
def get_trader_pair_trades(buyer, seller, product):
    trader_trades = counterparty_df[((counterparty_df['seller'] == seller) | (counterparty_df['buyer'] == buyer)) & (counterparty_df['symbol'] == product)].copy()
    trader_trades['side'] = (trader_trades['seller'] == seller).map({True: 'sell', False: 'buy'})
    return trader_trades

def plot_trader_pair_vs_mid(trader1, trader2, product):
    """Plots the trader's trades against the mid price."""
    trader_trades = get_trader_pair_trades(trader1, trader2, product)
    product_prices = price_df.query('product == @product').sort_values('t')
    merged_trades = pd.merge(product_prices, trader_trades, on='t', how='left')
    # Scatter plot for trades
    fig = px.scatter(
        merged_trades, x='t', y='price', color='side', symbol='side',
        hover_data=['price', 'quantity']
    )
    # Add mid price line using go.Scatter
    fig.add_trace(
        go.Scatter(
            x=merged_trades['t'],
            y=merged_trades['mid_price'],
            mode='lines',
            name='mid price',
            line=dict(color='blue', width=2)
        )
    )
    fig.update_layout(title=f'{trader1} and {trader2} trades vs {product} mid price')
    fig.show()

In [15]:
plot_trader_pair_vs_mid('Penelope', 'Charlie', 'KELP') # Can't find any meaningful informed trading pairs

In [18]:
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import numpy as np

def get_synthetic_value(df, basket_type):
    """Calculate synthetic value based on component prices."""
    if basket_type == "PICNIC_BASKET1":
        # 6 CROISSANTS + 3 JAMS + 1 DJEMBE
        croissant_prices = df[df['product'] == 'CROISSANTS'].set_index('t')['mid_price']
        jam_prices = df[df['product'] == 'JAMS'].set_index('t')['mid_price']
        djembe_prices = df[df['product'] == 'DJEMBES'].set_index('t')['mid_price']
        
        # Create a common time index and forward fill values
        all_times = sorted(set(croissant_prices.index) | set(jam_prices.index) | set(djembe_prices.index))
        croissant_prices = croissant_prices.reindex(all_times).ffill()  # Changed from fillna(method='ffill')
        jam_prices = jam_prices.reindex(all_times).ffill()  # Changed from fillna(method='ffill')
        djembe_prices = djembe_prices.reindex(all_times).ffill()  # Changed from fillna(method='ffill')
        
        # Calculate synthetic value
        synthetic_values = pd.DataFrame(index=all_times)
        synthetic_values['synthetic_value'] = 6 * croissant_prices + 3 * jam_prices + 1 * djembe_prices
        synthetic_values['t'] = synthetic_values.index
        return synthetic_values
    
    elif basket_type == "PICNIC_BASKET2":
        # 4 CROISSANTS + 2 JAMS
        croissant_prices = df[df['product'] == 'CROISSANTS'].set_index('t')['mid_price']
        jam_prices = df[df['product'] == 'JAMS'].set_index('t')['mid_price']
        
        # Create a common time index and forward fill values
        all_times = sorted(set(croissant_prices.index) | set(jam_prices.index))
        croissant_prices = croissant_prices.reindex(all_times).ffill()  # Changed from fillna(method='ffill')
        jam_prices = jam_prices.reindex(all_times).ffill()  # Changed from fillna(method='ffill')
        
        # Calculate synthetic value
        synthetic_values = pd.DataFrame(index=all_times)
        synthetic_values['synthetic_value'] = 4 * croissant_prices + 2 * jam_prices
        synthetic_values['t'] = synthetic_values.index
        return synthetic_values
    
    return None

def get_olivia_trades(trader="Olivia", products=None):
    """Get all trades made by Olivia for the specified products."""
    if products is None:
        products = ["PICNIC_BASKET1", "PICNIC_BASKET2", "CROISSANTS", "JAMS", "DJEMBES"]
    
    olivia_trades = counterparty_df[((counterparty_df['seller'] == trader) | 
                                     (counterparty_df['buyer'] == trader)) & 
                                    (counterparty_df['symbol'].isin(products))].copy()
    
    olivia_trades['side'] = (olivia_trades['seller'] == trader).map({True: 'sell', False: 'buy'})
    olivia_trades = olivia_trades.reset_index()
    return olivia_trades

def plot_basket_spread_with_trades(basket_type="PICNIC_BASKET1"):
    """Plot the spread between basket price and synthetic value with trader trades."""
    # Get basket prices
    basket_prices = price_df[price_df['product'] == basket_type].copy()
    
    # Calculate synthetic values
    synthetic_values = get_synthetic_value(price_df, basket_type)
    
    # Merge basket prices with synthetic values
    merged_data = pd.merge(basket_prices, synthetic_values, on='t', how='outer')
    merged_data = merged_data.sort_values('t')
    
    # Calculate spread
    merged_data['spread'] = merged_data['mid_price'] - merged_data['synthetic_value']
    
    # Get Olivia's trades
    olivia_trades = get_olivia_trades()
    basket_trades = olivia_trades[olivia_trades['symbol'] == basket_type]
    
    # Create figure
    fig = go.Figure()
    
    # Add spread line
    fig.add_trace(
        go.Scatter(
            x=merged_data['t'],
            y=merged_data['spread'],
            mode='lines',
            name='Basket Spread (Price - Synthetic)',
            line=dict(color='blue', width=2)
        )
    )
    
    # Add horizontal line at zero
    fig.add_trace(
        go.Scatter(
            x=[merged_data['t'].min(), merged_data['t'].max()],
            y=[0, 0],
            mode='lines',
            name='Fair Value',
            line=dict(color='black', dash='dash', width=1)
        )
    )
    
    # Add Olivia's trades
    buy_trades = basket_trades[basket_trades['side'] == 'buy']
    sell_trades = basket_trades[basket_trades['side'] == 'sell']
    
    # Add buy trades
    if not buy_trades.empty:
        # For buys, calculate the spread at those points
        buy_spreads = []
        for idx, trade in buy_trades.iterrows():
            closest_idx = (merged_data['t'] - trade['t']).abs().idxmin()
            buy_spreads.append(merged_data.loc[closest_idx, 'spread'])
        
        fig.add_trace(
            go.Scatter(
                x=buy_trades['t'],
                y=buy_spreads,
                mode='markers',
                name='Olivia Buys Basket',
                marker=dict(color='green', size=10, symbol='triangle-up'),
                text=buy_trades.apply(lambda x: f"Price: {x['price']}, Quantity: {x['quantity']}", axis=1)
            )
        )
    
    # Add sell trades
    if not sell_trades.empty:
        # For sells, calculate the spread at those points
        sell_spreads = []
        for idx, trade in sell_trades.iterrows():
            closest_idx = (merged_data['t'] - trade['t']).abs().idxmin()
            sell_spreads.append(merged_data.loc[closest_idx, 'spread'])
        
        fig.add_trace(
            go.Scatter(
                x=sell_trades['t'],
                y=sell_spreads,
                mode='markers',
                name='Olivia Sells Basket',
                marker=dict(color='red', size=10, symbol='triangle-down'),
                text=sell_trades.apply(lambda x: f"Price: {x['price']}, Quantity: {x['quantity']}", axis=1)
            )
        )
    
    # Add annotations for component buys
    component_buys = olivia_trades[
        (olivia_trades['symbol'].isin(['CROISSANTS', 'JAMS', 'DJEMBES'])) & 
        (olivia_trades['side'] == 'buy')
    ]
    
    if not component_buys.empty:
        for idx, trade in component_buys.iterrows():
            closest_idx = (merged_data['t'] - trade['t']).abs().idxmin()
            if closest_idx in merged_data.index:
                spread_value = merged_data.loc[closest_idx, 'spread']
                
                fig.add_annotation(
                    x=trade['t'],
                    y=spread_value,
                    text=f"Olivia buys {trade['symbol']}",
                    showarrow=True,
                    arrowhead=2,
                    arrowsize=1,
                    arrowwidth=1,
                    arrowcolor='purple'
                )
    
    # Add annotations for Olivia's croissant sells
    croissant_sells = olivia_trades[
        (olivia_trades['symbol'] == 'CROISSANTS') & 
        (olivia_trades['side'] == 'sell')
    ]
    
    if not croissant_sells.empty:
        for idx, trade in croissant_sells.iterrows():
            closest_idx = (merged_data['t'] - trade['t']).abs().idxmin()
            if closest_idx in merged_data.index:
                spread_value = merged_data.loc[closest_idx, 'spread']
                
                fig.add_annotation(
                    x=trade['t'],
                    y=spread_value,
                    text=f"Olivia sells CROISSANTS",
                    showarrow=True,
                    arrowhead=2,
                    arrowsize=1,
                    arrowwidth=1,
                    arrowcolor='orange',
                    ax=0,
                    ay=-40
                )
    
    # Update layout
    fig.update_layout(
        title=f'{basket_type} Spread (Price - Synthetic Value) with Olivia\'s Trades',
        xaxis_title='Timestamp',
        yaxis_title='Spread',
        hovermode='closest',
        legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='right', x=1)
    )
    
    return fig

# Plot both basket types
fig1 = plot_basket_spread_with_trades("PICNIC_BASKET1")
fig1.show()

fig2 = plot_basket_spread_with_trades("PICNIC_BASKET2")
fig2.show()