In [1]:
# Ensure the project root is in PATH.
import sys

sys.path.append("../")
# All imports of our code are relative to the project root.

from backtester.engine import Backtester
from backtester.datamodel import TradingState, OrderDepth, Order, Listing
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
import sys
import os


def concatenate_historical_data(
    data: list[pd.DataFrame],
    day_length: "int | None" = None,
    timestamp_step: int = 100,
) -> pd.DataFrame:
    """Concatenate several days of historical data into one continuous frame.

    Every frame must have a ``timestamp`` column that restarts for each day.
    The first frame keeps its timestamps; each later frame is shifted so that
    its timestamps continue after the previous day. Only ``timestamp`` is
    rewritten: every other column (including a ``day`` column, if there is
    one) is passed through unchanged. The input frames are never modified.

    Args:
        data: Per-day frames in chronological order.
        day_length: Fixed number of timestamp units per day (an integer, to
            keep integer timestamps integer). When given, day ``i`` (0-based)
            is shifted by ``i * day_length``. Use this for sparse frames whose
            last row is not necessarily the last tick of the day; with the
            default mode such frames drift out of sync with frames that have
            a row for every tick. When ``None`` (default), each day starts
            ``timestamp_step`` after the last timestamp of the previous,
            already shifted, day.
        timestamp_step: Gap between the last timestamp of one day and the
            first timestamp of the next. Only used when ``day_length`` is
            ``None``.

    Returns:
        A new frame with all rows stacked in order and a fresh 0..n-1 index.

    Raises:
        ValueError: If ``data`` is empty.
    """
    if len(data) == 0:
        raise ValueError("concatenate_historical_data() needs at least one DataFrame")

    shifted_days = []
    timeshift = 0
    for day_number, day_frame in enumerate(data):
        if day_length is not None:
            timeshift = day_number * day_length

        day_copy = day_frame.copy()
        day_copy["timestamp"] = day_copy["timestamp"] + timeshift
        shifted_days.append(day_copy)

        if day_length is None and len(day_copy) > 0:
            # Read the value from the column, not from the row: `frame.iloc[-1]`
            # on an all-numeric frame upcasts the whole row to float, which
            # would silently turn integer timestamps into floats.
            timeshift = day_copy["timestamp"].iloc[-1] + timestamp_step

    # One concat at the end (instead of growing the frame inside the loop) and a
    # fresh index so that row labels are unique across days.
    return pd.concat(shifted_days, ignore_index=True)

In [2]:
def _read_round4_csv(filename: str, sep: str) -> pd.DataFrame:
    """Read one file from ../data/round4 using the given column separator."""
    return pd.read_csv(os.path.join("..", "data", "round4", filename), sep=sep)


# Price files are semicolon-separated; one frame per day.
market_data_round_4_day_1 = _read_round4_csv("prices_round_4_day_1.csv", sep=";")
market_data_round_4_day_2 = _read_round4_csv("prices_round_4_day_2.csv", sep=";")
market_data_round_4_day_3 = _read_round4_csv("prices_round_4_day_3.csv", sep=";")

# Trade files are semicolon-separated as well.
trades_round_4_day_1 = _read_round4_csv("trades_round_4_day_1.csv", sep=";")
trades_round_4_day_2 = _read_round4_csv("trades_round_4_day_2.csv", sep=";")
trades_round_4_day_3 = _read_round4_csv("trades_round_4_day_3.csv", sep=";")

# Observation files are comma-separated, unlike the two groups above.
observations_round_4_day_1 = _read_round4_csv("observations_round_4_day_1.csv", sep=",")
observations_round_4_day_2 = _read_round4_csv("observations_round_4_day_2.csv", sep=",")
observations_round_4_day_3 = _read_round4_csv("observations_round_4_day_3.csv", sep=",")

# Stitch the three days of each data set into one frame with continuous timestamps.
market_data_round_4_all3days = concatenate_historical_data(
    [market_data_round_4_day_1, market_data_round_4_day_2, market_data_round_4_day_3]
)
trades_round_4_all3days = concatenate_historical_data(
    [trades_round_4_day_1, trades_round_4_day_2, trades_round_4_day_3]
)
observations_round_4_all3days = concatenate_historical_data(
    [observations_round_4_day_1, observations_round_4_day_2, observations_round_4_day_3]
)

In [None]:
# Product name used below to filter the price frame ("product" column) and the
# trade frame ("symbol" column).
MAGNIFICENT_MACARONS = "MAGNIFICENT_MACARONS"


def get_time_part(df: pd.DataFrame, l, h) -> pd.DataFrame:
    """Return the rows of ``df`` whose ``timestamp`` lies in the half-open window [l, h).

    ``df`` itself is not modified; the result is a new frame whose index is
    renumbered from 0.
    """
    timestamps = df["timestamp"]
    inside_window = (timestamps >= l) & (timestamps < h)
    return df.loc[inside_window].reset_index(drop=True)


def get_sunlight_part(
    md: pd.DataFrame, th: pd.DataFrame, obs: pd.DataFrame, low: float, high: float
) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Restrict all three frames to the timestamps at which ``obs`` reports a
    ``sunlightIndex`` in the half-open band [low, high).

    Returns ``(md_slice, th_slice, obs_slice)``; each slice has its index
    renumbered from 0.
    """
    sunlight = obs["sunlightIndex"]
    in_band = (sunlight >= low) & (sunlight < high)

    # Observation rows inside the band, and the timestamps they occur at.
    band_obs = obs.loc[in_band]
    band_timestamps = band_obs["timestamp"]

    def keep_band_rows(frame: pd.DataFrame) -> pd.DataFrame:
        # Keep only the rows whose timestamp is one of the in-band timestamps.
        return frame[frame["timestamp"].isin(band_timestamps)].reset_index(drop=True)

    return keep_band_rows(md), keep_band_rows(th), band_obs.reset_index(drop=True)


# Half-open timestamp window [l, h) applied to all three frames below.
l, h = 0, 3e6

# Prices: keep only the macaron rows, then cut to the window.
md_all = get_time_part(
    market_data_round_4_all3days[market_data_round_4_all3days["product"] == MAGNIFICENT_MACARONS],
    l,
    h,
)

# Trades: same, but the product name lives in the "symbol" column.
th_all = get_time_part(
    trades_round_4_all3days[trades_round_4_all3days["symbol"] == MAGNIFICENT_MACARONS],
    l,
    h,
)

# Observations are not filtered by product; only the window is applied.
obs_all = get_time_part(observations_round_4_all3days, l, h)



### First, let's try splitting the data into regimes
 

Regime splitting: By sunlight value

In [4]:

# Disabled alternative: split the data into three regimes by sunlightIndex band
# (thresholds 37 and 54) using get_sunlight_part. Every line below is commented
# out, so running this cell defines nothing.
# sunlight_thresh1 = 37
# sunlight_thresh2 = 54

# md_regime1, th_regime1, obs_regime1 = get_sunlight_part(md_all, th_all, obs_all, 0, sunlight_thresh1)
# md_regime2, th_regime2, obs_regime2 = get_sunlight_part(md_all, th_all, obs_all, sunlight_thresh1, sunlight_thresh2)
# md_regime3, th_regime3, obs_regime3 = get_sunlight_part(md_all, th_all, obs_all, sunlight_thresh2, 99999)


# mds = [md_regime1, md_regime2, md_regime3]
# ths = [th_regime1, th_regime2, th_regime3]
# obss = [obs_regime1, obs_regime2, obs_regime3]


Regime splitting: by each piecewise part

In [5]:
# Disabled alternative: one regime per time window ([0, 2e5), [2e5, 4e5),
# [4e5, 5e5), [5e5, 6e5), [6e5, 7e5), [7e5, 9e5), [9e5, 10e5)) using
# get_time_part. Every line below is commented out, so running this cell
# defines nothing.
# md_regime1 = get_time_part(md_all, 0, 2e5)
# th_regime1 = get_time_part(th_all, 0, 2e5)
# obs_regime1 = get_time_part(obs_all, 0, 2e5)

# md_regime2 = get_time_part(md_all, 2e5, 4e5)
# th_regime2 = get_time_part(th_all, 2e5, 4e5)
# obs_regime2 = get_time_part(obs_all, 2e5, 4e5)

# md_regime3 = get_time_part(md_all, 4e5, 5e5)
# th_regime3 = get_time_part(th_all, 4e5, 5e5)
# obs_regime3 = get_time_part(obs_all, 4e5, 5e5)

# md_regime4 = get_time_part(md_all, 5e5, 6e5)
# th_regime4 = get_time_part(th_all, 5e5, 6e5)
# obs_regime4 = get_time_part(obs_all, 5e5, 6e5)

# md_regime5 = get_time_part(md_all, 6e5, 7e5)
# th_regime5 = get_time_part(th_all, 6e5, 7e5)
# obs_regime5 = get_time_part(obs_all, 6e5, 7e5)

# md_regime6 = get_time_part(md_all, 7e5, 9e5)
# th_regime6 = get_time_part(th_all, 7e5, 9e5)
# obs_regime6 = get_time_part(obs_all, 7e5, 9e5)

# md_regime7 = get_time_part(md_all, 9e5, 10e5)
# th_regime7 = get_time_part(th_all, 9e5, 10e5)
# obs_regime7 = get_time_part(obs_all, 9e5, 10e5)

# mds = [md_regime1, md_regime2, md_regime3, md_regime4, md_regime5, md_regime6, md_regime7]
# ths = [th_regime1, th_regime2, th_regime3, th_regime4, th_regime5, th_regime6, th_regime7]
# obss = [obs_regime1, obs_regime2, obs_regime3, obs_regime4, obs_regime5, obs_regime6, obs_regime7]

Regime splitting: by up/down/flat

In [6]:
# Half-open [start, end) timestamp windows that make up each regime.
up_windows = [(0, 2e5), (4e5, 5e5), (9e5, 10e5)]
down_windows = [(2e5, 4e5), (6e5, 7e5)]
flat_windows = [(5e5, 6e5), (7e5, 9e5)]


def _stack_time_parts(frame: pd.DataFrame, windows) -> pd.DataFrame:
    """Cut ``frame`` to each (start, end) window and stack the pieces in order."""
    pieces = [get_time_part(frame, start, end) for start, end in windows]
    return pd.concat(pieces, ignore_index=True)


# up
md_regime1 = _stack_time_parts(md_all, up_windows)
th_regime1 = _stack_time_parts(th_all, up_windows)
obs_regime1 = _stack_time_parts(obs_all, up_windows)

# down
md_regime2 = _stack_time_parts(md_all, down_windows)
th_regime2 = _stack_time_parts(th_all, down_windows)
obs_regime2 = _stack_time_parts(obs_all, down_windows)

# flat
md_regime3 = _stack_time_parts(md_all, flat_windows)
th_regime3 = _stack_time_parts(th_all, flat_windows)
obs_regime3 = _stack_time_parts(obs_all, flat_windows)

# Regime frames grouped per data set, in the order up, down, flat.
mds = [md_regime1, md_regime2, md_regime3]
ths = [th_regime1, th_regime2, th_regime3]
obss = [obs_regime1, obs_regime2, obs_regime3]

In [68]:
k = 1

# Cut all three frames to the window [0, 1e6); an upper bound of 2e5 was tried earlier.
md, th, obs = (get_time_part(frame, 0, 1e6) for frame in (md_all, th_all, obs_all))

# Effective chef prices once fees are included:
#   buying from the chefs costs the ask plus transport fees and import tariff,
#   selling to the chefs yields the bid minus transport fees and export tariff.
obs["actual_ask"] = obs["askPrice"] + obs["transportFees"] + obs["importTariff"]
obs["actual_bid"] = obs["bidPrice"] - obs["transportFees"] - obs["exportTariff"]

# plt.figure(figsize=(12,6))
# plt.plot(md['timestamp'], md['bid_price_1'],label = 'bid_price_1')
# plt.plot(md['timestamp'], obs['actual_ask'],label='actual_ask')
# plt.plot(md['timestamp'], md['bid_price_1'],label='bid_price_1')
# plt.plot(md['timestamp'], md['ask_price_1'],label='ask_price_1')
# plt.plot(md['timestamp'], md['bid_price_2'],label='bid_price_2')
# plt.plot(md['timestamp'], md['ask_price_2'],label='ask_price_2')
# plt.plot(md['timestamp'], md['bid_price_3'],label='bid_price_3')
# plt.plot(md['timestamp'], md['ask_price_3'],label='ask_price_3')

# plt.plot(obs['timestamp'], obs['actual_ask'],label='actual_ask_chefs')
# plt.plot(obs['timestamp'], obs['actual_bid'],label='actual_bid_chefs')
# plt.scatter(md['mid_price'], obs['transportFees'])


# plt.legend()
# plt.show()


# Arb 1: buy from chefs, sell to local.
# md and obs are compared row by row, so both must carry the same 0..n-1 index.
mask1 = md["bid_price_1"] > obs["actual_ask"]
# Per-row edge, and the profit from filling the whole best-bid volume at that edge.
diff = md["bid_price_1"] - obs["actual_ask"]
profit = (diff * md["bid_volume_1"])[mask1]

print(f"{np.sum(mask1)} opportunities for arb 1")
print(f"Max profit: {np.sum(profit)}")
# Average best-bid volume over the rows where arb 1 is available.
print(md.loc[mask1, "bid_volume_1"].mean())

# Arb 2: buy from local, sell to chefs.
mask2 = md["ask_price_1"] < obs["actual_bid"]
print(f"{np.sum(mask2)} opportunities for arb 2")

325 opportunities for arb 1
Max profit: 2461.7999999999856
6.593846153846154
0 opportunities for arb 2


I think I give up on price prediction at this point. There doesn't seem to be any predictive power in sunlight or sugar for price.  

Macaron and sugar prices move together (corr = 0.6), which might suggest cointegration, but tests show they aren't cointegrated. Maybe look at this again later.  

Going back to the exchange arb that I found before: we can buy from the chefs and sell to the local market. There are quite few opportunities for this, netting only $4000 over 1e6 timestamps, which is terrible, but what about buy/sell orders that come in?  
 
Last year they said that there was a 'big taker of orchids'. Maybe there will be a bot that is a big taker of orchids but just isn't present in the test data.