In [52]:
# Ensure the project root is in PATH.
import sys

sys.path.append("../")
# All imports of our code are relative to the project root.

from backtester.engine import Backtester
from backtester.datamodel import TradingState, OrderDepth, Order, Listing
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
import sys
import os
from backtester.log import Log

from collections import defaultdict


# Concatenates multiple days of historical data into one frame with a
# continuous "timestamp" column, i.e. the first timestamp of each later day
# continues where the previous day stopped.
# Only "timestamp" is rewritten; every other column (including any "day"
# column) is kept unchanged.
def concatenate_historical_data(data: list[pd.DataFrame], day_length=None) -> pd.DataFrame:
    """Stack per-day frames into one frame with continuous timestamps.

    Args:
        data: Per-day frames in chronological order. Each must have a
            ``timestamp`` column. The inputs are not modified.
        day_length: Optional fixed timestamp span of one day. When given, day
            ``i`` (0-based) is shifted by ``i * day_length``, so frames with
            different row densities end up on the same time axis. When ``None``
            (default, the original behaviour) each day is shifted by the last
            shifted timestamp of the preceding non-empty day plus 100.

    Returns:
        A new DataFrame with all rows in input order and a fresh 0..n-1 index.

    Raises:
        ValueError: If ``data`` is empty.
    """
    if len(data) == 0:
        raise ValueError("data must contain at least one DataFrame")

    shifted_days = []
    offset = 0
    for day_index, day_frame in enumerate(data):
        day_copy = day_frame.copy()
        if day_length is not None:
            offset = day_index * day_length

        day_copy["timestamp"] = day_copy["timestamp"] + offset
        shifted_days.append(day_copy)

        if day_length is None and not day_copy.empty:
            # Read the last timestamp from the column (not from a row Series)
            # so its dtype is not upcast; 100 is the gap to the next day.
            offset = day_copy["timestamp"].iloc[-1] + 100

    # Concatenate once (not inside the loop) and rebuild the index so the
    # result has no duplicated index labels.
    return pd.concat(shifted_days, ignore_index=True)


def get_time_part(df: pd.DataFrame, l, h) -> pd.DataFrame:
    """Return the rows of ``df`` whose ``timestamp`` is in the half-open range [l, h).

    The input frame is left untouched; the result is a new frame whose index
    is renumbered from 0.
    """
    stamps = df["timestamp"]
    in_window = (stamps >= l) & (stamps < h)
    return df.loc[in_window].reset_index(drop=True)

In [53]:
# Product symbol constants: each name is bound to the identically spelled string.
RAINFOREST_RESIN = "RAINFOREST_RESIN"
KELP = "KELP"
SQUID_INK = "SQUID_INK"

CROISSANTS = "CROISSANTS"
JAMS = "JAMS"
DJEMBES = "DJEMBES"
PICNIC_BASKET1 = "PICNIC_BASKET1"
PICNIC_BASKET2 = "PICNIC_BASKET2"

VOLCANIC_ROCK = "VOLCANIC_ROCK"
# Vouchers listed by the number in their name, ascending.
VOLCANIC_ROCK_VOUCHER_9500 = "VOLCANIC_ROCK_VOUCHER_9500"
VOLCANIC_ROCK_VOUCHER_9750 = "VOLCANIC_ROCK_VOUCHER_9750"
VOLCANIC_ROCK_VOUCHER_10000 = "VOLCANIC_ROCK_VOUCHER_10000"
VOLCANIC_ROCK_VOUCHER_10250 = "VOLCANIC_ROCK_VOUCHER_10250"
VOLCANIC_ROCK_VOUCHER_10500 = "VOLCANIC_ROCK_VOUCHER_10500"

MAGNIFICENT_MACARONS = "MAGNIFICENT_MACARONS"

# Round 5 historical data lives in ../data/round5, one CSV per kind and day.
_ROUND5_DIR = os.path.join("..", "data", "round5")


def _read_round5_csv(filename: str, sep: str) -> pd.DataFrame:
    """Read one CSV from the round 5 data directory with the given delimiter."""
    return pd.read_csv(os.path.join(_ROUND5_DIR, filename), sep=sep)


# Prices and trades are ';'-separated.
market_data_round_5_day_2 = _read_round5_csv("prices_round_5_day_2.csv", sep=";")
market_data_round_5_day_3 = _read_round5_csv("prices_round_5_day_3.csv", sep=";")
market_data_round_5_day_4 = _read_round5_csv("prices_round_5_day_4.csv", sep=";")

trades_round_5_day_2 = _read_round5_csv("trades_round_5_day_2.csv", sep=";")
trades_round_5_day_3 = _read_round5_csv("trades_round_5_day_3.csv", sep=";")
trades_round_5_day_4 = _read_round5_csv("trades_round_5_day_4.csv", sep=";")

# Observations are ','-separated.
observations_round_5_day_2 = _read_round5_csv("observations_round_5_day_2.csv", sep=",")
observations_round_5_day_3 = _read_round5_csv("observations_round_5_day_3.csv", sep=",")
observations_round_5_day_4 = _read_round5_csv("observations_round_5_day_4.csv", sep=",")

# Stitch the three days of each kind into one frame with continuous timestamps.
# NOTE(review): each kind is shifted using the last timestamp of its own frames,
# so a kind whose last row is not at the end of the day (e.g. trades) may end up
# on a different time axis than prices - verify before joining on timestamp.
market_data_round_5_all3days = concatenate_historical_data(
    [market_data_round_5_day_2, market_data_round_5_day_3, market_data_round_5_day_4]
)
trades_round_5_all3days = concatenate_historical_data(
    [trades_round_5_day_2, trades_round_5_day_3, trades_round_5_day_4]
)
observations_round_5_all3days = concatenate_historical_data(
    [observations_round_5_day_2, observations_round_5_day_3, observations_round_5_day_4]
)

# Log file path is relative to the notebook's working directory.
round4finallog = Log.from_file("round4final.log")


In [54]:
# Short working names bound to independent copies, so later cells can modify
# md / th / obs without touching the concatenated source frames.
md, th, obs = (
    frame.copy()
    for frame in (
        market_data_round_5_all3days,
        trades_round_5_all3days,
        observations_round_5_all3days,
    )
)

In [55]:
def calculatepnl(market_data: pd.DataFrame, trade_history: pd.DataFrame, product: str, winner: str, loser: str, eval="midpoint"):
    """Score the strategy "copy `winner` whenever they trade against `loser`".

    Example: to check whether gary is on the winning side whenever he trades
    with gina, we buy whenever gary buys from gina and sell whenever gary
    sells to gina.

    Every matching trade counts as one unit, regardless of the traded
    quantity. The remaining net position is marked at the mid price of the
    last trade of this product that has a quote.

    Args:
        market_data: Quotes with columns ``timestamp``, ``product``,
            ``mid_price`` and (for "spreadcrossing") ``ask_price_1`` /
            ``bid_price_1``.
        trade_history: Trades with columns ``timestamp``, ``symbol``,
            ``buyer`` and ``seller``.
        product: Value to select in ``market_data["product"]`` and
            ``trade_history["symbol"]``.
        winner: Trader to copy, or "any" to match every trader.
        loser: Counterparty to require, or "any" to match every trader.
        eval: "midpoint" values every fill at the mid price; "spreadcrossing"
            buys at ``ask_price_1`` and sells at ``bid_price_1``. (The name
            shadows the builtin inside this function; kept so existing
            keyword callers still work.)

    Returns:
        ``(pnl, buy_qty, sell_qty)``. Trades without a usable price are left
        out of both the value and the counts. ``(0.0, 0, 0)`` is returned when
        no trade of the product has a quote.

    Raises:
        ValueError: If ``eval`` is not "midpoint" or "spreadcrossing".
    """
    # (buy price column, sell price column) for each valuation mode.
    price_columns = {
        "midpoint": ("mid_price", "mid_price"),
        "spreadcrossing": ("ask_price_1", "bid_price_1"),
    }
    if eval not in price_columns:
        # Validate before doing any filtering or merging.
        raise ValueError("Evaluation must be midpoint or spreadcrossing")
    buy_col, sell_col = price_columns[eval]

    md = market_data[market_data["product"] == product].reset_index(drop=True)
    th = trade_history[trade_history["symbol"] == product].reset_index(drop=True)

    # Left merge keeps every trade; trades without a quote at the same
    # timestamp get NaN prices.
    data = pd.merge(th, md, how="left", on="timestamp")

    known_mids = data["mid_price"].dropna()
    if known_mids.empty:
        # No trades, or none of them can be priced: nothing to evaluate.
        return 0.0, 0, 0
    final_price = known_mids.iloc[-1]

    buy_mask = ((data["buyer"] == winner) | (winner == "any")) & ((data["seller"] == loser) | (loser == "any"))
    sell_mask = ((data["seller"] == winner) | (winner == "any")) & ((data["buyer"] == loser) | (loser == "any"))

    # Drop fills that have no price so that counts and values stay consistent
    # (a NaN price would otherwise be counted as a fill with zero value).
    buy_mask = buy_mask & data[buy_col].notna()
    sell_mask = sell_mask & data[sell_col].notna()

    buy_value = data.loc[buy_mask, buy_col].sum()
    sell_value = data.loc[sell_mask, sell_col].sum()

    buy_qty = buy_mask.sum()
    sell_qty = sell_mask.sum()

    # Cash flow from the fills plus the open position marked at final_price.
    net_qty = buy_qty - sell_qty
    pnl = -buy_value + sell_value + net_qty * final_price

    return pnl, buy_qty, sell_qty


# grid search
# For every product, score each ordered pair of distinct traders under both
# valuation modes. Keys are (product, buyer, seller, mode); values are the
# (pnl, buy_qty, sell_qty) tuples returned by calculatepnl.
profit = {}
for product in np.unique(md["product"]):
    # Filter once per product instead of once per (pair, mode) call.
    product_md = md[md["product"] == product]
    product_th = th[th["symbol"] == product]

    buyers = list(np.unique(product_th["buyer"]))
    sellers = list(np.unique(product_th["seller"]))
    # Sorted so the iteration (and dict insertion) order is the same on every
    # run; plain set order of strings changes between kernel sessions.
    parties = sorted(set(buyers) | set(sellers))

    for buyer in parties:
        for seller in parties:
            if buyer == seller:
                continue

            # Loop variable deliberately not called `eval`: at notebook scope
            # that would rebind the builtin for every later cell.
            for eval_mode in ["spreadcrossing", "midpoint"]:
                profit[product, buyer, seller, eval_mode] = calculatepnl(product_md, product_th, product, buyer, seller, eval_mode)

In [58]:
# Rank all (product, buyer, seller, mode) combinations by PnL, worst first.
sss = sorted(profit.items(), key=lambda item: item[1][0])

for k, v in sss:
    # Unpacked into `eval_mode` rather than `eval` so the builtin is not
    # rebound at notebook scope; the printed text is unchanged.
    product, buyer, seller, eval_mode = k
    pnl, buy_qty, sell_qty = v

    print(f"Product: {product}, buyer: {buyer}, seller: {seller}, eval: {eval_mode} -> PNL: {pnl}, buy_qty: {buy_qty}, sell_qty: {sell_qty}")

Product: VOLCANIC_ROCK_VOUCHER_9500, buyer: Caesar, seller: Camilla, eval: spreadcrossing -> PNL: -194683.5, buy_qty: 0, sell_qty: 1499
Product: VOLCANIC_ROCK_VOUCHER_9750, buyer: Caesar, seller: Camilla, eval: spreadcrossing -> PNL: -137665.0, buy_qty: 0, sell_qty: 1544
Product: VOLCANIC_ROCK_VOUCHER_9500, buyer: Caesar, seller: Camilla, eval: midpoint -> PNL: -49818.5, buy_qty: 0, sell_qty: 1499
Product: PICNIC_BASKET2, buyer: Pablo, seller: Charlie, eval: spreadcrossing -> PNL: -45334.5, buy_qty: 0, sell_qty: 157
Product: PICNIC_BASKET2, buyer: Pablo, seller: Charlie, eval: midpoint -> PNL: -44914.0, buy_qty: 0, sell_qty: 157
Product: VOLCANIC_ROCK_VOUCHER_9750, buyer: Caesar, seller: Camilla, eval: midpoint -> PNL: -44124.5, buy_qty: 0, sell_qty: 1544
Product: VOLCANIC_ROCK_VOUCHER_10000, buyer: Caesar, seller: Camilla, eval: spreadcrossing -> PNL: -36101.0, buy_qty: 0, sell_qty: 1562
Product: SQUID_INK, buyer: Paris, seller: Charlie, eval: spreadcrossing -> PNL: -31231.5, buy_qty:

Most notable results  

Product: SQUID_INK, buyer: Charlie, seller: Paris, eval: midpoint -> PNL: 26455.5, buy_qty: 2291, sell_qty: 1520  
Product: PICNIC_BASKET2, buyer: Camilla, seller: Pablo, eval: midpoint -> PNL: 26549.5, buy_qty: 217, sell_qty: 121  
Product: PICNIC_BASKET2, buyer: Caesar, seller: Penelope, eval: midpoint -> PNL: 5457.0, buy_qty: 144, sell_qty: 126  


Idea: combine multiple trader pairs for the same symbol. The more pairs that give a consistent signal, the higher our confidence in it (similar in spirit to the voting of trees in a random forest).