<a href="https://colab.research.google.com/github/MatteoOnger/algo-collusion-mm/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Algorithmic Collusion in Market Making

This notebook tests various reinforcement-learning (RL) agents that implement market-making strategies in the Glosten-Milgrom environment.

## Notebook Initialization

### Colab Environment Setup

In [1]:
# Do NOT run this cell in local environment - it's intended for Google Colab only.

# Clone GitHub repository
!git clone https://github.com/MatteoOnger/algo-collusion-mm.git

# Install dependencies
!pip install --quiet -r /content/algo-collusion-mm/requirements.txt

# Set working directory
%cd /content/algo-collusion-mm

Cloning into 'algo-collusion-mm'...
remote: Enumerating objects: 391, done.[K
remote: Counting objects: 100% (108/108), done.[K
remote: Compressing objects: 100% (86/86), done.[K
remote: Total 391 (delta 44), reused 82 (delta 22), pack-reused 283 (from 1)[K
Receiving objects: 100% (391/391), 81.00 KiB | 1.09 MiB/s, done.
Resolving deltas: 100% (172/172), done.
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m515.7/515.7 kB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m852.5/852.5 kB[0m [31m36.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m49.5 MB/s[0m eta [36m0:00:00[0m
[?25h/content/algo-collusion-mm


### Local Environment Setup

In [None]:
# Do NOT run this cell in Google Colab - it is intended for local Jupyter notebooks only.

# Load the autoreload extension; mode 2 reloads modules before each cell is
# executed, so edits to the project's source files are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

# Select the interactive (widget) backend for matplotlib.
# NOTE(review): this backend is provided by the `ipympl` package - confirm it is
# installed in the local environment.
%matplotlib widget

## Main Execution

In [2]:
import json
import matplotlib.pyplot as plt
import numpy as np

import src.utils.plots as plots
import src.utils.storage as storage

from src.agents.agent import Agent
from src.agents.makers.exp3 import MakerEXP3
from src.agents.makers.mlql import MakerMLQL, MakerInformedMLQL
from src.agents.makers.ql import MakerInformedQL
from src.agents.traders.basic import BasicTrader
from src.agents.traders.nopass import NoPassTrader
from src.envs import GMEnv

In [3]:
def split_array(arr: np.ndarray, window_size: int) -> np.ndarray:
    """
    Split an array into windows of fixed size along the last axis.

    The last axis of length N is replaced by two axes
    `(n_windows, window_size)` with `n_windows = N // window_size`.
    If `window_size` is non-positive, no actual splitting is performed and
    the whole last axis becomes a single window; this keeps the number of
    dimensions of the result consistent with the windowed case.

    Parameters
    ----------
    arr : np.ndarray
        Input array (or array-like) to be split.
    window_size : int
        Size of each window. A positive value must be a divisor of the
        length of the last axis.
        If <= 0, the array is reshaped to (..., 1, N), where N is the
        original length of the last axis.

    Returns
    -------
    : np.ndarray
        Reshaped array with shape (..., n_windows, window_size).
        If `window_size <= 0`, the shape is (..., 1, N).

    Raises
    ------
    ValueError
        If `window_size` is positive and is not a divisor of the length
        of the last axis.
    """
    # Accept array-likes (e.g. nested lists); an ndarray is passed through unchanged.
    arr = np.asarray(arr)
    leading_shape = arr.shape[:-1]

    if window_size <= 0:
        # Single window spanning the whole last axis.
        return arr.reshape(leading_shape + (1, -1))

    try:
        return arr.reshape(leading_shape + (-1, window_size))
    except ValueError as exc:
        # Re-raise with a message that names the offending parameter instead of
        # NumPy's generic "cannot reshape array" text.
        raise ValueError(
            f'window_size={window_size} does not evenly divide the last axis '
            f'of an array with shape {arr.shape}'
        ) from exc


def get_calvano_collusion_index(rewards: np.ndarray, nash_reward: float, coll_reward: float, window_size: int = 0) -> np.ndarray:
    """
    Compute the Calvano Collusion Index (CCI) from agent rewards.

    The CCI measures the degree of collusion relative to Nash equilibrium
    and perfect collusion benchmarks. For each agent and each window it is

        (avg_reward - nash) / (coll - nash)

    where `nash` and `coll` are the per-agent shares of the two benchmarks,
    so a value of 0 corresponds to the Nash benchmark and a value of 1 to
    the collusive benchmark. Rewards are averaged over fixed-size windows
    of episodes before computing the index.

    Parameters
    ----------
    rewards : np.ndarray
        Array of shape (n_agents, n_episodes) containing per-agent rewards.
    nash_reward : float
        Benchmark reward under Nash equilibrium (total across all agents).
        It is divided by the number of agents internally.
    coll_reward : float
        Benchmark reward under perfect collusion (total across all agents).
        It is divided by the number of agents internally.
    window_size : int, default=0
        Size of the episode window for reward aggregation.
        If <= 0, all episodes are treated as one single window.

    Returns
    -------
    : np.ndarray
        Array of CCI values of shape (n_agents, n_windows).
        With `window_size <= 0` the shape is (n_agents, 1).

    Raises
    ------
    ValueError
        If `window_size` is positive and is not a divisor of the number
        of episodes (raised by `split_array`).

    Notes
    -----
    If `coll_reward` equals `nash_reward` the denominator is zero and the
    result is not meaningful.

    See Also
    --------
    - Calvano, E., Calzolari, G., Denicolò, V., & Pastorello, S. (2020).
    Artificial intelligence, algorithmic pricing, and collusion.
    *American Economic Review, 110*(10), 3267–3297.
    https://doi.org/10.1257/aer.20190623
    """
    n_agents = len(rewards)

    # Bind the per-agent benchmarks to new names instead of using `/=` on the
    # parameters: an in-place division would modify the caller's object when
    # a NumPy array is passed as a benchmark.
    nash_per_agent = nash_reward / n_agents
    coll_per_agent = coll_reward / n_agents

    # (n_agents, n_episodes) -> (n_agents, n_windows, window_size) -> (n_agents, n_windows)
    windowed_rewards = split_array(rewards, window_size)
    avg_rewards = windowed_rewards.mean(axis=-1)

    return (avg_rewards - nash_per_agent) / (coll_per_agent - nash_per_agent)

In [4]:
# Storage helper created with the './experiments' path; the experiment loop
# later in this notebook calls `saver.save_objects(...)` once per run.
saver = storage.ExperimentStorage('./experiments')

In [8]:
# Experiment configuration
n = 50_000          # Number of episodes
w = n // 1000       # Window size in episodes (50), so n // w = 1000 windows per run

# Benchmarks passed to get_calvano_collusion_index, which divides them by the number of makers
nash_reward = 0.1  # Nash reward
coll_reward = 0.5  # Collusive reward

# Actions available to each maker, one two-component action per row.
# NOTE(review): the meaning of the two components is not visible here - confirm against MakerEXP3.
action_space = np.array([[0.0, 0.0], [0.6, 0.4], [0.8, 0.2], [1.0, 0.0]])

# Run 10 repetitions; fresh agents and a fresh environment are built on every iteration.
for i in range(10):
    # With 10 repetitions this progress message is printed for i == 0 only.
    if i % 10 == 0:
        print(f'Running {i} ...')

    # Two EXP3 makers sharing the same action space and one trader, keyed by agent name.
    agents: dict[str, Agent] = {
        'maker_u_0': MakerEXP3(epsilon=MakerEXP3.compute_epsilon(len(action_space), n), action_space=action_space, name='maker_u_0'),
        'maker_u_1': MakerEXP3(epsilon=MakerEXP3.compute_epsilon(len(action_space), n), action_space=action_space, name='maker_u_1'),
        'trader_0': NoPassTrader(name='trader_0'),
    }

    # Environment whose value generator always returns 0.5; the agent counts match the `agents` dict above.
    env = GMEnv(
        generate_vt = lambda: 0.5,
        n_episodes = n,
        n_makers_u = 2,
        n_makers_i = 0,
        n_traders = 1,
    )

    # NOTE(review): the `info` returned here is never read; the name is rebound to the summary dict below.
    _, info = env.reset()

    # Interaction loop: the agent yielded by the environment observes and acts, then the environment steps.
    for agent in env.agent_iter():

        action = agents[agent].act(env.observe(agent))
        _, rewards, _, _, infos = env.step(action)

        # Once an episode is flagged as finished, every agent is updated with its own reward and info.
        if infos['episode_finished']:
            for a in env.possible_agents:
                agents[a].update(rewards[a], infos[a])


    # CCI per maker and per window. Rows follow the order of the makers in `agents`;
    # the lookups below index rows via enumerate(env.makers), which assumes that
    # env.makers iterates in the same order - TODO confirm.
    cci = get_calvano_collusion_index(
        np.array([agent.history.get_rewards() for name, agent in agents.items() if name in env.makers]),
        nash_reward = nash_reward,
        coll_reward = coll_reward,
        window_size = w
    )

    # Summary stored alongside the saved objects. In the nested dicts, key 0 refers to the
    # first window (first w episodes) and key n//w to the last window (last w episodes).
    info = {
        # NOTE(review): 'parmas' looks like a typo of 'params'; left unchanged because saved
        # files and any reader of them may depend on this key - confirm before renaming.
        'parmas' : {
            'n_episodes' : n,
            'window_size' : w,
            'action_space' : str(action_space),
            'agent_type' : [agent.__class__.__name__ for agent in agents.values()],
        },
        # Most common action of each maker over the last window
        'most_common_action' : {
            n//w : {name : str(agent.history.compute_most_common(slice(-w, None))) for name, agent in agents.items() if name in env.makers}
        },
        # Summed rewards of every agent (makers and trader) over the first and last window
        'cumulative_rewards' : {
            0  : {name : round(float(agent.history.get_rewards(slice(0, w)).sum()), 3) for name, agent in agents.items()},
            n//w : {name : round(float(agent.history.get_rewards(slice(-w, None)).sum()), 3) for name, agent in agents.items()},
            'global' : env.cumulative_rewards
        },
        # CCI of each maker in the first window, in the last window, and averaged over all windows
        'cci' : {
            0  : {name : round(float(cci[idx, 0]), 3) for idx, name in enumerate(env.makers)},
            n//w  : {name : round(float(cci[idx, -1]), 3) for idx, name in enumerate(env.makers)},
            'global' : {name : round(float(cci[idx, :].mean()), 3) for idx, name in enumerate(env.makers)},
        },
        # Reads the private `_seed` attribute of every agent
        'seed' : {
            name : agent._seed for name, agent in agents.items()
        }
    }

    # Persist the environment, the agents and the summary; the returned value is printed below.
    # NOTE(review): `dir` shadows the Python built-in of the same name.
    dir = saver.save_objects([env] + list(agents.values()), info=info)

    # Report only the runs where at least one maker earned more than 3.0 over the last window.
    if info['cumulative_rewards'][n//w]['maker_u_0'] > 3.0 or info['cumulative_rewards'][n//w]['maker_u_1'] > 3.0:
        print(f'{i} -> CCI:{info["cci"][n//w]} CR:{info["cumulative_rewards"][n//w]} ({dir})')

print('Done')

Running 0 ...
0 -> CCI:{'maker_u_0': 0.47, 'maker_u_1': 0.49} CR:{'maker_u_0': 7.2, 'maker_u_1': 7.4, 'trader_0': -14.6} (./experiments/experiment_017_20250930_150958)
1 -> CCI:{'maker_u_0': 0.42, 'maker_u_1': 0.48} CR:{'maker_u_0': 6.7, 'maker_u_1': 7.3, 'trader_0': -14.0} (./experiments/experiment_018_20250930_151039)
2 -> CCI:{'maker_u_0': 0.5, 'maker_u_1': 0.5} CR:{'maker_u_0': 7.5, 'maker_u_1': 7.5, 'trader_0': -15.0} (./experiments/experiment_019_20250930_151118)
3 -> CCI:{'maker_u_0': 0.5, 'maker_u_1': 0.5} CR:{'maker_u_0': 7.5, 'maker_u_1': 7.5, 'trader_0': -15.0} (./experiments/experiment_020_20250930_151158)
4 -> CCI:{'maker_u_0': 0.5, 'maker_u_1': 0.5} CR:{'maker_u_0': 7.5, 'maker_u_1': 7.5, 'trader_0': -15.0} (./experiments/experiment_021_20250930_151239)
5 -> CCI:{'maker_u_0': 0.5, 'maker_u_1': 0.5} CR:{'maker_u_0': 7.5, 'maker_u_1': 7.5, 'trader_0': -15.0} (./experiments/experiment_022_20250930_151320)
6 -> CCI:{'maker_u_0': 0.42, 'maker_u_1': 0.48} CR:{'maker_u_0': 6.7, 