<a href="https://colab.research.google.com/github/MatteoOnger/algo-collusion-mm/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Algorithmic Collusion in Market Making - EXP3

A notebook testing various EXP3 agents implementing market-making strategies in the Glosten-Milgrom environment.

## Notebook Initialization

### Colab Environment Setup

In [None]:
# Do NOT run this cell in a local environment - it is intended for Google Colab only.
# NOTE: the absolute paths below assume the repository ends up in /content/algo-collusion-mm.

# Clone the GitHub repository
!git clone https://github.com/MatteoOnger/algo-collusion-mm.git

# Install the dependencies listed in the repository's requirements file (quiet output)
!pip install --quiet -r /content/algo-collusion-mm/requirements.txt

# Make the cloned repository the working directory for the remaining cells
%cd /content/algo-collusion-mm

### Local Environment Setup

In [1]:
# Do NOT run this cell in Google Colab - it is intended for local Jupyter notebooks only.

# Load the autoreload extension and enable mode 2, so that edited modules
# are reloaded before the code of a cell is executed
%load_ext autoreload
%autoreload 2

# Select the interactive `widget` backend for matplotlib
%matplotlib widget

## Main Execution

In [None]:
import itertools
import json
import matplotlib.pyplot as plt
import numpy as np
import time

import src.utils.gtu as gtu
import src.utils.plots as plots
import src.utils.storage as storage

from datetime import datetime

from src.agents.agent import Agent
from src.agents.makers.exp3 import MakerEXP3
from src.agents.traders.nopass import NoPassTrader
from src.envs import GMEnv
from src.utils.common import scale_rewards_array, get_calvano_collusion_index
from src.utils.stats import OnlineVectorStats


# Override the `DECIMAL_PLACES_VALUES` setting of the plots module.
# NOTE(review): presumably the number of decimal places used when values are
# rendered in the figures - confirm in src/utils/plots.py.
plots.DECIMAL_PLACES_VALUES = 0

### Load Agents

In [None]:
# Storage handler rooted at the EXP3 experiments folder
saver = storage.ExperimentStorage('./experiments/exp3')

# Load the previously saved objects and report which names were found
objects = saver.load_objects('')
loaded_names = list(objects.keys())
print(f'Objects loaded: {loaded_names}')

### Multiple Runs

In [None]:
# ------------------------------------------------------------------------------
# Multiple independent runs of the same EXP3-vs-EXP3 market-making experiment.
# For each run the Calvano collusion index (CCI) is computed per window and per
# maker, the run is saved to disk, and online statistics are accumulated.
# ------------------------------------------------------------------------------
r = 100             # Number of experiments
n = 25_000          # Number of episodes
k = 100             # Number of windows
w = n // k          # Window size

n_makers = 2        # Number of market makers

nash_reward = 0.1   # Nash reward (single-agent case)
coll_reward = 0.5   # Collusive reward (single-agent case)

# A run is flagged with '*' in the log when at least one maker's CCI
# in the last window reaches this value
cci_threshold = 0.45

# Slices selecting the first and the last window of an agent's history
first_window = slice(0, w)
last_window = slice(-w, None)

# Prices and action space of the market makers: every (ask, bid) pair of the
# price grid whose ask is strictly greater than its bid
# action_space = np.array([[0.0, 0.0], [.6, .4], [.68, .32]])
prices = np.round(np.arange(0.0, 1.0 + 0.2, 0.2), 2)
action_space = np.array([(ask, bid) for ask in prices for bid in prices if ask > bid])

# Min CCI of the last window per each experiment
final_cci = np.zeros(r)
# Min cumulative reward of the last window per each experiment
final_cum_rewards = np.zeros(r)

# To compute online statistics
stats_cci = OnlineVectorStats(n_makers)
stats_rwd = OnlineVectorStats(n_makers)

# To save experimental results
saver = storage.ExperimentStorage('./experiments/exp3')

start_time = time.time()
current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
saver.print_and_save(f'Started at {current_time}')

for i in range(r):
    if i % 10 == 0:
        saver.print_and_save(f'Running {i} ...')

    # One EXP3 maker per uninformed market maker, followed by a single trader.
    # NOTE(review): assumes the environment names its makers 'maker_u_<index>',
    # as it does for the two-maker setup - confirm before changing `n_makers`.
    agents: dict[str, Agent] = {
        f'maker_u_{j}': MakerEXP3(
            epsilon=MakerEXP3.compute_epsilon(len(action_space), n),
            scale_rewards=lambda rwd: (rwd / 0.3),
            action_space=action_space,
            name=f'maker_u_{j}',
        )
        for j in range(n_makers)
    }
    agents['trader_0'] = NoPassTrader(name='trader_0', tie_breaker='rand')

    env = GMEnv(
        generate_vt = lambda: 0.5,
        n_episodes = n,
        n_makers_u = n_makers,
        n_makers_i = 0,
        n_traders = 1,
    )

    # The value returned by `reset` is not needed here
    env.reset()

    for agent in env.agent_iter():
        action = agents[agent].act(env.observe(agent))
        _, rewards, _, _, infos = env.step(action)

        # Agents are updated only once the episode is complete
        if infos['episode_finished']:
            for a in env.possible_agents:
                agents[a].update(rewards[a], infos[a])

    makers = env.makers

    # Compute the Calvano collusion index per window and agent
    # (indexed below as cci[maker, window])
    cci = get_calvano_collusion_index(
        np.array([agents[name].history.get_rewards() for name in makers]),
        nash_reward = nash_reward,
        coll_reward = coll_reward,
        window_size = w
    )

    # Collect info: keys 0 and k refer to the first and the last window.
    # NOTE: the key 'parmas' is left spelled as it is, since it is part of the
    # data written to disk.
    info = {
        'parmas' : {
            'n_episodes' : n,
            'window_size' : w,
            'action_space' : str(action_space).replace('\n', ','),
            'epsilon' : [agents[name].epsilon for name in makers],
            'agent_type' : [agent.__class__.__name__ for agent in agents.values()]
        },
        'freq_actions' : {
            0 : {name : str(agents[name].history.compute_freqs(first_window)).replace('\n', '') for name in makers},
            k : {name : str(agents[name].history.compute_freqs(last_window)).replace('\n', '') for name in makers},
            'global' : {name : str(agents[name].history.compute_freqs()).replace('\n', '') for name in makers}
        },
        'most_common_action' : {
            0 : {name : str(agents[name].history.compute_most_common(first_window)) for name in makers},
            k : {name : str(agents[name].history.compute_most_common(last_window)) for name in makers},
            'global' : {name : str(agents[name].history.compute_most_common()) for name in makers}
        },
        'cumulative_rewards' : {
            0 : {name : round(float(agent.history.get_rewards(first_window).sum()), 3) for name, agent in agents.items()},
            k : {name : round(float(agent.history.get_rewards(last_window).sum()), 3) for name, agent in agents.items()},
            'global' : env.cumulative_rewards
        },
        'cci' : {
            0  : {name : round(float(cci[idx, 0]), 3) for idx, name in enumerate(makers)},
            k  : {name : round(float(cci[idx, -1]), 3) for idx, name in enumerate(makers)},
            'global' : {name : round(float(cci[idx, :].mean()), 3) for idx, name in enumerate(makers)},
        },
        'seed' : {
            name : agent._seed for name, agent in agents.items()
        }
    }

    # Update statistics
    stats_cci.update(cci[:, -1])
    stats_rwd.update(np.array([env.cumulative_rewards[name] for name in makers]))

    # Update last window's statistics
    final_cci[i] = cci[:, -1].min()
    final_cum_rewards[i] = np.array([agents[name].history.get_rewards(last_window).sum() for name in makers]).min()

    # Save and print results
    exp_dir = saver.save_experiment([env, *agents.values()], info=info)
    # The last-window CCI is stored under key `k`; the flag works for any number of makers
    flag = '*' if (cci[:, -1] >= cci_threshold).any() else ' '
    saver.print_and_save(f'{(i+1):03} {flag} -> CCI:{info["cci"][k]}'.ljust(60) + f' ({exp_dir})')

end_time = time.time()
execution_time = end_time - start_time
current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
saver.print_and_save(f'Done at {current_time} | Execution time: {execution_time:.2f} seconds')

# Save and print results
saver.save_objects({'final_cci': final_cci, 'final_cum_rewards': final_cum_rewards})
saver.print_and_save(
    f'Results:\n'
    f'- [CCI] Average in the last window: {np.round(stats_cci.get_mean(), 4)}\n'
    f'- [CCI] Standard deviation in the last window: {np.round(stats_cci.get_std(), 4)}\n'
    f'- [RWD] Global average: {np.round(stats_rwd.get_mean(), 4)}\n'
    f'- [RWD] Global standard deviation: {np.round(stats_rwd.get_std(), 4)}'
)

### Single Run

In [None]:
# Storage handler rooted at the EXP3 experiments folder
saver = storage.ExperimentStorage('./experiments/exp3')

In [None]:
# ------------------------------------------------------------------------------
# Single run of the EXP3-vs-EXP3 market-making experiment, with periodic
# snapshots of the makers' weights, a summary figure and the results saved
# to disk.
# ------------------------------------------------------------------------------
n = 25_000          # Number of episodes
k = 100             # Number of windows
w = n // k          # Window size

n_makers = 2        # Number of market makers

nash_reward = 0.1   # Nash reward (single-agent case)
coll_reward = 0.5   # Collusive reward (single-agent case)

counter = 0         # Number of episodes done

# The makers' weights are recorded once every `k // 5` episodes
belief_snapshot_period = k // 5

# Slices selecting the first and the last window of an agent's history
first_window = slice(0, w)
last_window = slice(-w, None)

# Prices and action space of the market makers: every (ask, bid) pair of the
# price grid whose ask is strictly greater than its bid
# action_space = np.array([[0.0, 0.0], [.6, .4], [.68, .32]])
prices = np.round(np.arange(0.0, 1.0 + 0.2, 0.2), 2)
action_space = np.array([(ask, bid) for ask in prices for bid in prices if ask > bid])

# One EXP3 maker per uninformed market maker, followed by a single trader.
# NOTE(review): assumes the environment names its makers 'maker_u_<index>',
# as it does for the two-maker setup - confirm before changing `n_makers`.
agents: dict[str, Agent] = {
    f'maker_u_{j}': MakerEXP3(
        epsilon=MakerEXP3.compute_epsilon(len(action_space), n),
        scale_rewards=lambda rwd: (rwd / 0.3),
        action_space=action_space,
        name=f'maker_u_{j}',
    )
    for j in range(n_makers)
}
agents['trader_0'] = NoPassTrader(tie_breaker='rand', name='trader_0')

env = GMEnv(
    generate_vt = lambda: 0.5,
    n_episodes = n,
    n_makers_u = n_makers,
    n_makers_i = 0,
    n_traders = 1,
)

start_time = time.time()
current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f'Started at {current_time}')

# The value returned by `reset` is not needed here
env.reset()
for agent in env.agent_iter():
    action = agents[agent].act(env.observe(agent))
    _, rewards, _, _, infos = env.step(action)

    # Agents are updated only once the episode is complete
    if infos['episode_finished']:
        if counter % 10_000 == 0:
            print(f'Running episode {counter} ...')

        for a in env.possible_agents:
            # Save the current belief of the agent, before this episode's update
            if a in env.makers and counter % belief_snapshot_period == 0:
                agents[a].history.record_extra(agents[a].weights.copy())

            agents[a].update(rewards[a], infos[a])

        counter += 1

end_time = time.time()
execution_time = end_time - start_time
current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f'Done at {current_time} | Execution time: {execution_time:.2f} seconds')

makers = env.makers

# Compute the Calvano collusion index per window and agent.
# Rows are built in `env.makers` order because that is the order used to
# index `cci` (cci[maker, window]) when the info dictionary is filled below.
cci = get_calvano_collusion_index(
    np.array([agents[name].history.get_rewards() for name in makers]),
    nash_reward = nash_reward,
    coll_reward = coll_reward,
    window_size = w
)

# Collect info: keys 0 and k refer to the first and the last window.
# NOTE: the key 'parmas' is left spelled as it is, since it is part of the
# data written to disk.
info = {
    'parmas' : {
        'n_episodes' : n,
        'window_size' : w,
        'action_space' : str(action_space).replace('\n', ','),
        'epsilon' : [agents[name].epsilon for name in makers],
        'tie_breaker' : [agents[name].tie_breaker for name in env.traders],
        'agent_type' : [agent.__class__.__name__ for agent in agents.values()]
    },
    'freq_actions' : {
        0 : {name : str(agents[name].history.compute_freqs(first_window)).replace('\n', '') for name in makers},
        k : {name : str(agents[name].history.compute_freqs(last_window)).replace('\n', '') for name in makers},
        'global' : {name : str(agents[name].history.compute_freqs()).replace('\n', '') for name in makers}
    },
    'most_common_action' : {
        0 : {name : str(agents[name].history.compute_most_common(first_window)) for name in makers},
        k : {name : str(agents[name].history.compute_most_common(last_window)) for name in makers},
        'global' : {name : str(agents[name].history.compute_most_common()) for name in makers}
    },
    'cumulative_rewards' : {
        0 : {name : round(float(agent.history.get_rewards(first_window).sum()), 3) for name, agent in agents.items()},
        k : {name : round(float(agent.history.get_rewards(last_window).sum()), 3) for name, agent in agents.items()},
        'global' : env.cumulative_rewards
    },
    'cci' : {
        0  : {name : round(float(cci[idx, 0]), 3) for idx, name in enumerate(makers)},
        k  : {name : round(float(cci[idx, -1]), 3) for idx, name in enumerate(makers)},
        'global' : {name : round(float(cci[idx, :].mean()), 3) for idx, name in enumerate(makers)},
    },
    'seed' : {
        name : agent._seed for name, agent in agents.items()
    }
}

# Plot figure (makers and beliefs are passed in the same order as the rows of `cci`)
fig = plots.plot_all(
    window_size = w,
    makers = {name: agents[name] for name in makers},
    cci = cci,
    makers_belif = {name: agents[name].weights for name in makers},
    nash_reward = nash_reward,
    coll_reward = coll_reward,
    title = 'EXP3 Makers Summary Plots'
)

# Save results (`exp_dir` is the value returned by `save_experiment`)
exp_dir = saver.save_experiment([env, *agents.values()], fig, info)

print(json.dumps(info, indent=2))
display(fig)
print(exp_dir)

### Additional Plots

In [None]:
# Epsilon grid used on the x-axis: 0.01, 0.02, ..., 0.99
epsilons = np.arange(1, 100) / 100

# Load the stored results of the "varying epsilon" experiments.
# NOTE(review): the same folder is given both to the storage constructor and
# to `load_objects` - confirm how `load_objects` resolves its path argument.
saver = storage.ExperimentStorage('./experiments/exp3/varying_epsilon')
objs = saver.load_objects('./experiments/exp3/varying_epsilon')
final_cci = objs['final_cci']
final_cum_rewards = scale_rewards_array(objs['final_cum_rewards'],  n_episodes=20_000)

plt.close()

# Two y-axes sharing the epsilon axis: CCI on the left, scaled rewards on the right
figure, ax1 = plt.subplots()
ax2 = ax1.twinx()

# NOTE(review): the legend label says '1k episodes' while the rewards are
# scaled with n_episodes=20_000 - confirm which one is intended.
ax1.plot(epsilons, final_cci, color='#1f77b4', label='1k episodes')
ax2.scatter(epsilons, final_cum_rewards, color='red', alpha=0.5, s=2, marker='o', edgecolor='none')

# Horizontal reference lines on the CCI axis
for level, colour, tag in ((0, 'black', 'Nash'), (1, 'green', 'Coll')):
    ax1.axhline(level, linestyle='--', color=colour, label=tag)

# Vertical line at the epsilon returned by `MakerEXP3.compute_epsilon`
ax1.axvline(MakerEXP3.compute_epsilon(3,  20_000*0.25), linestyle=':', color='#1f77b4')

ax1.set(
    xlabel='Epsilon',
    ylabel='CCI',
    title='(Min) CCI and Cumulative Rewards wrt Epsilon',
)
ax2.set_ylabel('Scaled Cumulative Reward')

# Single legend gathering the labelled artists of both axes
handles, names = [], []
for axis in (ax1, ax2):
    axis_handles, axis_names = axis.get_legend_handles_labels()
    handles = handles + axis_handles
    names = names + axis_names
ax1.legend(handles, names, loc='best', ncol=3)
ax1.grid(True)

plt.show()

saver.save_figures({'figure': figure})

In [None]:
# Maker whose recorded extras are plotted (requires `agents` from a previous cell)
agent_name = 'maker_u_0'
agent = agents[agent_name]

plt.close()

# Inputs of the plot.
# NOTE(review): the extras are presumably the weight snapshots stored through
# `history.record_extra` during the single run - confirm.
action_indexes = agent.price_to_index(agent.action_space)
price_labels = agent.prices
recorded_extras = np.stack(agent.history.get_extras())

plots.plot_maker_belif_evolution(
    indexes=action_indexes,
    labels=price_labels,
    values=recorded_extras,
    log_scale=False,
    adaptive_scale=False,
    agent_name=agent_name,
)

plt.show()

In [None]:
plt.close()

# Stack the action histories of the two loaded makers (requires `objects`
# from the "Load Agents" cell)
maker_actions = np.stack(
    [objects[name].history.get_actions() for name in ('maker_u_0', 'maker_u_1')]
)

# NOTE(review): `agents_name` receives every key of `objects`, while `actions`
# holds only the two makers above - confirm that `objects` contains exactly
# those two entries, in that order.
plots.plot_makers_best_actions(
    true_value=0.5,
    actions=maker_actions,
    agents_name=list(objects.keys()),
)

plt.show()

### Offline Game Analysis

In [None]:
# Find all pure Nash equilibria (NE) and pure coarse correlated equilibria (CCE)
# by enumerating every joint action of the makers.
# The action space defined by a previous cell is reused; uncomment the lines
# below to override it:
# prices =  np.round(np.arange(0.0, 1.0 + 0.2, 0.2), 2)
# action_space = np.array([(ask, bid) for ask in prices for bid in prices if (ask  > bid)])
# action_space = np.array([[.6, .4], [.8, .2]])


def _format_joint_action(joint_action: np.ndarray) -> str:
    """Return `joint_action` with its last two axes swapped, rendered on one line.

    Kept outside the f-strings below because a backslash inside an f-string
    replacement field is a syntax error before Python 3.12.
    """
    return str(joint_action.swapaxes(-1, -2)).replace('\n', '')


n_makers = 2
# Every maker gets the same action space: the array is repeated along a new leading axis
action_spaces = np.repeat(action_space[None, :], repeats=n_makers, axis=0)

print(f'Action spaces shape: {action_spaces.shape}')

joint_action_space, rewards = gtu.compute_joint_actions_and_rewards(action_spaces, true_value=0.5, tie_breaker='rand')

print(f'Joint action space shape: {joint_action_space.shape}')
print(f'Rewards shape: {rewards.shape}')
print('--------------------------------------------')

# No rescaling is applied: the equilibrium checks run on the raw rewards
scaled_rewards = rewards

# All leading axes of `rewards` index the joint action; their ranges are
# enumerated by both searches below
index_ranges = [range(s) for s in rewards.shape[:-1]]

start_time = time.time()
print('Search pure CCEs:')

for a in itertools.product(*index_ranges):
    # Pure joint profile: all probability mass on the joint action `a`
    prof = np.zeros(rewards.shape[:-1])
    prof[a] = 1.0
    if gtu.is_cce(scaled_rewards, prof):
        print(f'- {a} -> {_format_joint_action(joint_action_space[a])} is a CCE')

end_time = time.time()
execution_time = end_time - start_time
current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f'Done at {current_time} | Execution time: {execution_time:.2f} seconds')
print('--------------------------------------------')

start_time = time.time()
print('Search pure NEs:')

for a in itertools.product(*index_ranges):
    # One pure strategy per maker: row `m` puts all its mass on action `a[m]`
    prof = np.zeros((n_makers, len(action_space)))
    prof[np.arange(n_makers), a] = 1.0
    if gtu.is_ne(scaled_rewards, prof):
        print(f'- {a} -> {_format_joint_action(joint_action_space[a])} is a NE')

end_time = time.time()
execution_time = end_time - start_time
current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f'Done at {current_time} | Execution time: {execution_time:.2f} seconds')
print('--------------------------------------------')