<a href="https://colab.research.google.com/github/MatteoOnger/algo-collusion-mm/blob/main/notebooks/notebook_exp3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Algorithmic Collusion in Market Making - Hedge

A notebook testing Hedge agents implementing market-making strategies in the Glosten-Milgrom environment.

## Notebook Initialization

### Colab Environment Setup

In [None]:
# Do NOT run this cell in local environment - it's intended for Google Colab only.

# Clone GitHub repository
!git clone https://github.com/MatteoOnger/algo-collusion-mm.git

# Set working directory
%cd /content/algo-collusion-mm

# Install dependencies
!pip install --quiet .

### Local Environment Setup

In [1]:
# Do NOT run this cell in Google Colab - it's intended for local Jupyter Notebooks only.

# Autoreload imports: load IPython's autoreload extension and enable mode 2, so that
# edits to the imported package modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Select interactive backend for matplotlib (the `widget` backend is provided by ipympl)
%matplotlib widget

## Main Execution

In [2]:
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import time

import algo_collusion_mm.utils.plots as plots
import algo_collusion_mm.utils.storage as storage

from datetime import datetime

from algo_collusion_mm.agents.agent import Agent
from algo_collusion_mm.agents.makers.informed.hedge import MakerHedge
from algo_collusion_mm.agents.traders.nopass import NoPassTrader
from algo_collusion_mm.envs import GMEnv
from algo_collusion_mm.utils.common import get_calvano_collusion_index, get_relative_deviation_competition


# Override the DECIMAL_PLACES_VALUES setting of the plotting module for this notebook
# (presumably the number of decimals used when values are rendered in plots - TODO confirm).
plots.DECIMAL_PLACES_VALUES = 2

### Load Agents

In [None]:
# NOTE(review): `base_path=None` and the empty string passed to `load_objects` look like
# placeholders to be filled in before running this cell - confirm against ExperimentStorage.
saver = storage.ExperimentStorage(base_path=None)

# Load objects through the storage helper and list the keys of the returned mapping
objects = saver.load_objects('')
print(f'Objects loaded: {list(objects.keys())}')

### Run Single Episode

In [3]:
# Directory handed to the experiment storage: ../experiments/hedge/single_run
single_run_dir = os.path.join('..', 'experiments', 'hedge', 'single_run')
saver = storage.ExperimentStorage(single_run_dir)

In [None]:
n = 20_000          # Number of rounds
k =    100          # Number of windows
w = n // k          # Window size

n_makers = 3        # Number of market makers

nash_reward = 0.05   # Nash reward (single-agent case)
coll_reward = 0.35   # Collusive reward (single-agent case)

counter = 0         # Number of rounds done

# Prices and action space of the market makers.
# The grid is built with np.linspace on an explicit number of points: np.arange with a
# fractional step may or may not include the end point, depending on floating-point rounding.
tick_size = 0.2
n_prices = round(1.0 / tick_size) + 1
prices = np.round(np.linspace(0.0, 1.0, n_prices), 2)
# Every (ask, bid) pair on the grid with the ask strictly above the bid
action_space = np.array([(ask, bid) for ask in prices for bid in prices if (ask  > bid)])

# One Hedge maker per market maker plus a single trader. The makers are generated from
# `n_makers` so that the agents always match the `n_makers_i` passed to the environment
# below (assumes the environment names its informed makers 'maker_i_<index>', as the
# keys used to index `agents` by environment agent name require).
agents: dict[str, Agent] = {
    f'maker_i_{i}': MakerHedge(epsilon=.0075, scale_rewards=lambda r: (r / 0.25), action_space=action_space, name=f'maker_i_{i}')
    for i in range(n_makers)
}
agents['trader_0'] = NoPassTrader(tie_breaker='rand', name='trader_0')

# Environment with `n_makers` informed makers, no uninformed makers and one trader;
# `generate_vt` always returns 0.35.
env = GMEnv(
    generate_vt = lambda: 0.35,
    n_rounds = n,
    n_makers_u = 0,
    n_makers_i = n_makers,
    n_traders = 1,
    agents_action_space = action_space
)

timestamp_format = '%Y-%m-%d %H:%M:%S'

start_time = time.time()
print(f'Started at {datetime.now().strftime(timestamp_format)}')

# The info returned by reset is not read in this loop
_, info = env.reset()
for agent_name in env.agent_iter():
    # The agent whose turn it is observes the environment and picks its action
    observation = env.observe(agent_name)
    chosen_action = agents[agent_name].act(observation)
    _, rewards, _, _, infos = env.step(chosen_action)

    # Agents are only updated once the step reports the round as finished
    if not infos['round_finished']:
        continue

    if counter % 10_000 == 0:
        print(f'Running round {counter} ...')

    for name in env.possible_agents:
        participant = agents[name]

        # Save the current belief (a copy of the weights) of the makers before updating
        if name in env.makers:
            participant.history.record_extra(participant.weights.copy())

        participant.update(rewards[name], infos[name])

    counter += 1

execution_time = time.time() - start_time
print(f'Done at {datetime.now().strftime(timestamp_format)} | Execution time: {execution_time:.2f} seconds')

def _stack_maker_rewards() -> np.ndarray:
    """Build a fresh array holding the reward history of every maker, in `env.makers` order."""
    return np.array([agents[maker_name].history.get_rewards() for maker_name in env.makers])


# Compute Calvano collusion index per window and agent
cci = get_calvano_collusion_index(
    _stack_maker_rewards(),
    nash_reward = nash_reward,
    coll_reward = coll_reward,
    window_size = w,
)

# Compute relative deviation from competition index per window and agent
rdc = get_relative_deviation_competition(
    _stack_maker_rewards(),
    nash_reward = nash_reward,
    window_size = w,
)

# Collect info
first_window = slice(0, w)      # First `w` rounds
last_window = slice(-w, None)   # Last `w` rounds


def _per_maker(summarise):
    """Map each maker name to `summarise(row, maker)`, where `row` is its position in `env.makers`."""
    return {name: summarise(row, agents[name]) for row, name in enumerate(env.makers)}


def _reward_sum_per_agent(rounds):
    """Map every agent name to the sum of its rewards over `rounds`, rounded to 3 decimals."""
    return {name: round(float(agent.history.get_rewards(rounds).sum()), 3) for name, agent in agents.items()}


def _index_summary(index):
    """First-window value, last-window value and mean over windows of `index` for each maker."""
    return {
        0: _per_maker(lambda row, _: round(float(index[row, 0]), 3)),
        k: _per_maker(lambda row, _: round(float(index[row, -1]), 3)),
        'global': _per_maker(lambda row, _: round(float(index[row, :].mean()), 3)),
    }


info = {
    # NOTE(review): the key is spelled 'parmas' (sic); kept unchanged because it is part
    # of the saved output and may be read by other code.
    'parmas': {
        'n_rounds': n,
        'window_size': w,
        'action_space': str(action_space).replace('\n', ','),
        'tie_breaker': [agents[name].tie_breaker for name in env.traders],
        'epsilon': [agents[name].epsilon for name in env.makers],
        'seed': {name: agent._seed for name, agent in agents.items()},
        'agent_type': [agent.__class__.__name__ for agent in agents.values()],
    },
    # Keys 0 / k / 'global' hold the first window, the last window and the whole run
    'freq_actions': {
        0: _per_maker(lambda _, maker: str(maker.history.compute_freqs(first_window)).replace('\n', '')),
        k: _per_maker(lambda _, maker: str(maker.history.compute_freqs(last_window)).replace('\n', '')),
        'global': _per_maker(lambda _, maker: str(maker.history.compute_freqs()).replace('\n', '')),
    },
    'most_common_action': {
        0: _per_maker(lambda _, maker: str(maker.history.compute_most_common(first_window))),
        k: _per_maker(lambda _, maker: str(maker.history.compute_most_common(last_window))),
        'global': _per_maker(lambda _, maker: str(maker.history.compute_most_common())),
    },
    'cumulative_rewards': {
        0: _reward_sum_per_agent(first_window),
        k: _reward_sum_per_agent(last_window),
        'global': env.cumulative_rewards,
    },
    'cci': _index_summary(cci),
    'rdc': _index_summary(rdc),
}

# Plot figure
fig = plots.plot_all(
    window_size = w,
    makers = [agents[maker] for maker in env.makers],
    makers_belief_name = 'probs',
    cci = cci,
    nash_reward = nash_reward,
    coll_reward = coll_reward,
    title = 'Hedge Makers Summary Plots'
)

# Save results (environment, agents, figure and info). The returned value is stored
# under a name that does not shadow the built-in `dir`; shadowing it would make
# `dir(obj)` raise a TypeError for the rest of the kernel session.
episode_dir = saver.save_episode([env] + list(agents.values()), fig, info)

# Show the collected info, the summary figure and the value returned by the saver
# (presumably the location the episode was written to - TODO confirm)
print(json.dumps(info, indent=2))
display(fig)
print(episode_dir)

### Additional Plots

In [None]:
plt.close()

# Rounds shown in the plots below: the first 20
shown_rounds = slice(20)

# One column per maker: actions on the top row, rewards on the bottom row.
# squeeze=False keeps `axes` two-dimensional even with a single maker, so the
# `axes[row, column]` indexing below works for any n_makers >= 1.
fig, axes = plt.subplots(2, n_makers, figsize=(4*n_makers, 9), squeeze=False)
for i, name in enumerate(env.makers):
    plots.plot_maker_actions(
        agents[name],
        round_range = shown_rounds,
        ax = axes[0, i]
    )

    plots.plot_maker_rewards(
        agents[name],
        round_range = shown_rounds,
        ax = axes[1, i]
    )

plt.show()

In [None]:
plt.close()

# Belief evolution plot for maker 'maker_i_0', between indices 5000 and 10000
# (presumably round indices within the run - TODO confirm)
selected_maker = agents['maker_i_0']
plots.plot_maker_belief_evolution_sc(selected_maker, curr_idx=5000, next_idx=10000)

plt.show()