<a href="https://colab.research.google.com/github/MatteoOnger/algo-collusion-mm/blob/main/notebooks/notebook_exp3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Algorithmic Collusion in Market Making - EXP3

A notebook testing EXP3 agents implementing market-making strategies in the Glosten-Milgrom environment.

## Notebook Initialization

### Colab Environment Setup

In [None]:
# Do NOT run this cell in local environment - it's intended for Google Colab only.

# Clone GitHub repository
!git clone https://github.com/MatteoOnger/algo-collusion-mm.git

# Set working directory
%cd /content/algo-collusion-mm

# Install dependencies
!pip install --quiet .

### Local Environment Setup

In [1]:
# Do NOT run this cell in Google Colab - it's intended for local Jupyter Notebooks only.

# Autoreload imports: mode 2 reloads modules before each cell is executed, so
# edits to the package source are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Select interactive backend for matplotlib (the `widget` backend is provided by ipympl)
%matplotlib widget

## Main Execution

In [None]:
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import time

import algo_collusion_mm.utils.plots as plots
import algo_collusion_mm.utils.storage as storage

from datetime import datetime

from algo_collusion_mm.agents.agent import Agent
from algo_collusion_mm.agents.makers.uninformed.exp3 import MakerEXP3
from algo_collusion_mm.agents.traders.nopass import NoPassTrader
from algo_collusion_mm.envs import GMEnv
from algo_collusion_mm.utils.common import get_calvano_collusion_index
from algo_collusion_mm.utils.stats import OnlineVectorStats


# Module-level setting of the plotting helpers
# (presumably the number of decimal places used when values are rendered in plots - TODO confirm).
plots.DECIMAL_PLACES_VALUES = 2

### Load Agents

In [None]:
# Reload objects persisted by a previous experiment.
# NOTE(review): `base_path=None` and the empty string passed to `load_objects`
# look like placeholders - presumably they have to be replaced with the experiment
# directory and the location of the saved objects before running this cell;
# confirm against `ExperimentStorage`.
saver = storage.ExperimentStorage(base_path=None)

objects = saver.load_objects('')
# `objects` exposes `.keys()`; list the names of the entries that were loaded.
print(f'Objects loaded: {list(objects.keys())}')

### Run Multiple Episodes

In [None]:
r = 10              # Number of episodes
n = 10_000          # Number of rounds
k = 100             # Number of windows
w = n // k          # Window size

n_makers = 2        # Number of market makers

nash_reward = 0.1   # Nash reward (single-agent case)
coll_reward = 0.5   # Collusive reward (single-agent case)

# Prices and action space of the market makers.
# The grid is the 6 ticks 0.0, 0.2, ..., 1.0. `np.linspace` fixes the number of
# points and includes the endpoint by construction, whereas `np.arange` with a
# fractional step and a padded stop (`1.0 + 0.2`) relies on floating-point
# rounding to decide whether the last tick is produced.
prices = np.round(np.linspace(0.0, 1.0, 6), 2)
# Every (ask, bid) pair with the ask strictly above the bid, one pair per row.
action_space = np.array([(ask, bid) for ask in prices for bid in prices if ask > bid])

# To compute online statistics (each accumulator is updated once per episode in the loop below)
# - CCI, shape (n_makers, k): fed with the per-maker, per-window collusion index
stats_cci = OnlineVectorStats((n_makers, k))
# - action frequencies, shape (n_makers, number of actions): fed with last-window frequencies
stats_action_freq = OnlineVectorStats((n_makers, len(action_space)))
# - joint action frequencies: one axis per maker, each of length `len(action_space)`
stats_joint_action_freq = OnlineVectorStats((len(action_space),) * n_makers)
# - cumulative reward of each maker over the whole episode
stats_rwd = OnlineVectorStats(n_makers)

# To save experimental results (path is relative to the current working directory)
saver = storage.ExperimentStorage(os.path.join('..', 'experiments', 'exp3', 'notebook', 'me'))

# Wall-clock start, used to report the total execution time once all episodes are done
start_time = time.time()
current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
saver.print_and_save(f'Started at {current_time}')

for i in range(r):
    # Log progress every 10 episodes
    if i % 10 == 0:
        saver.print_and_save(f'Running {i} ...')

    # Fresh agents for every episode. The makers are built from `n_makers`, so the
    # dictionary always matches the number of makers the environment is created with.
    # NOTE(review): the `maker_u_<idx>` / `trader_0` keys are assumed to be the agent
    # names generated by `GMEnv` - confirm.
    agents: dict[str, Agent] = {
        f'maker_u_{idx}': MakerEXP3(
            epsilon=MakerEXP3.compute_epsilon(len(action_space), n),
            # The parameter is called `reward` so that it does not shadow `r` (number of episodes)
            scale_rewards=lambda reward: (reward / 0.3),
            action_space=action_space,
            name=f'maker_u_{idx}',
        )
        for idx in range(n_makers)
    }
    agents['trader_0'] = NoPassTrader(name='trader_0', tie_breaker='rand')

    env = GMEnv(
        generate_vt = lambda: 0.5,
        n_rounds = n,
        n_makers_u = n_makers,
        n_makers_i = 0,
        n_traders = 1,
    )

    # The values returned by `reset` are not used in this cell
    env.reset()

    for agent in env.agent_iter():
        action = agents[agent].act(env.observe(agent))
        _, rewards, _, _, infos = env.step(action)

        # Agents are updated only once the environment flags the round as finished
        if infos['round_finished']:
            for a in env.possible_agents:
                agents[a].update(rewards[a], infos[a])

    # Compute the Calvano collusion index per window and agent (rows follow `env.makers`)
    cci = get_calvano_collusion_index(
        np.array([agents[name].history.get_rewards() for name in env.makers]),
        nash_reward = nash_reward,
        coll_reward = coll_reward,
        window_size = w
    )

    # Collect info: for each metric, key `0` refers to the first window, key `k` to
    # the last window and 'global' to the whole episode.
    # NOTE(review): the 'parmas' key looks like a typo of 'params'; it is kept as is
    # because renaming it would change the layout of the saved results.
    info = {
        'parmas' : {
            'n_rounds' : n,
            'window_size' : w,
            'action_space' : str(action_space).replace('\n', ','),
            'tie_breaker' : [agents[name].tie_breaker for name in env.traders],
            'epsilon' : [agents[name].epsilon for name in env.makers],
            'seed' : {name : agent._seed for name, agent in agents.items()},
            'agent_type' : [agent.__class__.__name__ for agent in agents.values()],
        },
        'freq_actions' : {
            0 : {name : str(agents[name].history.compute_freqs(slice(0, w))).replace('\n', '') for name in env.makers},
            k : {name : str(agents[name].history.compute_freqs(slice(-w, None))).replace('\n', '') for name in env.makers},
            'global' : {name : str(agents[name].history.compute_freqs()).replace('\n', '') for name in env.makers}
        },
        'most_common_action' : {
            0 : {name : str(agents[name].history.compute_most_common(slice(0, w))) for name in env.makers},
            k : {name : str(agents[name].history.compute_most_common(slice(-w, None))) for name in env.makers},
            'global' : {name : str(agents[name].history.compute_most_common()) for name in env.makers}
        },
        'cumulative_rewards' : {
            0 : {name : round(float(agent.history.get_rewards(slice(0, w)).sum()), 3) for name, agent in agents.items()},
            k : {name : round(float(agent.history.get_rewards(slice(-w, None)).sum()), 3) for name, agent in agents.items()},
            'global' : env.cumulative_rewards
        },
        'cci' : {
            0  : {name : round(float(cci[idx, 0]), 3) for idx, name in enumerate(env.makers)},
            k  : {name : round(float(cci[idx, -1]), 3) for idx, name in enumerate(env.makers)},
            'global' : {name : round(float(cci[idx, :].mean()), 3) for idx, name in enumerate(env.makers)},
        }
    }

    # Joint actions frequency over the last window: after the transpose there is one
    # row per round and one column per maker, holding action indices.
    joint_actions = np.array([
        agents[name].history.get_actions(slice(-w, None), return_index=True) for name in env.makers
    ]).T
    unique_joint_actions, freqs = np.unique(joint_actions, return_counts=True, axis=0)

    # One axis per maker; each observed joint action gets its relative frequency
    matrix = np.zeros(n_makers * (len(action_space),))
    matrix[tuple(unique_joint_actions.T)] = freqs / w

    # Update statistics
    stats_cci.update(cci)
    # NOTE(review): dividing by `w` assumes `compute_freqs` returns counts rather than
    # relative frequencies - confirm.
    stats_action_freq.update(np.array([agents[maker].history.compute_freqs(slice(-w, None)) for maker in env.makers]) / w)
    stats_joint_action_freq.update(matrix)
    stats_rwd.update(np.array([env.cumulative_rewards[maker] for maker in env.makers]))

    # Save and print results. The return value of `save_episode` is stored in
    # `episode_dir` (not `dir`) so that the builtin `dir()` stays usable in the kernel.
    # Episodes in which any maker reaches a last-window CCI of at least 0.45 are
    # flagged with '*'.
    episode_dir = saver.save_episode([env] + list(agents.values()), info=info)
    saver.print_and_save(f'{(i+1):03} {"*" if (cci[:, -1] >= 0.45).any() else " "} -> CCI:{info["cci"][k]}'.ljust(60) + f' ({episode_dir})')

# Report when the batch finished and how long it took
end_time = time.time()
execution_time = end_time - start_time
current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
saver.print_and_save(f'Done at {current_time} | Execution time: {execution_time:.2f} seconds')

# Build and save the summary plot; the maker objects passed in are the ones of
# the last episode that was run.
last_episode_makers = [agents[maker] for maker in env.makers]
fig = plots.plot_all_stats(
    window_size=w,
    makers=last_episode_makers,
    stats_cci=stats_cci,
    stats_actions_freq=stats_action_freq,
    stats_joint_actions_freq=stats_joint_action_freq,
)
saver.save_figures({'PLOT': fig})

# Persist the accumulators so they can be reloaded later
saver.save_objects({
    'stats_cci': stats_cci,
    'stats_action_freq': stats_action_freq,
    'stats_joint_action_freq': stats_joint_action_freq,
    'stats_rwd': stats_rwd
})

# Across-episode statistics: CCI of the last window and cumulative reward per maker
cci_last_mean = np.round(stats_cci.get_mean()[:, -1], 4)
cci_last_min = np.round(stats_cci.get_min()[:, -1], 4)
cci_last_max = np.round(stats_cci.get_max()[:, -1], 4)
cci_last_std = np.round(stats_cci.get_std(sample=False)[:, -1], 4)
rwd_mean = np.round(stats_rwd.get_mean(), 4)
rwd_std = np.round(stats_rwd.get_std(sample=False), 4)

summary = (
    f'Results:\n'
    f'- Last window:\n'
    f' - [CCI] Average: {cci_last_mean}\n'
    f' - [CCI] Minimum: {cci_last_min}\n'
    f' - [CCI] Maximum: {cci_last_max}\n'
    f' - [CCI] Standard deviation: {cci_last_std}\n'
    f'- Global:\n'
    f' - [RWD] Average: {rwd_mean}\n'
    f' - [RWD] Standard deviation: {rwd_std}'
)
saver.print_and_save(summary)

display(fig)
print(saver.base_path)

### Run Single Episode

In [None]:
# Storage used by the single-episode run below; the path is relative to the
# current working directory.
saver = storage.ExperimentStorage(os.path.join('..', 'experiments', 'exp3', 'notebook', 'se'))

In [None]:
n = 50_000          # Number of rounds
k = 100             # Number of windows
w = n // k          # Window size

n_makers = 2        # Number of market makers

nash_reward = 0.1   # Nash reward (single-agent case)
coll_reward = 0.5   # Collusive reward (single-agent case)

counter = 0         # Number of rounds done

# Prices and action space of the market makers.
# The grid is the 6 ticks 0.0, 0.2, ..., 1.0. `np.linspace` fixes the number of
# points and includes the endpoint by construction, whereas `np.arange` with a
# fractional step and a padded stop (`1.0 + 0.2`) relies on floating-point
# rounding to decide whether the last tick is produced.
prices = np.round(np.linspace(0.0, 1.0, 6), 2)
# Every (ask, bid) pair with the ask strictly above the bid, one pair per row.
action_space = np.array([(ask, bid) for ask in prices for bid in prices if ask > bid])

# The makers are built from `n_makers`, so the dictionary always matches the number
# of makers the environment is created with.
# NOTE(review): the `maker_u_<idx>` / `trader_0` keys are assumed to be the agent
# names generated by `GMEnv` - confirm.
agents: dict[str, Agent] = {
    f'maker_u_{idx}': MakerEXP3(
        epsilon=MakerEXP3.compute_epsilon(len(action_space), n),
        scale_rewards=lambda reward: (reward / 0.3),
        action_space=action_space,
        name=f'maker_u_{idx}',
    )
    for idx in range(n_makers)
}
agents['trader_0'] = NoPassTrader(tie_breaker='rand', name='trader_0')

# Environment with `n_makers` uninformed makers, no informed maker, one trader and
# a value generator that always returns 0.5
env = GMEnv(
    generate_vt = lambda: 0.5,
    n_rounds = n,
    n_makers_u = n_makers,
    n_makers_i = 0,
    n_traders = 1,
)

# Wall-clock start of the episode
start_time = time.time()
current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f'Started at {current_time}')

_, info = env.reset()
for agent_name in env.agent_iter():
    observation = env.observe(agent_name)
    action = agents[agent_name].act(observation)
    _, rewards, _, _, infos = env.step(action)

    # Agents are updated only once the environment flags the round as finished
    if not infos['round_finished']:
        continue

    if counter % 10_000 == 0:
        print(f'Running round {counter} ...')

    for agent_id in env.possible_agents:
        # Save the current belief of the maker, before this round's update is applied.
        # NOTE(review): the snapshot period is `k//5` rounds, i.e. it is derived from
        # the number of windows rather than from the window size - confirm this is intended.
        if agent_id in env.makers:
            if counter % (k//5) == 0:
                agents[agent_id].history.record_extra(agents[agent_id].probs.copy())

        agents[agent_id].update(rewards[agent_id], infos[agent_id])

    counter += 1

# Report when the episode finished and how long it took
end_time = time.time()
execution_time = end_time - start_time
current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(f'Done at {current_time} | Execution time: {execution_time:.2f} seconds')

# Compute the Calvano collusion index per window and agent.
# The rows are built in `env.makers` order, i.e. the same order used by the
# `enumerate(env.makers)` lookups into `cci` below.
cci = get_calvano_collusion_index(
    np.array([agents[name].history.get_rewards() for name in env.makers]),
    nash_reward = nash_reward,
    coll_reward = coll_reward,
    window_size = w
)

# Slices selecting the first and the last window of the episode
first_window = slice(0, w)
last_window = slice(-w, None)

# Collect info: for each metric, key `0` refers to the first window, key `k` to
# the last window and 'global' to the whole episode.
# NOTE(review): the 'parmas' key looks like a typo of 'params'; it is kept as is
# because renaming it would change the layout of the saved results.
info = {
    'parmas' : {
        'n_rounds' : n,
        'window_size' : w,
        'action_space' : str(action_space).replace('\n', ','),
        'tie_breaker' : [agents[name].tie_breaker for name in env.traders],
        'epsilon' : [agents[name].epsilon for name in env.makers],
        'seed' : {name : agent._seed for name, agent in agents.items()},
        'agent_type' : [agent.__class__.__name__ for agent in agents.values()],
    },
    'freq_actions' : {
        0 : {name : str(agents[name].history.compute_freqs(first_window)).replace('\n', '') for name in env.makers},
        k : {name : str(agents[name].history.compute_freqs(last_window)).replace('\n', '') for name in env.makers},
        'global' : {name : str(agents[name].history.compute_freqs()).replace('\n', '') for name in env.makers}
    },
    'most_common_action' : {
        0 : {name : str(agents[name].history.compute_most_common(first_window)) for name in env.makers},
        k : {name : str(agents[name].history.compute_most_common(last_window)) for name in env.makers},
        'global' : {name : str(agents[name].history.compute_most_common()) for name in env.makers}
    },
    'cumulative_rewards' : {
        0 : {name : round(float(agent.history.get_rewards(first_window).sum()), 3) for name, agent in agents.items()},
        k : {name : round(float(agent.history.get_rewards(last_window).sum()), 3) for name, agent in agents.items()},
        'global' : env.cumulative_rewards
    },
    'cci' : {
        0  : {name : round(float(cci[idx, 0]), 3) for idx, name in enumerate(env.makers)},
        k  : {name : round(float(cci[idx, -1]), 3) for idx, name in enumerate(env.makers)},
        'global' : {name : round(float(cci[idx, :].mean()), 3) for idx, name in enumerate(env.makers)},
    }
}

# Plot figure
fig = plots.plot_all(
    window_size = w,
    makers = [agents[maker] for maker in env.makers],
    makers_belief_name = 'probs',
    cci = cci,
    nash_reward = nash_reward,
    coll_reward = coll_reward,
    title = 'EXP3 Makers Summary Plots'
)

# Save results. The return value of `save_episode` is stored in `episode_dir`
# (not `dir`) so that the builtin `dir()` stays usable in the rest of the kernel.
episode_dir = saver.save_episode([env] + list(agents.values()), fig, info)

print(json.dumps(info, indent=2))
display(fig)
print(episode_dir)

### Additional Plots

In [None]:
# Close the current figure (if any) before drawing a new one
plt.close()

# Belief evolution of the first maker of the single-episode run
# (presumably drawn from the snapshots stored via `history.record_extra` - TODO confirm)
plots.plot_maker_belief_evolution(
    maker = agents['maker_u_0'],
    adaptive_scale= True
)

plt.show()

In [None]:
# Close the current figure (if any) before drawing a new one
plt.close()

# Best actions of all makers; `true_value` matches the constant 0.5 returned by the
# `generate_vt` callable the environment was created with
plots.plot_makers_best_actions(
    makers = [agents[maker] for maker in env.makers],
    true_value = 0.5
)

plt.show()