# The experiment notebook

This notebook runs experiments and logs the results using mlflow. 

## Import relevant packages

In [None]:
import mlflow
import numpy as np
from typing import List, Dict
import itertools
import torch

from market import Market
import config
import insurer

## Find the current tracking URI

In [None]:
# Show the tracking URI that MLflow is currently configured to use.
print(mlflow.get_tracking_uri())

## Helper functions 

These functions generate a grid of parameters that we can iterate through when running a grid search or iterating over random seeds, and assign a GPU index to each parameter set in the grid.

In [None]:
# Define helper functions to define parameter grids

def generate_grid(parameters: Dict[str, List]) -> List[Dict[str, float]]:
    """Expand a mapping of candidate values into every parameter combination.

    Args:
        parameters: Maps each parameter name to the list of values to try.

    Returns:
        One dictionary per element of the Cartesian product of the value
        lists, each mapping every parameter name to one chosen value. The
        combinations follow the order of ``itertools.product`` (the last
        parameter varies fastest). An empty ``parameters`` mapping yields a
        single empty dictionary; an empty value list yields no combinations.
    """
    names = list(parameters)
    candidate_values = [parameters[name] for name in names]
    return [
        dict(zip(names, chosen))
        for chosen in itertools.product(*candidate_values)
    ]

def set_gpus(
    parameter_grid: List[Dict[str, float]], num_gpus: int
) -> List[Dict[str, float]]:
    """Assign a GPU index to every parameter set in round-robin order.

    Args:
        parameter_grid: Parameter dictionaries. Each one is updated in place
            with a ``"gpu_id"`` entry.
        num_gpus: Number of GPUs to spread the parameter sets over.

    Returns:
        The same ``parameter_grid`` list that was passed in. When ``num_gpus``
        is not positive there is no GPU to assign, so the grid is returned
        without any ``"gpu_id"`` entries being added.
    """
    if num_gpus <= 0:
        # A machine without GPUs reports a device count of 0; the modulo below
        # would then raise ZeroDivisionError, so leave the grid untouched.
        return parameter_grid

    for position, params in enumerate(parameter_grid):
        # Cycle through GPU indices 0 .. num_gpus - 1.
        params["gpu_id"] = position % num_gpus
    return parameter_grid

# Number of CUDA devices reported by PyTorch (0 when none are available).
num_gpus = torch.cuda.device_count()

Find the number of GPUs available

In [None]:
# Report the detected GPU count.
print(f"Number of GPUs on device: {num_gpus}")

## Define the functions to run trials

We define two functions: one that lets the market and the insurers interact for one epoch, and one that runs a full trial. A trial consists of a series of burn-in epochs followed by a series of testing epochs.

In [None]:
def interact_one_epoch(market, insurer_list):
    """Let the market and the insurers interact for one epoch.

    One epoch is ``config.ExperimentConfig.epoch_customers`` steps. In each
    step the market is advanced and observed, every insurer makes offers for
    the observed customers, the market responds to the offers, and every
    insurer stores what it saw.

    Args:
        market: Object providing ``step()``, ``observation()`` and
            ``response(offers)``.
        insurer_list: Insurers providing ``make_offers(...)`` and
            ``store_customers(...)``; position ``i`` in the list corresponds
            to column ``i`` of the cost, offer and response arrays.

    Returns:
        None. Results are kept by the insurers via ``store_customers``.
    """
    for _ in range(config.ExperimentConfig.epoch_customers):
        # Advance the market and fetch this step's customers.
        market.step()
        features, costs = market.observation()

        # Offer matrix: one row per customer, one column per insurer.
        n_insurers = config.MarketConfig.num_insurers
        offers = np.zeros((len(features), n_insurers))
        for col in range(n_insurers):
            bidder = insurer_list[col]
            offers[:, col] = bidder.make_offers(features, costs[:, col])

        # The per-step profits returned by the market are not used here.
        responses, _step_profits = market.response(offers)

        # Each insurer receives its own cost and response columns, together
        # with the full offer matrix of all insurers.
        for col, bidder in enumerate(insurer_list):
            bidder.store_customers(
                features,
                costs[:, col],
                offers,
                responses[:, col],
            )

In [None]:
def run_trial(parameters):
    """Run one complete trial (burn-in, then testing) inside an MLflow run.

    The trial is logged to the MLflow experiment ``"RL-results"``. The run
    name and the logged parameters are built from every entry of
    ``parameters`` except ``"gpu_id"``.

    Args:
        parameters: Mapping of hyperparameter names to values. It is passed
            unchanged (including any ``"gpu_id"``) to every insurer.

    Returns:
        None.

    Raises:
        AssertionError: If the hard-coded list of insurer types does not have
            ``config.MarketConfig.num_insurers`` entries.
    """
    # One "RL" insurer followed by five "Null" insurers.
    type_list = ["RL", "Null", "Null", "Null", "Null", "Null"]
    assert (
        len(type_list) == config.MarketConfig.num_insurers
    ), f"Number of insurers should be {config.MarketConfig.num_insurers}. Instead, got {len(type_list)} insurers"

    # "gpu_id" is left out of both the run name and the logged parameters.
    untracked = ["gpu_id"]
    tracked = {hp: value for hp, value in parameters.items() if hp not in untracked}
    run_name = "_".join(f"{hp}_{value}" for hp, value in tracked.items())

    mlflow.set_experiment("RL-results")

    with mlflow.start_run(run_name=run_name):
        # Log the tracked hyperparameters.
        for hp, value in tracked.items():
            mlflow.log_param(hp, value)

        # Create the market, then one insurer per configured slot.
        market = Market()
        insurer_list = [
            insurer.Insurer(
                idx=position,
                insurer_type=type_list[position],
                parameters=parameters,
            )
            for position in range(config.MarketConfig.num_insurers)
        ]

        # Burn-in phase: after each epoch every insurer trains its auxiliary
        # models, records its variables and resets its epoch counters.
        for _ in range(config.ExperimentConfig.burn_in_epochs):
            interact_one_epoch(market, insurer_list)
            for member in insurer_list:
                member.train_auxiliary_models()
                member.record_variables()
                member.epoch_reset()

        # Switch every insurer from burn-in mode to testing mode.
        for member in insurer_list:
            member.testing_mode()

        # Testing phase: no auxiliary-model training, only recording and
        # resetting after each epoch.
        for _ in range(config.ExperimentConfig.test_epochs):
            interact_one_epoch(market, insurer_list)
            for member in insurer_list:
                member.record_variables()
                member.epoch_reset()


## Run the trial

We create a grid of parameters and then run a trial for each parameter combination in the grid.

**Note**: The random seed is set via the "trial_number" parameter. 

In [None]:
# Values to sweep over: here a single trial with trial_number 0.
parameters = {"trial_number":list(range(1))}

# Expand the candidate values into one parameter dictionary per combination.
grid = generate_grid(parameters)

# Only assign GPU indices when at least one GPU was detected.
if num_gpus > 0:
    grid = set_gpus(grid, num_gpus)

In [None]:
# Run the trials one after another, one per parameter dictionary in the grid.
for param_dict in grid:
    run_trial(param_dict)