In [59]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.stats import norm
from scipy.stats import beta
from scipy.stats import nbinom
import scipy.stats as stats
import itertools
from scipy import spatial
from collections import namedtuple
from collections import deque
import copy
from math import trunc

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# Set up matplotlib: the IPython display helper is only needed with the inline backend.
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

# Interactive mode, so figures are drawn without blocking the notebook.
plt.ion()

# Run on the GPU through the Metal (MPS) backend when this PyTorch build and
# machine support it; otherwise fall back to the CPU so the notebook still runs.
device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')

# Seat Pricing Simulation

In [2]:
from dataclasses import dataclass


@dataclass
class Customer:
    """A single booking in the simulation; one booking may cover several people travelling together."""
    # Willingness to pay.
    wtp: float
    # Number of people covered by this booking.
    nr_in_group: int
    # Price per seat recorded for the booking.
    price_bought: float
    # Whether the booking has bought seats.
    bought_seat: bool

    def total_sold(self) -> float:
        """Return group size times per-seat price, or 0.0 when no seat was bought."""
        return self.nr_in_group * self.price_bought if self.bought_seat else 0.0


In [29]:
# Structured dtype with the same four fields as `Customer`, so that all bookings
# of a flight can be stored in one NumPy array and filtered with boolean masks.
customerdt = np.dtype([('wtp', np.float64), ('nr_in_group', np.int32), ('price_bought', np.float64), ('bought_seat', bool)])

# When True, sell_seat2 multiplies the price of the seats at row positions 1 and 4
# by 4 for bookings of a single traveller.
COMPLEX_SEATING = True


class SeatSimulationFlight:
    """Simulation of one flight: a seat map, per-seat prices and a random list of bookings.

    Bookings are generated once in the constructor and stored in ``self.customers``
    (a structured array of dtype ``customerdt``). ``self.seats_sold`` holds one flag
    per seat (True = sold) and ``self.seats_available`` counts the seats still free.
    """

    # Price written over sold seats so that they never look affordable again.
    SOLD_SEAT_PRICE = 99999
    # Row width used by sell_seat2 to find the position of a seat within its row.
    SEATS_PER_ROW = 6

    def __init__(self, total_nr_customers: int, wtp_mu: float, wtp_sigma: float, wtp_scale: float,
                 seats_available: int, prices_offered: np.ndarray):
        """Create the flight and draw its bookings.

        :param total_nr_customers: Number of travellers to split into bookings.
        :param wtp_mu: Mean parameter of the log-normal willingness-to-pay draw.
        :param wtp_sigma: Sigma parameter of the log-normal willingness-to-pay draw.
        :param wtp_scale: Factor applied to every drawn willingness to pay.
        :param seats_available: Number of seats on the flight.
        :param prices_offered: One price per seat; copied, the caller's array is not modified.
        """
        self.total_nr_customers = total_nr_customers
        self.wtp_mu = wtp_mu
        self.wtp_sigma = wtp_sigma
        self.wtp_scale = wtp_scale
        self.total_seats = seats_available
        self.seats_available = seats_available
        self.seats_sold = np.full(seats_available, False, dtype=bool)
        self.prices_offered = np.copy(prices_offered)
        self.lowest_price = np.min(self.prices_offered)
        customer_list = self.create_customer_list()
        self.nr_bookings = len(customer_list)
        self.customers = np.empty(len(customer_list), dtype=customerdt)
        for i, customer in enumerate(customer_list):
            self.customers[i] = (customer.wtp, customer.nr_in_group, customer.price_bought, customer.bought_seat)

    def create_customer_list(self):
        """Split ``total_nr_customers`` travellers into bookings with random size and willingness to pay.

        Each booking has size ``1 + Binomial(8, 0.2)``. The final booking is capped so
        that the group sizes add up to exactly ``total_nr_customers`` (the original
        check for this case was a no-op, which let the total overshoot).

        :return: List of ``Customer`` objects, none of which has bought a seat yet.
        """
        nr_customers_divided = 0
        customer_list = list()
        while nr_customers_divided < self.total_nr_customers:
            remaining = self.total_nr_customers - nr_customers_divided
            booking_size = min(1 + np.random.binomial(8, 0.2), remaining)
            wtp = np.random.lognormal(self.wtp_mu, self.wtp_sigma) * self.wtp_scale
            customer_list.append(Customer(wtp, booking_size, 0.0, False))
            nr_customers_divided += booking_size
        return customer_list

    def update_price_offer(self, price_offer: np.ndarray):
        """Replace the per-seat prices and refresh the cached lowest price.

        The array is copied (as in the constructor) because sell_seat2 overwrites
        the prices of sold seats in place.
        """
        self.prices_offered = np.copy(price_offer)
        self.lowest_price = np.min(self.prices_offered)

    def adjacent_seats_available(self, seats_necessary: int):
        """Return True when at least ``seats_necessary`` consecutive seats are still unsold.

        ``self.seats_sold`` is True for sold seats, so a run is extended by every
        False entry and broken by every True entry.
        NOTE(review): the run is not broken at row boundaries (the original row check
        had no effect either) - confirm whether rows should matter here.
        """
        free_in_a_row = 0
        for sold in self.seats_sold:
            if sold:
                free_in_a_row = 0
                continue
            free_in_a_row += 1
            if free_in_a_row >= seats_necessary:
                return True
        return False

    def sell_seat2(self, customer: int):
        """Try to seat booking number ``customer`` on the first affordable run of adjacent free seats.

        A seat counts as affordable when its price is strictly below the booking's
        willingness to pay. With ``COMPLEX_SEATING`` the price of the seats at row
        positions 1 and 4 is multiplied by 4 for single travellers for this
        comparison only; the revenue always uses the listed prices.

        :param customer: Index into ``self.customers``.
        :return: Tuple ``(flag, revenue, seats_sold)``. ``flag`` is False when the booking
                 has already bought or its group size is not strictly smaller than the
                 number of free seats, and True otherwise (also when nothing was sold).
        """
        resulting_price = 0
        seats_sold = 0
        wtp = self.customers['wtp'][customer]
        group_size = self.customers['nr_in_group'][customer]

        # Not even the cheapest seat is affordable: nothing to search for.
        if self.lowest_price > wtp:
            return True, resulting_price, seats_sold
        if self.customers['bought_seat'][customer] or not group_size < self.seats_available:
            return False, resulting_price, seats_sold

        seat_factor = 4 if group_size == 1 else 1
        seats_available_together = 0
        for i in range(self.total_seats):
            seat_price = self.prices_offered[i]
            if COMPLEX_SEATING and i % self.SEATS_PER_ROW in (1, 4):
                # Multiply the seat price instead of dividing the customer's WTP, has same effect
                seat_price = seat_price * seat_factor
            if seat_price < wtp and not self.seats_sold[i]:
                seats_available_together += 1
            else:
                # Seat is sold or too expensive.
                # NOTE(review): as in the original, a run is not reset when a new row
                # starts on an affordable free seat, so a group can span two rows.
                seats_available_together = 0
            if seats_available_together == group_size:
                block = slice(i - (seats_available_together - 1), i + 1)
                # Read the prices before they are overwritten with the sold marker.
                resulting_price = sum(self.prices_offered[block])
                price_selected = min(self.prices_offered[block])
                self.seats_sold[block] = True
                self.prices_offered[block] = self.SOLD_SEAT_PRICE
                if price_selected == self.lowest_price:
                    self.lowest_price = np.min(self.prices_offered)
                self.customers['bought_seat'][customer] = True
                # Average price per seat, so nr_in_group * price_bought equals the revenue
                # (the original stored the sold marker that had just been written).
                self.customers['price_bought'][customer] = resulting_price / group_size
                seats_sold = group_size
                self.seats_available -= group_size
                break
        return True, resulting_price, seats_sold

    def get_nr_customers(self, customer_index: int):
        """Return the group size of booking number ``customer_index``."""
        return self.customers[customer_index]['nr_in_group']

    def sell_seat(self, price_offered: float):
        """Sell seats at a single price to the first booking that is willing and fits.

        A booking is eligible when it has not bought yet, its group size is strictly
        smaller than the number of free seats and its willingness to pay is strictly
        above ``price_offered``. The sale is written to ``self.customers`` itself; a
        boolean-mask selection would be a copy and lose the update.

        :return: Tuple ``(revenue, seats_sold)``; ``(0.0, 0)`` when nobody is eligible.
        """
        resulting_price = 0.0
        seats_sold = 0

        eligible = np.flatnonzero(~self.customers['bought_seat'] &
                                  (self.customers['nr_in_group'] < self.seats_available) &
                                  (self.customers['wtp'] > price_offered))
        if eligible.size > 0:
            buyer = eligible[0]
            group_size = self.customers['nr_in_group'][buyer]
            self.customers['bought_seat'][buyer] = True
            self.customers['price_bought'][buyer] = price_offered
            resulting_price = price_offered * group_size
            seats_sold = group_size
            self.seats_available -= group_size
        return resulting_price, seats_sold

    def theoretical_max(self):
        """Greedy revenue bound: serve bookings by descending willingness to pay at exactly that price.

        Bookings that do not fit into the seats still free are skipped.

        :return: Tuple ``(total_revenue, total_seats_sold)``.
        """
        ordered_customers = self.customers[np.argsort(-self.customers['wtp'])]
        idx = 0
        max_seats_available = self.seats_available
        total_revenue = 0
        total_seats_sold = 0
        while max_seats_available > 0 and idx < len(ordered_customers):
            customers_in_group = ordered_customers[idx]['nr_in_group']
            if customers_in_group <= max_seats_available:
                max_seats_available -= customers_in_group
                total_revenue += customers_in_group * ordered_customers[idx]['wtp']
                total_seats_sold += customers_in_group
            idx += 1
        return total_revenue, total_seats_sold




# Price Optimisation

In [60]:
class PricingValueIteration:
    """Price selection by value iteration over per-price (alpha, beta) demand counters.

    Every price point carries two integer counters, ``alpha`` and ``beta``. The expected
    demand at a price is taken as ``alpha / beta`` and the number of purchases in the
    next step is modelled as negative binomial with ``r = alpha`` and
    ``p = beta / (beta + 1)``. ``value_function[alpha, beta, price_index]`` stores the
    discounted value of offering that price in that counter state.

    The model name is available as the ``name`` attribute. The former ``name()`` method
    was removed: the attribute set in ``__init__`` shadowed it on every instance, so it
    could never be called.
    """

    def __init__(self, price_points: list, max_iterations: int, calculate_value_function: bool, seats_available: int):
        """
        Initialize the PricingValueIteration class for multiple purchases with price monotonicity assumption.

        :param price_points: Possible price points (actions); a list or a NumPy array.
        :param max_iterations: Maximum number of iterations for value iteration.
        :param calculate_value_function: Whether to calculate the value function upon initialization.
        :param seats_available: Number of seats; get_action returns one price per seat.
        """
        self.name = "Value-Iteration"
        self.seats_available = seats_available
        # np.asarray lets both lists and arrays through; a plain list has no .tolist().
        self.price_points = np.asarray(price_points).tolist()  # List of possible prices (actions)
        self.num_prices = len(self.price_points)
        self.choices = []
        self.alphas = np.ones(self.num_prices, dtype=int)  # Shape parameter (α) for Gamma distribution
        self.betas = np.ones(self.num_prices, dtype=int)   # Rate parameter (β) for Gamma distribution
        self.current_state = [self.alphas.copy(), self.betas.copy()]
        # Revenue per price point; float, because an int array would truncate fractional prices.
        self.wins = np.zeros(self.num_prices, dtype=float)
        self.pulls = np.zeros(self.num_prices, dtype=int)
        self.max_iterations = max_iterations
        self.gamma = 0.99  # Discount factor
        self.max_calculation_range = 300  # Cap for alpha and beta to prevent overflow
        self.D_max = 10  # Maximum number of purchases to consider in value iteration

        # Initialize the value function as a 3D array: alpha x beta x price_index
        self.value_function = np.zeros((self.max_calculation_range + self.D_max + 1,
                                        self.max_calculation_range + 1,
                                        self.num_prices))

        self.differences_in_value_function = []

        if calculate_value_function:
            self.calculate_value_function()

    def initialise_for_flight_type(self, flight_type):
        """No per-flight-type state in this model; does nothing."""
        return

    def reset(self):
        """
        Reset the counters and bookkeeping to their initial state (the value function is kept).
        """
        self.choices = []
        self.wins = np.zeros(self.num_prices, dtype=float)
        self.pulls = np.zeros(self.num_prices, dtype=int)
        self.alphas = np.ones(self.num_prices, dtype=int)
        self.betas = np.ones(self.num_prices, dtype=int)
        self.current_state = [self.alphas.copy(), self.betas.copy()]

    def calculate_value_function(self):
        """
        Calculate the value function using value iteration for multiple purchases with price monotonicity assumption.

        One sweep updates every state with ``alpha`` in ``1..max_calculation_range + D_max``
        and ``beta`` in ``1..max_calculation_range``. The sweep is done with array operations:
        the purchase probabilities depend only on (alpha, beta) and are computed once, and a
        backup only reads states with a larger alpha or beta (or the state itself at the
        caps), i.e. states an ascending in-place loop has not overwritten yet, so updating
        all states at once yields the same values as the element-by-element loop.
        The largest change of each sweep is appended to ``differences_in_value_function``.
        """
        theta = 0.001  # Convergence threshold

        alpha_cap = self.value_function.shape[0] - 1
        beta_cap = self.value_function.shape[1] - 1
        alphas = np.arange(1, self.max_calculation_range + self.D_max + 1)
        betas = np.arange(1, self.max_calculation_range + 1)
        purchases = np.arange(self.D_max + 1)
        prices = np.asarray(self.price_points, dtype=float)

        # purchase_probability[a, b, D] = P(D purchases | alpha = alphas[a], beta = betas[b])
        purchase_probability = nbinom.pmf(purchases[None, None, :],
                                          alphas[:, None, None],
                                          (betas / (betas + 1))[None, :, None])
        # Expected immediate reward is price * expected demand (E[λ]) at each state
        expected_reward = (alphas[:, None] / betas[None, :])[:, :, None] * prices[None, None, :]
        # No purchases: alpha stays, beta moves up by one (clamped to the table).
        beta_after_no_purchase = np.minimum(betas + 1, beta_cap)

        for iteration in range(self.max_iterations):
            values = self.value_function
            # View on the states that are updated (index 0 on either axis is never touched).
            updated_states = values[1:alphas.size + 1, 1:betas.size + 1, :]

            expected_future_value = (purchase_probability[:, :, 0, None] *
                                     values[alphas[:, None], beta_after_no_purchase[None, :], :])
            for D in range(1, self.D_max + 1):
                # D purchases: alpha moves up by D (clamped to the table), beta stays.
                alpha_after_purchase = np.minimum(alphas + D, alpha_cap)
                expected_future_value = expected_future_value + (
                    purchase_probability[:, :, D, None] *
                    values[alpha_after_purchase[:, None], betas[None, :], :])

            new_values = expected_reward + self.gamma * expected_future_value
            delta = float(np.max(np.abs(updated_states - new_values))) if new_values.size else 0.0
            updated_states[...] = new_values

            self.differences_in_value_function.append(delta)

            if delta < theta:
                print(f"Value function converged after {iteration + 1} iterations.")
                break
        else:
            print("Value iteration did not converge within the maximum number of iterations.")

    def get_action(self, state, flight_type):
        """Return the chosen price repeated for every seat, together with the constant 1.

        ``state`` and ``flight_type`` are accepted for interface compatibility and not used.
        """
        price = self.choose()
        prices = np.full(self.seats_available, price, dtype=float)
        return prices, 1

    def _lookahead_value(self, alpha, beta, price_index):
        """One-step Bellman backup for one price: immediate reward plus discounted expected next value."""
        alpha_cap = self.value_function.shape[0] - 1
        beta_cap = self.value_function.shape[1] - 1
        # Probability of observing D = 0..D_max purchases (negative binomial).
        probabilities = nbinom.pmf(np.arange(self.D_max + 1), alpha, beta / (beta + 1))

        expected_future_value = 0.0
        for D, probability in enumerate(probabilities):
            if D > 0:
                # Purchases occurred: alpha grows by D, beta remains the same
                alpha_new, beta_new = alpha + D, beta
            else:
                # No purchases: beta grows by one
                alpha_new, beta_new = alpha, beta + 1
            # Ensure indices are within bounds
            alpha_new = min(alpha_new, alpha_cap)
            beta_new = min(beta_new, beta_cap)
            expected_future_value += probability * self.value_function[alpha_new, beta_new, price_index]

        # Expected immediate reward is price * expected demand (E[λ])
        expected_reward = self.price_points[price_index] * (alpha / beta)
        return expected_reward + self.gamma * expected_future_value

    def choose(self):
        """
        Choose the optimal price based on the current state and value function.

        :return: The selected price point (the first one on ties).
        """
        max_value = -np.inf
        max_price_index = 0

        for price_index in range(self.num_prices):
            new_value = self._lookahead_value(self.alphas[price_index], self.betas[price_index], price_index)
            if new_value > max_value:
                max_value = new_value
                max_price_index = price_index

        return self.price_points[max_price_index]

    def process_data(self, chosen_price, state, prediction, flight_revenue, end_state, customers_offered, flight_type_idx):
        """Derive the number of purchases from the state change and feed it to process_choice.

        The absolute difference of the two state sums is used, so the count is correct
        whether the flags in ``state`` mark sold seats or free seats (the original sign
        convention was the opposite of ProbaPrediction.process_data).
        """
        purchases = abs(np.sum(end_state) - np.sum(state))
        price = chosen_price[0]
        self.process_choice(price, purchases)

    def process_choice(self, chosen_price, purchases):
        """
        Update the state based on the chosen price and the number of purchases,
        considering the price monotonicity assumption.

        :param chosen_price: The price that was offered; must be one of ``price_points``.
        :param purchases: Number of purchases made (integer >= 0).
        """
        price_index = self.price_points.index(chosen_price)
        self.pulls[price_index] += 1
        self.wins[price_index] += purchases * chosen_price  # Total revenue from this price point
        self.choices.append(chosen_price)

        if purchases > 0:
            # If purchases occurred, update alphas for all prices <= chosen price
            # (capped so that alpha + D_max still indexes into the value function).
            alpha_limit = self.value_function.shape[0] - self.D_max - 1
            self.alphas[:price_index + 1] = np.minimum(self.alphas[:price_index + 1] + purchases, alpha_limit)
        else:
            # If no purchases, update betas for all prices >= chosen price
            beta_limit = self.value_function.shape[1] - 1
            self.betas[price_index:] = np.minimum(self.betas[price_index:] + 1, beta_limit)

        self.current_state = [self.alphas.copy(), self.betas.copy()]

    def print_average_reward(self):
        """
        Print the average revenue per interaction.
        """
        total_revenue = float(sum(self.wins))
        total_pulls = float(sum(self.pulls))
        print("Total interactions:", total_pulls)
        print("Total revenue:", total_revenue)
        average_revenue = total_revenue / total_pulls if total_pulls > 0 else 0
        print(f"Average revenue per interaction: {average_revenue}")

In [37]:
def get_state(current_state: []):
    """Read an array of 0/1 (or boolean) flags as a binary number, first element most significant."""
    bit_characters = current_state.astype(int).astype(str)
    binary_text = ''.join(bit_characters)
    return int(binary_text, 2)


def get_state_price(price_chosen, price_levels):
    """Return the position of the first entry of ``price_levels`` that equals ``price_chosen[0]``."""
    offered_price = price_chosen[0]
    matching_positions = np.nonzero(price_levels == offered_price)[0]
    return matching_positions[0]


class QLearning:
    """Epsilon-greedy learner with one Q-value per price level.

    ``Q`` is a 1-D array indexed by price level; the seat state is not part of the
    table. The model name is available as the ``name`` attribute. The former
    ``name()`` method was removed: the attribute set in ``__init__`` shadowed it on
    every instance, so it could never be called.
    """

    def __init__(self, epsilon: float, lr: float, gamma: float, seats_available: int, price_levels: np.ndarray):
        """
        :param epsilon: Probability of picking a random price level.
        :param lr: Learning rate of the Q update.
        :param gamma: Discount factor of the Q update.
        :param seats_available: Number of seats; get_action returns one price per seat.
        :param price_levels: Possible prices; a list or a NumPy array.
        """
        self.epsilon = epsilon
        self.lr = lr
        self.name = "Q-Learning"
        self.gamma = gamma
        self.seats_available = seats_available
        self.Q = np.zeros(len(price_levels))
        self.max_price = len(price_levels)
        # An array is required for the element-wise comparison in get_state_price.
        self.price_levels = np.asarray(price_levels)

    def _choose_price_index(self):
        """Epsilon-greedy choice: a random price index with probability epsilon, else the argmax of Q."""
        if np.random.rand() < self.epsilon:
            return np.random.choice(self.max_price)
        return np.argmax(self.Q)

    def _update_q(self, price_index, reward):
        """Blend Q[price_index] with the target ``reward + gamma * Q[price_index]`` using the learning rate."""
        target = reward + self.gamma * self.Q[price_index]
        self.Q[price_index] = (1 - self.lr) * self.Q[price_index] + self.lr * target

    def calculate_step(self, current_state: []):
        """Return ``(price_index, price)`` for the next offer; ``current_state`` is not used."""
        choice = self._choose_price_index()
        return choice, self.price_levels[choice]

    def get_action(self, state: [], flight_type):
        """Return ``(prices, price_index)`` with the chosen price repeated for every seat.

        ``state`` and ``flight_type`` are accepted for interface compatibility and not used.
        """
        choice = self._choose_price_index()
        prices = np.full(self.seats_available, self.price_levels[choice], dtype=float)
        return prices, choice

    def initialise_for_flight_type(self, flight_type):
        """No per-flight-type state in this model; does nothing."""
        return

    def updateQ(self, state_at_action: [], new_state: [], action: int, reward: float):
        """Apply the Q update for the price level with index ``action``.

        The original indexed the 1-D table with two indices and called ``action[0]`` on
        the integer action, so it raised on every call; it now performs the same update
        as process_data. The two state arguments are not used.
        """
        self._update_q(action, reward)

    def update_during_booking(self, booking_index, total_customers, action,
                              start_state, prediction, current_revenue, current_state):
        """This model never re-prices during a sales window: returns the action unchanged and False."""
        return action, False

    def process_data(self, action, start_state, prediction, round_revenue, new_state,
                     customers_offered, flight_type):
        """Apply the Q update for the price in ``action[0]`` with ``round_revenue`` as reward."""
        price_index = get_state_price(action, self.price_levels)
        self._update_q(price_index, round_revenue)

    def print_matrix(self):
        """Print the Q-values."""
        print(self.Q)



In [38]:
import numpy as np


# When True, ProbaPrediction keeps separate histories and parameters per flight type.
USE_MULTIPLE_FLIGHT_MODEL = False
# When True, ProbaPrediction draws its per-flight Beta parameters from shared normal
# distributions; the code for this indexes per-flight lists, i.e. it expects
# USE_MULTIPLE_FLIGHT_MODEL to be True as well.
USE_HIERARCHICAL_MODEL = False


class ProbaPrediction:
    """Price model that estimates a purchase probability per price level.

    The expected number of seats sold at a price is ``probability * seats_available``
    and get_action picks the price with the highest expected revenue. Depending on
    ``type`` the probability is a running average (default), a draw from a Beta
    posterior (``"Bayesian"``) or the 0.9 quantile of that posterior
    (``"Bayesian"`` together with ``"UCB"``).

    The model name is available as the ``name`` attribute. The former ``name()`` method
    was removed: the attribute set in ``__init__`` shadowed it on every instance, so it
    could never be called.
    """

    def __init__(self, type: str, seats_available: int, prices_offered: np.ndarray, nr_flight_types: int):
        """
        :param type: Model variant; checked for the substrings "Bayesian" and "UCB".
        :param seats_available: Number of seats; get_action returns one price per seat.
        :param prices_offered: Possible prices; a list or a NumPy array.
        :param nr_flight_types: Number of flight types (used in multi-flight mode only).
        """
        self.TYPE = type
        self.name = "Binomial"
        self.seats_available = seats_available
        # An array is required for the element-wise price comparisons below.
        self.prices_possible = np.asarray(prices_offered)
        self.prices = np.full((self.seats_available), np.random.choice(self.prices_possible))
        nr_prices = len(self.prices_possible)

        def new_history():
            # One bounded history, or one per flight type in multi-flight mode.
            if USE_MULTIPLE_FLIGHT_MODEL:
                return [deque(maxlen=100) for _ in range(nr_flight_types)]
            return deque(maxlen=100)

        if USE_MULTIPLE_FLIGHT_MODEL:
            self.nr_different_flights = nr_flight_types
        self.action_history = new_history()
        self.state_history = new_history()
        self.state_next_history = new_history()
        self.number_offer_history = new_history()
        self.rewards_history = new_history()
        self.prediction_history = new_history()
        self.history = new_history()
        if USE_MULTIPLE_FLIGHT_MODEL:
            self.frame_count = np.full(self.nr_different_flights, 0)
            self.name = self.name + " multi-flight"
            self.p = [np.full(nr_prices, 0.2) for _ in range(self.nr_different_flights)]
        else:
            self.frame_count = 0
            self.p = np.full(nr_prices, 0.2)

        self.bayesian = "Bayesian" in type
        if self.bayesian:
            if USE_MULTIPLE_FLIGHT_MODEL:
                if USE_HIERARCHICAL_MODEL:
                    self.prior_alpha_hierarchical_mu = 4
                    self.posterior_alpha_hierarchical_mu = 4
                    self.prior_beta_hierarchical_mu = 10
                    self.posterior_beta_hierarchical_mu = 10
                    self.prior_hierarchical_sigma = 0.25
                    self.posterior_hierarchical_sigma = 0.25
                    # Per flight type, one normal draw per price; all alphas are drawn before the betas.
                    self.alpha = list(
                                          np.array([np.random.normal(self.prior_alpha_hierarchical_mu,
                                                                     self.prior_hierarchical_sigma)
                                                    for _ in range(nr_prices)])
                                          for _ in range(self.nr_different_flights))
                    self.beta = list(
                                          np.array([np.random.normal(self.prior_beta_hierarchical_mu,
                                                                     self.prior_hierarchical_sigma)
                                                    for _ in range(nr_prices)])
                                          for _ in range(self.nr_different_flights))
                    # A Beta parameter must be positive; a non-positive draw is only reported here.
                    for i in range(len(self.alpha)):
                        for j in range(len(self.alpha[i])):
                            if self.alpha[i][j] <= 0:
                                print("Error")
                    self.prior_initialised = np.full(self.nr_different_flights, False)
                else:
                    self.alpha = [np.full(nr_prices, 4) for _ in range(self.nr_different_flights)]
                    self.beta = [np.full(nr_prices, 10) for _ in range(self.nr_different_flights)]
            else:
                self.alpha = np.full(nr_prices, 4)
                self.beta = np.full(nr_prices, 10)
            self.name = self.name + " Bayesian"
            if USE_HIERARCHICAL_MODEL:
                self.name = self.name + " Hierarchical"

        self.UCB = "UCB" in type
        if self.UCB:
            self.name = self.name + " UCB"

    def initialise_for_flight_type(self, flight_type):
        """In hierarchical mode, draw this flight type's Beta parameters from the current posterior once.

        Non-positive alpha draws are replaced by 0.01.
        """
        if USE_HIERARCHICAL_MODEL:
            if not self.prior_initialised[flight_type]:
                self.alpha[flight_type] = np.array([np.random.normal(self.posterior_alpha_hierarchical_mu,
                                                                     self.posterior_hierarchical_sigma)
                                                    for _ in range(len(self.prices_possible))])
                self.beta[flight_type] = np.array([np.random.normal(self.posterior_beta_hierarchical_mu,
                                                                     self.posterior_hierarchical_sigma)
                                                   for _ in range(len(self.prices_possible))])
                self.prior_initialised[flight_type] = True
                for j in range(len(self.alpha[flight_type])):
                    if self.alpha[flight_type][j] <= 0:
                        self.alpha[flight_type][j] = 0.01

    def get_probability_with_price(self, price, flight_type=0):
        """Return the purchase probability used for ``price`` as a scalar.

        Bayesian + UCB: 0.9 quantile of the Beta posterior. Bayesian: one random draw
        from the Beta posterior. Otherwise: the stored running average.

        :param price: One of ``prices_possible``.
        :param flight_type: Flight type index; only read in multi-flight mode.
        """
        price_index = np.where(self.prices_possible == price)
        if self.bayesian:
            if USE_MULTIPLE_FLIGHT_MODEL:
                alpha = self.alpha[flight_type][price_index]
                beta = self.beta[flight_type][price_index]
            else:
                alpha = self.alpha[price_index]
                beta = self.beta[price_index]
            if self.UCB:
                # [0] for a scalar, like the other branches (was a 1-element array).
                return stats.beta.ppf(0.9, alpha, beta)[0]
            return stats.beta.rvs(alpha, beta, size=1)[0]
        if USE_MULTIPLE_FLIGHT_MODEL:
            return self.p[flight_type][price_index][0]
        return self.p[price_index][0]

    def get_prediction(self, price, flight_type=0):
        """Return the expected number of seats sold at ``price`` (expectation of a binomial)."""
        return self.get_probability_with_price(price, flight_type) * self.seats_available

    def get_action(self, state: [], flight_type = 0):
        """Pick the price with the highest expected revenue.

        :return: Tuple ``(prices, expected_seats)``: the chosen price repeated for every seat
                 and the expected seats sold at that price. If no price has a positive
                 expected revenue, both values are 0.
        """
        optimal_price = 0
        best_expectation = 0
        best_expected_seats = 0
        for price in self.prices_possible:
            expected_seats = self.get_prediction(price, flight_type)
            expected_revenue = expected_seats * price
            if expected_revenue > best_expectation:
                optimal_price = price
                best_expectation = expected_revenue
                best_expected_seats = expected_seats
        # Return the prediction that belongs to the chosen price, not the last one evaluated.
        return np.full((self.seats_available), optimal_price), best_expected_seats

    def update_during_booking(self, booking_index, total_customers, action,
                              start_state, prediction, current_revenue, current_state, flight_type=0):
        """Move one price level down or up when sales so far are very unlikely under the current estimate.

        The binomial probability of having sold at most ``seats_sold`` seats in
        ``booking_index`` offers is computed. Below 0.05 the price moves one level down;
        above 0.95 (with more than one seat sold) it moves one level up. The new level is
        clamped to the price list, so the lowest price no longer wraps around to the
        highest and the highest no longer raises an IndexError. In both cases the partial
        observation is appended to the histories.

        :return: Tuple ``(prices, changed)``; ``changed`` is True when one of the two
                 thresholds was crossed.
        """
        used_price = action[0]
        seats_sold = sum(current_state) - sum(start_state)
        probability_of_seats_lower_equal = stats.binom.cdf(seats_sold,
                                                           booking_index,
                                                           self.get_probability_with_price(used_price, flight_type))

        if probability_of_seats_lower_equal < 0.05:
            # Not supposed to sell so few, so decrease price
            step = -1
        elif probability_of_seats_lower_equal > 0.95 and seats_sold > 1:
            # Not supposed to sell everything, so increase price
            step = 1
        else:
            return np.full(self.seats_available, action), False

        if USE_MULTIPLE_FLIGHT_MODEL:
            self.action_history[flight_type].append(np.copy(used_price))
            self.state_next_history[flight_type].append(np.copy(seats_sold))
            self.number_offer_history[flight_type].append(np.copy(booking_index))
        else:
            self.action_history.append(np.copy(used_price))
            self.state_next_history.append(np.copy(seats_sold))
            self.number_offer_history.append(np.copy(booking_index))

        current_level = np.where(self.prices_possible == used_price)[0][0]
        new_level = min(max(current_level + step, 0), len(self.prices_possible) - 1)
        return np.full(self.seats_available, self.prices_possible[new_level]), True

    def new_flight(self):
        """Shrink the Beta parameters of every price whose alpha and beta both left their initial values.

        Both parameters are divided by ``beta / 2`` and an alpha of 0 is raised to 1.
        NOTE(review): this indexes ``self.alpha`` / ``self.beta`` by price, which matches
        the single-flight layout only - confirm it is not used in multi-flight mode.
        """
        if self.bayesian:
            for idx in range(len(self.prices_possible)):
                if self.alpha[idx] != 4 and self.beta[idx] != 10:
                    to_divide = self.beta[idx] / 2
                    self.alpha[idx] = self.alpha[idx] / to_divide
                    if self.alpha[idx] == 0:
                        self.alpha[idx] = 1
                    self.beta[idx] = self.beta[idx] / to_divide

    def process_data(self, action, start_state, prediction, round_revenue, new_state, times_offered, flight_type):
        """Record the outcome of one sales round and refresh the per-price estimates.

        :param action: Per-seat prices that were offered; only ``action[0]`` is used.
        :param start_state: State before the round.
        :param prediction: Prediction that was made for the round (stored only).
        :param round_revenue: Revenue of the round (stored only).
        :param new_state: State after the round; seats sold = sum(new_state) - sum(start_state).
        :param times_offered: Number of offers made in the round.
        :param flight_type: Flight type index; only read in multi-flight / hierarchical mode.
        """
        used_price = action[0]
        seats_sold = sum(new_state) - sum(start_state)
        if USE_MULTIPLE_FLIGHT_MODEL:
            self.action_history[flight_type].append(np.copy(used_price))
            self.state_history[flight_type].append(np.copy(start_state))
            self.prediction_history[flight_type].append(prediction)
            self.rewards_history[flight_type].append(round_revenue)
            self.state_next_history[flight_type].append(seats_sold)
            self.number_offer_history[flight_type].append(times_offered)
            self.frame_count[flight_type] += 1
            prev_actions = np.array(self.action_history[flight_type])
            all_results = np.array(self.state_next_history[flight_type])
            all_number_offered = np.array(self.number_offer_history[flight_type])
        else:
            self.action_history.append(np.copy(used_price))
            self.state_history.append(np.copy(start_state))
            self.prediction_history.append(prediction)
            self.rewards_history.append(round_revenue)
            self.state_next_history.append(seats_sold)
            self.number_offer_history.append(times_offered)
            self.frame_count += 1
            prev_actions = np.array(self.action_history)
            all_results = np.array(self.state_next_history)
            all_number_offered = np.array(self.number_offer_history)

        # Update price - probability figures
        for price in self.prices_possible:
            rounds_at_price = np.where(prev_actions == price)
            prev_results = all_results[rounds_at_price]
            prev_number_offered = all_number_offered[rounds_at_price]
            if len(prev_results) == 0:
                continue

            price_index = np.where(self.prices_possible == price)
            if self.bayesian:
                # Update Bayesian
                # NOTE(review): the sums run over the whole retained history on every call,
                # so earlier rounds are added to alpha/beta again each time - confirm intended.
                sold = sum(prev_results)
                not_sold = sum(prev_number_offered) - sum(prev_results)
                if USE_MULTIPLE_FLIGHT_MODEL:
                    new_alpha = self.alpha[flight_type][price_index] + sold
                    new_beta = self.beta[flight_type][price_index] + not_sold
                    self.alpha[flight_type][price_index] = new_alpha
                    self.beta[flight_type][price_index] = new_beta
                else:
                    new_alpha = self.alpha[price_index] + sold
                    new_beta = self.beta[price_index] + not_sold
                    self.alpha[price_index] = new_alpha
                    self.beta[price_index] = new_beta
            else:
                new_probability = np.mean(prev_results / prev_number_offered)
                if USE_MULTIPLE_FLIGHT_MODEL:
                    self.p[flight_type][price_index] = new_probability
                else:
                    self.p[price_index] = new_probability

        if USE_HIERARCHICAL_MODEL:
            # Converted to an array: arithmetic on the deque itself raises a TypeError.
            data = np.asarray(self.state_next_history[flight_type], dtype=float)
            N = np.size(data)
            mean_data = np.mean(data)
            SSD = sum((data - mean_data) ** 2) + 1e-5
            self.posterior_hierarchical_sigma = (1 / self.prior_hierarchical_sigma + N / SSD) ** -1
            self.posterior_alpha_hierarchical_mu = (1 / ((1 / self.prior_hierarchical_sigma) +
                                                    (N / self.posterior_hierarchical_sigma))) * \
                                             ((self.prior_alpha_hierarchical_mu / self.prior_hierarchical_sigma) + \
                                              sum(data) / SSD)
            self.posterior_beta_hierarchical_mu = (1 / ((1 / self.prior_hierarchical_sigma) +
                                                         (N / self.posterior_hierarchical_sigma))) * \
                                                   ((self.prior_beta_hierarchical_mu / self.prior_hierarchical_sigma) + \
                                                    sum(data) / SSD)



In [15]:
# TODO: Add more comparison

# Simulations

In [39]:
# Switches for the simulation runs.
# NOTE(review): none of these flags is read in the part of the notebook visible here;
# the descriptions below are inferred from the names - confirm against the simulation code.
ENABLE_UPDATE_DURING_SALES = False  # presumably lets models re-price during a sales window (update_during_booking)
ENABLE_MULTIPLE_SALES_WINDOWS = False  # presumably splits the sale of one flight into several windows
OUTPUT_PER_FIVE_FLIGHTS = True  # presumably controls how often results are printed

In [40]:
def run_simulation_multiple_flights(simulation_rounds: int, nr_different_flights: int, nr_flights_per_group: int,
                                    seats_available: int, prices_offered: np.ndarray, models: list,
                                    report_every: int = 100) -> None:
    """Simulate seat sales for groups of similar flights and print each model's revenue.

    Demand parameters (maximum customer count, binomial customer probability,
    WTP sigma and WTP scale) are drawn once per flight group and reused in
    every round. Each round builds new ``SeatSimulationFlight`` instances for
    every group; each model then prices its own deep copy of every flight, so
    all models are evaluated on the same flights within a round.

    Output is controlled by the module-level flags
    ``ENABLE_UPDATE_DURING_SALES``, ``ENABLE_MULTIPLE_SALES_WINDOWS`` and
    ``OUTPUT_PER_FIVE_FLIGHTS``. With ``OUTPUT_PER_FIVE_FLIGHTS`` enabled, one
    line is printed after every ``report_every`` flights of a model within a
    round; otherwise one line is printed per model and round.

    NOTE: in batched mode, flights after the last full batch of a round are
    not reported. ``total_revenue``, ``total_seats_sold`` and
    ``model_performance`` are accumulated but currently neither printed nor
    returned.

    Args:
        simulation_rounds: Number of rounds to simulate.
        nr_different_flights: Number of flight groups (flight types).
        nr_flights_per_group: Number of flights created per group and round.
        seats_available: Seats on sale per flight.
        prices_offered: Price points handed to every ``SeatSimulationFlight``.
        models: Pricing models. Each must provide ``name``,
            ``initialise_for_flight_type``, ``get_action`` and
            ``process_data`` (plus ``update_during_booking`` when
            ``ENABLE_UPDATE_DURING_SALES`` is set).
        report_every: Batch size, in flights, used when
            ``OUTPUT_PER_FIVE_FLIGHTS`` is enabled. Defaults to 100.

    Raises:
        ValueError: If ``report_every`` is not positive.
    """
    if report_every <= 0:
        raise ValueError("report_every must be a positive integer")

    # Accumulated across all models and rounds; not currently returned.
    total_revenue = 0
    total_seats_sold = 0
    print("Text; Round; Text; Model name; Text; Revenue; Text; Max Revenue; Text; Share of max revenue")
    # Per-model bookkeeping: number of batch lines printed and revenue per report.
    print_count = [0 for _ in models]
    model_performance = [[] for _ in models]

    # Draw the demand parameters of every flight group once; they stay fixed across rounds.
    flight_group_parameters = []
    for _ in range(nr_different_flights):
        customer_max = np.random.randint(80, 120)
        customer_probability = np.random.uniform(0.7, 0.95)
        # A negative sigma draw is clamped to zero.
        customer_wtp_sigma = max(np.random.normal(0.35, 0.2), 0)
        customer_wtp_scale = np.random.poisson(40)
        flight_group_parameters.append([customer_max, customer_probability, customer_wtp_sigma, customer_wtp_scale])

    for simulation_count in range(simulation_rounds):
        # Build this round's flights; every model below is run on these same flights.
        flight_groups = []
        for customer_max, customer_probability, customer_wtp_sigma, customer_wtp_scale in flight_group_parameters:
            flights = []
            for _ in range(nr_flights_per_group):
                total_customers = np.random.binomial(customer_max, customer_probability)
                flights.append(SeatSimulationFlight(total_nr_customers=total_customers,
                                                    wtp_mu=0, wtp_sigma=customer_wtp_sigma,
                                                    wtp_scale=customer_wtp_scale,
                                                    seats_available=seats_available,
                                                    prices_offered=prices_offered))
            flight_groups.append(flights)

        for model_idx, model in enumerate(models):
            simulation_seats_sold = 0
            simulation_revenue = 0.0
            simulation_max_seats_sold = 0
            simulation_max_revenue = 0.0
            number_of_flights_offered = 0
            prices_offers_used = []
            for flight_type_idx in range(nr_different_flights):
                for flight_idx in range(nr_flights_per_group):
                    number_of_flights_offered += 1
                    model.initialise_for_flight_type(flight_type_idx)
                    flight = flight_groups[flight_type_idx][flight_idx]
                    flight_max_revenue, flight_max_seats = flight.theoretical_max()
                    simulation_max_revenue += flight_max_revenue
                    simulation_max_seats_sold += flight_max_seats
                    # Sell on a copy so the next model starts from the same untouched flight.
                    flight_copy = copy.deepcopy(flight)
                    state = np.copy(flight_copy.seats_sold)
                    action, prediction = model.get_action(state=state, flight_type=flight_type_idx)
                    price_offer = np.copy(action)
                    new_action = np.copy(action)
                    prices_offers_used.append(price_offer[0])
                    flight_copy.update_price_offer(price_offer)
                    flight_revenue = 0
                    customers_offered = 0
                    # Booking window
                    for i in range(flight_copy.nr_bookings):
                        _, round_revenue, round_seats_sold = flight_copy.sell_seat2(i)
                        customers_offered += flight_copy.get_nr_customers(i)
                        simulation_seats_sold += round_seats_sold
                        simulation_revenue += round_revenue
                        flight_revenue += round_revenue
                        if ENABLE_UPDATE_DURING_SALES:
                            current_state = np.copy(flight_copy.seats_sold)
                            total_customers = flight_copy.total_nr_customers
                            new_action, price_updated = model.update_during_booking(customers_offered, total_customers,
                                                                                    new_action, state, prediction,
                                                                                    flight_revenue, current_state,
                                                                                    flight_type_idx)
                            if price_updated:
                                # A new offer starts a new observation period for the model.
                                price_offer = np.copy(new_action)
                                flight_copy.update_price_offer(price_offer)
                                state = np.copy(flight_copy.seats_sold)
                                customers_offered = 0
                    if ENABLE_MULTIPLE_SALES_WINDOWS:
                        # Reservation window, reset pricing strategy
                        price_offer = np.copy(action)
                        flight_copy.update_price_offer(price_offer)
                        customers_offered = 0
                        for i in range(flight_copy.nr_bookings):
                            # Only bookings whose uniform draw exceeds 0.8 get another offer here.
                            chance_check_seats = np.random.uniform(0, 1)
                            if chance_check_seats > 0.8:
                                _, round_revenue, round_seats_sold = flight_copy.sell_seat2(i)
                                simulation_seats_sold += round_seats_sold
                                simulation_revenue += round_revenue
                                flight_revenue += round_revenue
                                customers_offered += flight_copy.get_nr_customers(i)
                        # Check-in window
                        for i in range(flight_copy.nr_bookings):
                            _, round_revenue, round_seats_sold = flight_copy.sell_seat2(i)
                            simulation_seats_sold += round_seats_sold
                            simulation_revenue += round_revenue
                            flight_revenue += round_revenue
                            customers_offered += flight_copy.get_nr_customers(i)
                    end_state = np.copy(flight_copy.seats_sold)
                    model.process_data(action, state, prediction, flight_revenue, end_state,
                                       customers_offered, flight_type_idx)
                    if OUTPUT_PER_FIVE_FLIGHTS and number_of_flights_offered % report_every == 0:
                        total_revenue += simulation_revenue
                        total_seats_sold += simulation_seats_sold
                        percentage_of_max_revenue = _share_of_max_revenue(simulation_revenue,
                                                                          simulation_max_revenue)
                        model_performance[model_idx].append(simulation_revenue)
                        average_price = np.mean(prices_offers_used)

                        print("Simulation;", print_count[model_idx], ";name;", model.name, ";revenue;",
                              trunc(simulation_revenue),
                              ";max revenue;", trunc(simulation_max_revenue), ";share of max;",
                              trunc(percentage_of_max_revenue),
                              "; average price;", trunc(average_price))
                        print_count[model_idx] += 1
                        # Start a new batch.
                        prices_offers_used = []
                        simulation_seats_sold = 0
                        simulation_revenue = 0.0
                        simulation_max_seats_sold = 0
                        simulation_max_revenue = 0.0

            if not OUTPUT_PER_FIVE_FLIGHTS:
                total_revenue += simulation_revenue
                total_seats_sold += simulation_seats_sold
                percentage_of_max_revenue = _share_of_max_revenue(simulation_revenue, simulation_max_revenue)
                model_performance[model_idx].append(simulation_revenue)

                print("Simulation;", simulation_count, ";name;", model.name, ";revenue;", trunc(simulation_revenue),
                      ";max revenue;", trunc(simulation_max_revenue), ";share of max;", trunc(percentage_of_max_revenue))


def _share_of_max_revenue(revenue: float, max_revenue: float) -> float:
    """Return revenue as a percentage of max_revenue, or 0.0 when max_revenue is zero."""
    if max_revenue == 0:
        return 0.0
    return 100 * (revenue / max_revenue)

In [61]:
# Experiment settings shared by the model-construction and simulation cells below.

# Price grid: 10 evenly spaced price points from 10 to 100.
prices = np.linspace(10, 100, 10)
# Seed NumPy's global RNG, which the simulation draws from.
np.random.seed(42)
seats_available = 18  # seats on sale per flight
simulation_rounds = 75  # number of simulation rounds
nr_flights = 6  # flights per flight group and round (passed as nr_flights_per_group)
nr_different_flights = 50  # number of flight groups



In [None]:
# Pricing model built by PricingValueIteration on the shared `prices` grid.
model_value_iteration = PricingValueIteration(price_points=prices, max_iterations=2000, 
                                              calculate_value_function=True, seats_available=seats_available)


In [None]:
# Probability-prediction pricing model on the shared `prices` grid.
# NOTE(review): `ProbaPrediction.ProbaPrediction` assumes ProbaPrediction is an imported
# module; if the class is defined in this notebook, call `ProbaPrediction(...)` directly.
model_proba = ProbaPrediction.ProbaPrediction(type="Binomial", seats_available=seats_available,
                                              prices_offered=prices, nr_flight_types=nr_different_flights)


In [None]:
# Q-learning pricing model on the shared `prices` grid.
# NOTE(review): `QLearning.QLearning` assumes QLearning is an imported module;
# if the class is defined in this notebook, call `QLearning(...)` directly.
qlearning = QLearning.QLearning(epsilon=0.01, lr=0.995, gamma=0.95,
                                seats_available=seats_available, price_levels=prices)


In [None]:
# Models to evaluate; uncomment an entry to include it in the comparison.
models = [
    model_value_iteration,
    # model_proba,
    # qlearning,
]

run_simulation_multiple_flights(
    simulation_rounds=simulation_rounds,
    nr_different_flights=nr_different_flights,
    nr_flights_per_group=nr_flights,
    seats_available=seats_available,
    prices_offered=prices,
    models=models,
)

In [51]:
# Scratch check: position of the price 77.5 on a coarse 5-point grid.
# Uses its own name so the global `prices` grid used by the models and the
# simulation is not silently replaced when this cell is run.
prices_coarse = np.linspace(10, 100, 5)
prices_coarse.tolist().index(77.5)

3