In [1]:
import math
import random
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple, deque
from itertools import count
import numpy as np

In [2]:
import torch as T
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [3]:
'''Dynamic Programming, FCFS Simulated result'''
dp = 60940
fcfs = 45276

In [4]:
'''Establish an aircraft'''
class aircraft:

    # Initialize aircraft
    def __init__(self, adjusted_seat_price):
        self.seat_capacity = 100
        self.seat_type = ['Y', 'M', 'K']
        self.adjusted_seat_price = adjusted_seat_price
        self.init_seat_price = {'f': 0, 'Y': 800, 'M': 500, 'K': 450}

In [5]:
'''Establish Customer class'''
class Customer(aircraft):

    # Initialize customer
    def __init__(self):
        super().__init__(adjusted_seat_price)
        
        self.customer_type = {0: 'f', 1: 'Bus1', 2:'Bus2', 3:'Leis1', 4: 'Leis2', 5: 'Leis3'}
        self.num_customer_type = len(self.customer_type)
        self.customer_preference = {
            'f':{'Y': False, 'M': False, 'K': False},
            'Bus1': {'Y': True, 'M': False, 'K': False},
            'Bus2': {'Y': True, 'M': True, 'K': False},
            'Leis1': {'Y': False, 'M': True, 'K': False},
            'Leis2': {'Y': False, 'M': True, 'K': True},
            'Leis3': {'Y': False, 'M': False, 'K': True},
        }
        self.consideration_set = {
            'Bus1': {'Y': True, 'M': True, 'K': True},
            'Bus2': {'Y': True, 'M': True, 'K': True},
            'Leis1': {'Y': False, 'M': True, 'K': True},
            'Leis2': {'Y': False, 'M': True, 'K': True},
            'Leis3': {'Y': False, 'M': False, 'K': True},
        }
        self.wtp = {'Bus1': 1000, 'Bus2': 900, 'Leis1': 750, 'Leis2': 600, 'Leis3': 450}

    '''update lambda for this RD'''
    def update_lambda(self, init_period, new_demand, CUS_arr_percentage_dict):
        new_lambda = new_damand/init_period
        new_arrival_list = []
        new_arrival_list.append(round(1-new_lambda, 2))
        for customer in CUS_arr_percentage_dict: # arrival rates for each customer
            new_arrival_list.append(round(CUS_arr_percentage_dict[customer] * new_lambda, 2)) 
        # print("new_arrival_list: ", new_arrival_list)
        return new_arrival_list

    '''customer generation'''
    def generate_customer(self, new_arrival_list):
        random_number = np.random.rand() # generate random number
        probabilities = new_arrival_list # arrival rates list
        cumulative_probability = 0 # Use cumulative probability decide customer type
        customer_index = 0
        for probability in probabilities:
            cumulative_probability += probability
            if random_number <= cumulative_probability:
                break
            customer_index += 1
        customer_type = self.customer_type[customer_index] # return customer will buy what kind of seat   
        # print(f"random_number: {random_number}, customer_index: {customer_index}, customer_type: {customer_type}")
        return customer_type

    '''customer's preference seat under control'''
    def preference_seat(self, customer_type, seat_open):
        preferred_seats = []  
        preferences = self.customer_preference[customer_type]  
        for seat_type, preference in preferences.items():
            if preference and seat_type in seat_open:
                preferred_seats.append(seat_type)
        # print("preference seats: ", preferred_seats)
        return preferred_seats  
    
    '''customer make decision'''
    def make_decision(self, customer_type, seat_open):
        preferred_seats = self.preference_seat(customer_type, seat_open)  
        if preferred_seats:
            cheapest_seat = min(preferred_seats, key=lambda x: self.init_seat_price[x])
            return cheapest_seat
        else:
            return 'f'

In [6]:
''' Demand model from Balaiyan et al.'''
class demandmodel(Customer):
    
    # Initialize demand model
    def __init__(self, adjusted_seat_price):
        super().__init__(adjusted_seat_price)
        
        self.version = 'This is Balaiyan demand model version for consider customer consideration set, willingness to pay, and \
        the modified MNL.'
        
        # demand model parameters
        self.total_booking = 105
        self.market_share = 0.25
        self.gamma = 0.08426
        self.alpha = 0.001251
        self.beta = {'DFARE':-0.006, 'LOT3':-0.944}
        self.a = {'Y':{'DFARE':None, 'LOT3':1},
                  'M':{'DFARE':None, 'LOT3':1},
                  'K':{'DFARE':None, 'LOT3':1},
                 }           
        self.cus_seat_table = {}

    # Calculate dm
    def dm(self):
        dm = self.total_booking/self.market_share
        return dm

    # Calcilate booking curve
    def booking_curve(self, RD2, RD1):
        booking_curve = math.exp(-self.gamma*RD2)-math.exp(-self.gamma*RD1)
        return booking_curve
    
    # find pj+1
    def find_p0_pj1_pj(self, preferred_seats):
        prices = [self.adjusted_seat_price[seat_type] for seat_type in preferred_seats]
        sorted_prices = sorted(prices, reverse=True)
        if len(preferred_seats) == 1:
            max_seat_type = max(preferred_seats, key=self.adjusted_seat_price.get)
            p0 = pj1 = pj = self.adjusted_seat_price[max_seat_type]
        else:
            p0 = sorted_prices[len(sorted_prices)-1]
            pj1 = sorted_prices[1]
            pj = sorted_prices[0]
        if customer == 'Leis3':
            pj1 = 400
        p0 = 400
        return p0, pj1, pj

    # Update fare difference for mnl
    def recalculate_fare_difference(self):
        new_fare_diff_avg = sum(self.adjusted_seat_price.values()) / (len(self.adjusted_seat_price)-1)
        for seat_type in self.a:
            self.a[seat_type]['DFARE'] = abs(round(self.adjusted_seat_price[seat_type] - new_fare_diff_avg, 2))        
    
    # Multinomial logit model
    def mnl(self, seat_type, preferred_seats):
        self.recalculate_fare_difference() # Update fare difference for mnl first
        if seat_type in preferred_seats:
            seat_value_dict = {}
            for seat in preferred_seats: # Calculating this customer type's prefered seat Value function
                seat_v = 0
                for key, value in self.a[seat].items(): # Get the a & Beta from seat_type
                    seat_v += self.beta[key] * value
                seat_value_dict[seat] = seat_v
            choose_prob = {}
            for key, value in seat_value_dict.items():
                choose_prob[key] = math.exp(seat_value_dict[key])/sum(math.exp(value) for value in seat_value_dict.values())
        else:
            choose_prob = {'Y':0, 'M':0, 'K':0}   
        return choose_prob

    # Calculate customer choice
    def customer_choice(self, seat_type):
        total_sum = 0
        for key, customer in self.customer_type.items():
            if customer != 'f':
                preferenced_seats = self.consideration_set[customer]
                preferred_seats = [seat for seat, preference in preferenced_seats.items() if preference]
                choose_prob = self.mnl(seat_type, preferred_seats)
                p0, pj1, pj = self.find_p0_pj1_pj(preferred_seats, self.adjusted_seat_price)
                sum_of_set = (math.exp(-self.alpha*(pj-p0))-math.exp(-self.alpha*(pj1-p0))) * choose_prob[seat_type]
                total_sum += sum_of_set
                if customer not in self.cus_seat_table:
                    self.cus_seat_table[customer] = {}
                self.cus_seat_table[customer][seat_type] = sum_of_set
        return total_sum

    # Calculate demand
    def formulation(self, RD2, RD1):
        dm = self.dm()
        booking_curve = self.booking_curve(RD2, RD1)
        BR_dict = {}
        for seat in self.seat_set:
            customer_choice = self.customer_choice(seat)
            BR = dm * booking_curve * customer_choice
            BR_dict[seat] = BR
        print("total demand: ", sum(BR_dict.values()))  
        # Calculate the arrival percentage of every customer in the dict
        CUS_arr_percentage_dict = self.calculate_customer_arrival_rate(dm, booking_curve, RD2, RD1)    
        return BR_dict, CUS_arr_percentage_dict
    
    # Verified total demand from customer type, need to operate function formulation first
    def calculate_customer_arrival_rate(self, dm, booking_curve, RD2, RD1):    
        CUS_dict = {}
        sum_of_cus_total_value = {} 
        
        # Calculate every customer's total value
        for customer, values in self.cus_seat_table.items():
            sum_of_cus_total_value[customer] = sum(values.values())  
            
        # Calculate every customer's total demand
        for customer, total_value in sum_of_cus_total_value.items():
            BR = dm * booking_curve * total_value
            CUS_dict[customer] = BR
            # print(' customer: ', customer, ' predicted demand model from ', RD2,' to ', RD1, 'is', BR)
        # print('sum_of_cus_total_value:', sum_of_cus_total_value)
        # Customer's arrival percentage in this RD
        total_sum = sum(CUS_dict.values())  
        CUS_arr_percentage_dict = {key: '{:.2f}%'.format((value / total_sum) * 100) for key, value in CUS_dict.items()}
        return CUS_arr_percentage_dict       
    
    # Plot demand result
    def plot_demand(self, RD2, RD1):
        
        # Store calculated result
        every_rd = {seat: [] for seat in self.seat_type}  
        cumulative_demand = {seat: [] for seat in self.seat_type}  
        total_demand = {seat: 0 for seat in self.seat_type}  
        cumulative_total_demand_per_rd = []  # Store each RD cumulative demand

        # Calculate all RD demand
        cumulative_total_demand = 0  # Initialize cumulative demand
        for i in range(RD1, RD2):
            print('-------------------RD: ', i+1 ,'to RD: ',i, '----------------------')
            BR_dict, CUS_dict = self.formulation(i+1, i)  
            total_demand_rd = sum(BR_dict.values())  # Calculate demand form all rd
            cumulative_total_demand += total_demand_rd  # update cumulative total deamnd
            print('cumulative_total_demand: ', cumulative_total_demand)
            cumulative_total_demand_per_rd.append(cumulative_total_demand)  # append cumulative total demand
            for seat, demand in BR_dict.items():  
                every_rd[seat].append(demand) 
                total_demand[seat] += demand  
                cumulative_demand[seat].append(total_demand[seat])  
        
        plt.figure(figsize=(10, 6))
        plt.plot(range(RD1, RD2), cumulative_total_demand_per_rd, label='Total Demand')
        plt.xlabel('RD')
        plt.ylabel('Total Demand')
        plt.title('Total Demand Model')
        plt.legend()
        plt.xticks(range(0, RD2, int((RD2)/10)))
        plt.grid(True)
        plt.show()

In [7]:
'''Action space'''
class AgentActionSpacev0:
    def __init__(self):
        self.action_list = [0, 1, 2, 3, 4, 5, 6, 7]
        # self.action_list = [0, 1, 2, 3]
        self.n = len(self.action_list)  

    def sample(self):
        return np.random.choice(self.action_list)

    def contains(self, action):
        return action in self.action_list

In [8]:
# import gym
# from gym.spaces import MultiDiscrete

# class AgentActionSpace(gym.spaces.MultiDiscrete):
#     def __init__(self, nvec):
#         super(AgentActionSpace, self).__init__(nvec)
#         self.n = np.prod(nvec)

#     def sample(self):
#         return np.random.randint(0, np.array(self.nvec), dtype=int)

#     def contains(self, action):
#         if isinstance(action, list) and len(action) == len(self.nvec):
#             for a, n in zip(action, self.nvec):
#                 if a < 0 or a >= n:
#                     return False
#             return True
#         else:
#             return False

# # Example usage
# nvec = [4, 4, 4]  # 每个子集的动作数量
# action_space = AgentActionSpace(nvec)
# print('action space: ', action_space)
# print('action space.n: ', action_space.n)
# print('sample: ', action_space.sample())  # 随机采样一个动作
# print('contains: ', action_space.contains([1, 2, 3]))  # 检查动作是否有效

# md_space = MultiDiscrete([2, 3])
# print(md_space)
# print('action space: ', md_space)
# print('action space.n: ', md_space.nvec.prod())
# print('sample: ', md_space.sample())  # 随机采样一个动作
# print('contains: ', md_space.contains([1, 2]))  # 检查动作是否有效

action space:  MultiDiscrete([4 4 4])
action space.n:  64
sample:  [3 2 0]
contains:  True
MultiDiscrete([2 3])
action space:  MultiDiscrete([2 3])
action space.n:  6
sample:  [0 0]
contains:  True


In [9]:
'''Establish env with Single Cabin mulitiple fare classes'''
class AirlineEnvironment:
    
    '''Initialize env parameters'''
    def __init__(self, name):
        
        # name of env
        self.name = name
        
        # inherent aircraft
        self.aircraft = aircraft()
        self.seat_capacity = self.aircraft.seat_capacity # seat limitation
        self.seat_type = self.aircraft.seat_type # seat type
        self.init_seat_price = self.aircraft.init_seat_price
        
        # inherent action space class
        # nvec = [4, 4, 4]  # every actions for a subset
        # self.action_space = MultiDiscrete(nvec)
        self.agent_action = AgentActionSpace() 
        self.action_space = self.agent_action.action_list
        
        # inherent attributes from demand model
        self.max_rd = 20 # total selling RDs
        self.demand_model = demandmodel()
        self.initial_lambda = 0.8 # initial lambda for customer arrival

        # inherent attributes from Customer class
        self.customer = Customer() 
        
        # environment parameters
        self.seat_remain = self.seat_capacity # seat limitation
        self.state = np.array([self.seat_remain, self.max_rd]) # Initialize state : (num seat sold, period)

    '''reset env'''
    def reset(self):
        self.seat_remain = self.seat_capacity # Initialize total seat 
        self.state = np.array([self.seat_remain, self.max_rd])  # Initialize state
        return self.state

    # '''action correlated to the price adjust'''
    # def action_policy(self, action):
    #     if action
    
    '''Step'''
    def step(self, state, action, price_status_dict):

        # total reward this RD
        total_reward_rd = 0

        # Agent choose a price policy to update the " price_status_dict "
        adjusted_policy = self.action_space[action]
        price_status_dict = {key: round(value * adjusted_policy[i], 2) for i, (key, value) in enumerate(price_status_dict.items())}
        print("price adjust: ", price_status_dict)

        # Calculate periods of this RD
        init_periods = sum(self.demand_model.formulation(state[1], state[1]-1, self.init_seat_price).values())/self.initial_lambda
        new_demand = sum(self.demand_model.formulation(state[1], state[1]-1, price_status_dict).values())
        new_arrival_list = update_lambda(init_period, new_demand, cus_arr_prob)
        
        # calculate reward this period
        for period in range(1, init_periods):
        
            # With remaining seat
            if self.seat_remain > 0:

                # Customer generation
                customer_type = self.customer.generate_customer(new_arrival_list)
                # print("customer type: ", customer_type)
    
                # Customer choose seat
                chosen_seat = self.customer.make_decision(customer_type, seat_open) 
                # print("chosen seat: ", chosen_seat)
    
                # Decide immediate revenue
                reward = self.aircraft.seat_price[chosen_seat] 
    
                # Update seat remain
                if reward > 0:
                    self.seat_remain = self.seat_remain-1
            
            # Without remaining seat 
            else:
                # print("No remaining seat.")
                reward = 0

        # Update period
        next_time = state[1].item()-1
        
        # Check departure or not 
        departure = (next_time <= 0)

        # update state
        state[0] = self.seat_remain
        state[1] = next_time
        return state, reward, departure, price_status_dict

In [10]:
'''Deep Q Network'''
class DeepQNetwork(nn.Module):
    def __init__(self, lr, n_actions, input_dims):
        super(DeepQNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dims, 512)
        self.fc2 = nn.Linear(512, n_actions)

        self.optimizer = optim.RMSprop(self.parameters(), lr=lr)

        self.loss = nn.MSELoss()
        self.device = T.device('cuda:0' if T.cuda.is_available() else 'cpu')
        self.to(self.device)

    def forward(self, state):
        x = F.relu(self.fc1(state))
        actions = self.fc2(x)

        return actions

In [11]:
'''Define replay buffer'''
class ReplayBuffer(object):
    def __init__(self, max_size, input_shape, n_actions):
        self.mem_size = max_size
        self.mem_cntr = 0
        self.state_memory = np.zeros((self.mem_size, *input_shape),
                                     dtype=np.float32)
        self.new_state_memory = np.zeros((self.mem_size, *input_shape),
                                         dtype=np.float32)

        self.action_memory = np.zeros(self.mem_size, dtype=np.int64)
        self.reward_memory = np.zeros(self.mem_size, dtype=np.float32)
        self.terminal_memory = np.zeros(self.mem_size, dtype=np.bool_)

    # store transition to buffer
    def store_transition(self, state, action, reward, state_, done):
        index = self.mem_cntr % self.mem_size
        self.state_memory[index] = state
        self.new_state_memory[index] = state_
        self.action_memory[index] = action
        self.reward_memory[index] = reward
        self.terminal_memory[index] = done
        self.mem_cntr += 1

    # sample transition from buffer
    def sample_buffer(self, batch_size):
        max_mem = min(self.mem_cntr, self.mem_size)
        batch = np.random.choice(max_mem, batch_size, replace=False)

        states = self.state_memory[batch]
        actions = self.action_memory[batch]
        rewards = self.reward_memory[batch]
        states_ = self.new_state_memory[batch]
        terminal = self.terminal_memory[batch]

        return states, actions, rewards, states_, terminal

In [12]:
''' Define DQN agent'''
class DQNAgent(object):
    def __init__(self, gamma, epsilon, lr, n_actions, input_dims,
                 mem_size, batch_size, eps_min, eps_dec, replace):
        self.gamma = gamma # time discount gamma
        self.epsilon = epsilon # epilson-greedy hyperparameter eplison
        self.lr = lr # learning rate
        self.n_actions = n_actions # number of action
        self.input_dims = input_dims # number of state
        self.batch_size = batch_size # batch size of sample memory
        self.eps_min = eps_min # minimum of hyperparameter epilson
        self.eps_dec = eps_dec # epilson decay rate, higher represent slower decay
        self.replace_target_cnt = replace # frequence of replace target network 
        self.action_space = [i for i in range(n_actions)]
        self.learn_step_counter = 0
        self.memory = ReplayBuffer(mem_size, (input_dims,), n_actions) # Replay buffer
        self.q_eval = DeepQNetwork(self.lr, self.n_actions,
                                    input_dims=self.input_dims) # Target network
        self.q_next = DeepQNetwork(self.lr, self.n_actions,
                                    input_dims=self.input_dims) # Policy network

    # agent choose action
    def choose_action(self, observation):
        if np.random.random() > self.epsilon: # exploit: selection max value column
            state = T.tensor([observation],dtype=T.float).to(self.q_eval.device)
            actions = self.q_eval.forward(state)
            action = T.argmax(actions).item()
        else: # explore: randomly select aciton
            action = np.random.choice(self.action_space)
        return action

    # store transition
    def store_transition(self, state, action, reward, state_, done):
        self.memory.store_transition(state, action, reward, state_, done)

    # sample memory
    def sample_memory(self):
        state, action, reward, new_state, done = \
                                self.memory.sample_buffer(self.batch_size)
        states = T.tensor(state).to(self.q_eval.device)
        rewards = T.tensor(reward).to(self.q_eval.device)
        dones = T.tensor(done).to(self.q_eval.device)
        actions = T.tensor(action).to(self.q_eval.device)
        states_ = T.tensor(new_state).to(self.q_eval.device)
        return states, actions, rewards, states_, dones

    # update parameter of target network
    def replace_target_network(self):
        if self.learn_step_counter % self.replace_target_cnt == 0:
            self.q_next.load_state_dict(self.q_eval.state_dict())

    # decrease value of epilson
    def decrement_epsilon(self):
        self.epsilon = self.epsilon - self.eps_dec \
                           if self.epsilon > self.eps_min else self.eps_min

    # optimizer model
    def learn(self):

        if self.memory.mem_cntr < self.batch_size: # check whether have enough memory
            return

        self.q_eval.optimizer.zero_grad() # diminish previous gradient

        self.replace_target_network()

        states, actions, rewards, states_, dones = self.sample_memory()
        indices = np.arange(self.batch_size)

        q_pred = self.q_eval.forward(states)[indices, actions]
        q_next = self.q_next.forward(states_).max(dim=1)[0]

        q_next[dones] = 0.0
        q_target = rewards + self.gamma*q_next

        loss = self.q_eval.loss(q_target, q_pred).to(self.q_eval.device)
        loss.backward()
        self.q_eval.optimizer.step()
        self.learn_step_counter += 1

        self.decrement_epsilon()

In [13]:
# Initialize Training parameters
num_episodes = 10
DQN_total_revenues = []
DQN_cumulative_average = []
DQN_per_50_avg = []

env = AirlineEnvironment("Single route parallel flight.v0")
agent = DQNAgent(gamma=0.95, epsilon=1, lr=0.0001,
                 input_dims=len(env.state),
                 n_actions=env.action_space.nvec.prod(), mem_size=300000, eps_min=0.1,
                 batch_size=256, replace=1000, eps_dec=1e-5)

n_step = 0
best_score = 0

# Start Training
for episode in range(1, num_episodes+1):
    
    # Initialize environment
    # print("---------- Episode ", episode, "-----------")
    observation = env.reset()
    total_revenue = 0 # Initialize total revenue
    departure = None
    price_status_dict = env.init_seat_price
    
    while not departure:
        
        # print("------- step " , env.max_rd - n_step," ---------")
        
        # agent select action
        action = agent.choose_action(observation)
        print("action", action)

        # env return observation
        observation_, reward, departure, price_status_dict_ = env.step(observation, action, price_status_dict)
        # print("observation_: ", observation, "reward: ",  reward, "departure: ", departure)
        
        # Update total revenue
        total_revenue += reward 

        # agent memorize transition
        agent.store_transition(observation, action, reward, observation_, departure)
        agent.learn()

        # Move on to the next state
        observation = observation_

        # Move on to the next price point
        price_status_dict = price_status_dict_
        n_step += 1

    if total_revenue > best_score:
        best_score = total_revenue
        
    DQN_total_revenues.append(total_revenue) 
    DQN_per_50_avg.append(np.mean(DQN_total_revenues[-50:])) 
    DQN_cumulative_avg = sum(DQN_total_revenues) / (episode)
    DQN_cumulative_average.append(DQN_cumulative_avg)
    print('Episode:',episode ,'Total Revenue:', total_revenue,
             'Cumulative average %.1f' % DQN_cumulative_avg, 'best score:', best_score,
            'epsilon %.2f' % agent.epsilon, 'steps', n_step)

action 49


IndexError: index 49 is out of bounds for axis 0 with size 3

In [None]:
# Plot results
plt.figure(figsize=(10, 6))  
plt.plot(range(num_episodes), DQN_total_revenues, label='DQN each episode', alpha=0.5, color='b')
plt.plot(range(num_episodes), DQN_per_50_avg, label='DQN per 50 episode', alpha=0.5, color='green')
plt.plot(range(num_episodes), DQN_cumulative_average, label='DQN Cumulative')
plt.axhline(y=dp, color='r', label='DP')
plt.axhline(y=fcfs, color='orange', label='FCFS')
plt.xlabel('Episode')
plt.ylabel('Total Reward')
plt.title('Total Reward per Episode')
plt.legend()  
plt.grid(True)
plt.show()