In [6]:
import pandas as pd
import numpy as np

n_demand_levels = 11
min_demand_level = 100
max_demand_level = 200
demand_levels = np.linspace(min_demand_level, max_demand_level, n_demand_levels)

max_tickets = 100
max_days = 50

# Initialize Q and V matrices
Q = np.zeros([max_tickets, max_tickets, n_demand_levels, max_days])
V = np.zeros([max_tickets, max_days])

# Populate the Q and V matrices
for tickets_left in range(max_tickets):
    for tickets_sold in range(tickets_left + 1):  # Allow selling all tickets
        for demand_index, demand_level in enumerate(demand_levels):
            price = max(demand_level - tickets_sold, 0)
            Q[tickets_sold, tickets_left, demand_index, 0] = price * tickets_sold

        revenue_from_best_quantity_at_each_demand_level = Q[:, tickets_left, :, 0].max(axis=0)
        V[tickets_left, 0] = revenue_from_best_quantity_at_each_demand_level.mean()

for days_left in range(1, max_days):
    for tickets_left in range(max_tickets):
        for tickets_sold in range(tickets_left):
            for demand_index, demand_level in enumerate(demand_levels):
                price = max(demand_level - tickets_sold, 0)
                rev_today = price * tickets_sold
                Q[tickets_sold, tickets_left, demand_index, days_left] = rev_today + V[tickets_left - tickets_sold, days_left - 1]

        expected_total_rev_from_best_quantity_at_each_demand_level = Q[:, tickets_left, :, days_left].max(axis=0)
        V[tickets_left, days_left] = expected_total_rev_from_best_quantity_at_each_demand_level.mean()

# Define the pricing function
def pricing_function(days_left, tickets_left, demand_level):
    demand_level_index = np.abs(demand_level - demand_levels).argmin()
    day_index = days_left - 1  # Arrays are 0 indexed
    tickets_index = int(tickets_left)  # Convert to int for indexing
    relevant_Q_vals = Q[:, tickets_index, demand_level_index, day_index]
    desired_quantity = relevant_Q_vals.argmax()
    price = demand_level - desired_quantity
    return price

# Create a DataFrame to store the data
data = []
for days_left in range(max_days):
    for tickets_left in range(max_tickets):
        for demand_level in demand_levels:
            price = pricing_function(days_left, tickets_left, demand_level)
            data.append([days_left, tickets_left, demand_level, price])

df = pd.DataFrame(data, columns=['days_until_flight', 'seats_left', 'demand_level', 'price'])
df.tail(100)

Unnamed: 0,days_until_flight,seats_left,demand_level,price
54900,49,90,200.0,188.0
54901,49,91,100.0,100.0
54902,49,91,110.0,110.0
54903,49,91,120.0,120.0
54904,49,91,130.0,130.0
...,...,...,...,...
54995,49,99,160.0,160.0
54996,49,99,170.0,170.0
54997,49,99,180.0,177.0
54998,49,99,190.0,182.0


In [4]:
print(len(df))

55000


In [7]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Splitting the data into training and testing sets
X = df[['days_until_flight', 'seats_left', 'demand_level']]
y = df['price']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Training the Random Forest Regressor
regressor = RandomForestRegressor(n_estimators=100, random_state=42)
regressor.fit(X_train, y_train)

# Predicting on the test set
y_pred = regressor.predict(X_test)

# Evaluating the model
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")

Mean Absolute Error: 0.09996090909090906
Mean Squared Error: 0.05646680909090907
R^2 Score: 0.99993515344334
