<a href="https://colab.research.google.com/github/Shyam657/Dynamic-Flight-Price-Prediction-Based-on-Remaining-Date/blob/main/Dynamic_Pricing_to_fill_slots.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install xgboost

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import pandas as pd
import numpy as np
import random
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_squared_error as MSE
import warnings
import math
import seaborn as sns
import matplotlib.pyplot as plt
warnings.filterwarnings("ignore")

In [3]:
#Booking starts 30 days in advance
days = 30
min_price = 4000
max_price = 14000
total_slots = 250
journey = 50
train_val_prop = 0.75
lst_days = list(range(1,days+1))
wastage_pct_min_range = 15
wastage_pct_max_range = 20
emptiness_threshold = 0.02
optimisation_day_bfr_jouney = 10

<font color=green> 
The lower the price, the more bookings. Bookings also increase over time as people gain clarity about their plans.
There are other factors such as weekday, weekend, special events, days before journey, and so on, but to keep things simple, we will assume that things are only price and time dependent, and generate data accordingly.

We will use the number of days until our trip as a lever to implement a self-correcting dynamic pricing strategy.
</font>

In [4]:
lst_df = []
for j in range(journey):
    lst_price = []
    prev_price = min_price
    for i in range(days):
        cur_price = min(prev_price + random.randint(0,500), max_price)
        prev_price = cur_price
        lst_price.append(cur_price)

    slots_filled =  round((1 - random.randint(wastage_pct_min_range,wastage_pct_max_range)/100.0) * total_slots,0)
    lst_slots = [] 
    weights = []
    for time_,p in enumerate(lst_price):
        
        # More bookings comes with time but also reduces as price increases
        weights.append(((time_ + 1)* random.uniform(1,1.2))/math.pow(p,5.0))

    msum = sum(weights)
    weights = [w/msum for w in weights]


    for w in weights:
        lst_slots.append(round(w * slots_filled, 0) )


    lst_df.append(pd.DataFrame( {'journey_id' : [j+1] * days, 'day':lst_days, 'price' : lst_price, 'slots' : lst_slots} ))
    

df = pd.concat(lst_df)

#Let's train on x% of journeys and validate learn't model performance on the remaining (1-x)%.

df_train = pd.concat(lst_df[:int(train_val_prop * len(lst_df)) + 1])
df_val = pd.concat(lst_df[int(train_val_prop * len(lst_df)) + 1:])


print("df's shape", df.shape)
print("df_train's shape", df_train.shape)
print("df_val's shape", df_val.shape)

df's shape (1500, 4)
df_train's shape (1140, 4)
df_val's shape (360, 4)


In [5]:
df.head()

Unnamed: 0,journey_id,day,price,slots
0,1,1,4359,8.0
1,1,2,4530,12.0
2,1,3,4952,12.0
3,1,4,5191,15.0
4,1,5,5390,14.0


#Let's Check Data

In [12]:
df[df.journey_id == 10]

Unnamed: 0,journey_id,day,price,slots
0,10,1,4356,10.0
1,10,2,4801,12.0
2,10,3,4901,16.0
3,10,4,5234,16.0
4,10,5,5553,15.0
5,10,6,5799,13.0
6,10,7,5874,16.0
7,10,8,6226,14.0
8,10,9,6683,10.0
9,10,10,6752,10.0


In [None]:
df_train.slots

0      8.0
1     12.0
2     16.0
3     16.0
4     18.0
      ... 
25     2.0
26     2.0
27     2.0
28     1.0
29     1.0
Name: slots, Length: 1140, dtype: float64

In [None]:
from xgboost import XGBRegressor
from numpy import asarray
model = XGBRegressor()
model.fit(df_train[['price','day']], np.array(df_train.slots))

XGBRegressor(base_score=0.5, booster='gbtree', callbacks=None,
             colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
             importance_type=None, interaction_constraints='',
             learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
             max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
             missing=nan, monotone_constraints='()', n_estimators=100, n_jobs=0,
             num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
             reg_lambda=1, ...)

In [None]:
def mean_absolute_percentage_error(y_true, y_pred): 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def mean_absolute_error(y_true, y_pred): 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs(y_true - y_pred)) 

rmse = round(math.sqrt(MSE(df_val['slots'],model.predict(df_val[['price','day']]))),2)

mae = round(mean_absolute_error( df_val['slots'],model.predict(df_val[['price','day']]) ),2)

mape = round(mean_absolute_percentage_error( df_val['slots'],model.predict(df_val[['price','day']]) ),2)

print(f"Root Mean squared error : {rmse}\nMean absolute error : {mae}\nMean Absolute percentage error : {mape}%")

Root Mean squared error : 1.59
Mean absolute error : 1.05
Mean Absolute percentage error : 16.91%


In [None]:
# Store predictions for fast stroage which will be utilised by the optimisation layer.

mp = {}
lst = []
for p in range(min_price, max_price+1):
    for d in range(days - optimisation_day_bfr_jouney , days +  1):
        lst.append( [p,d] )
        
pred = model.predict( np.array(lst))

for i in range(len(lst)):
    mp[lst[i][0],lst[i][1]] = int(pred[i])

In [None]:
def m_feasible(price_points, available_slots, emptiness_threshold):
    tmp = 0
    for p in price_points:
#         tmp = tmp + int(model.predict(np.asarray([[p[0],p[1]]]))[0])
        tmp = tmp + mp[p[0],p[1]]

    if tmp <= available_slots and total_slots * emptiness_threshold <= (available_slots - tmp):
        return True
    return False

In [None]:
def m_revenue(price_points):
    rev = 0
    slots = []
    for p in price_points:
        
        s_filled = mp[p[0],p[1]]
        slots.append(s_filled)
        rev = rev + (s_filled * p[0])

    return rev,slots

In [None]:
def optimise(df, journey_id):

    df_tmp = df[(df.journey_id == journey_id) & (df.day > (days - optimisation_day_bfr_jouney))].reset_index(drop = True)

    slots_filled = df[ (df.journey_id == journey_id) & (df.day < (days - optimisation_day_bfr_jouney)) ].slots.sum()

    available_slots = (total_slots - slots_filled) 
    
    # random search
    times = 50000
    ans = 0
    solution = []
    for j in range(times):
        price_points = []
        prev_price = min_price
        for i in range(optimisation_day_bfr_jouney):
            cur_price = random.randint(prev_price,int(prev_price * 1.15))
            if cur_price > max_price:
                break
            prev_price = cur_price
            price_points.append((cur_price,(days - optimisation_day_bfr_jouney + 1)))
        if m_feasible(price_points, available_slots, emptiness_threshold):
            if m_revenue(price_points)[0] > ans:
                ans, slots = m_revenue(price_points)
                solution = [p[0] for p in price_points]
                
#     df_tmp = df[df.journey_id == 1].loc[days - optimisation_day_bfr_jouney: ].reset_index(drop = True)
    df_tmp['proposed_price'] = solution
    df_tmp['forecasted_slots'] = slots
    
    orig = np.sum(df_tmp['price'] * df_tmp['slots'])
    proposed = np.sum(df_tmp['proposed_price'] * df_tmp['forecasted_slots'])
    revenue_gain = round(proposed-orig, 2)
    revenue_gain_pct = round((proposed-orig)/orig * 100.0, 2)
    
    slots_extra_gain = round(df_tmp['forecasted_slots'].sum() - df_tmp.slots.sum() , 0)
    slots_extra_gain_pct = round( slots_extra_gain/df_tmp.slots.sum() *100, 2)
    
#     display(df_tmp)
    
    print(f"""Previous Revenue: {orig}\nNew Revenue: {proposed}\nRevenue gain: {revenue_gain}\nRevenue gain %: {revenue_gain_pct}
Available slots: {available_slots}\nPreviously filled: {df_tmp.slots.sum()}
Filled after dynamic Pricing: {df_tmp['forecasted_slots'].sum()}\nSlots gain: {slots_extra_gain}\nSlots gain %: {slots_extra_gain_pct}""")
    
    return df_tmp

In [None]:
# Optimise journey
journey_id = 1
optimise(df, journey_id)

Previous Revenue: 204513.0
New Revenue: 373791
Revenue gain: 169278.0
Revenue gain %: 82.77
Available slots: 61.0
Previously filled: 20.0
Filled after dynamic Pricing: 56
Slots gain: 36.0
Slots gain %: 180.0


Unnamed: 0,journey_id,day,price,slots,proposed_price,forecasted_slots
0,1,21,9225,3.0,4564,10
1,1,22,9267,3.0,5234,8
2,1,23,9734,2.0,5657,7
3,1,24,9935,2.0,6350,6
4,1,25,10419,2.0,7192,5
5,1,26,10573,2.0,7978,5
6,1,27,10972,2.0,8196,4
7,1,28,11191,2.0,8770,4
8,1,29,11550,1.0,8796,4
9,1,30,11839,1.0,9894,3


In [None]:
# Optimise journey
journey_id = 2
optimise(df, journey_id)

Previous Revenue: 246279.0
New Revenue: 412775
Revenue gain: 166496.0
Revenue gain %: 67.6
Available slots: 70.0
Previously filled: 22.0
Filled after dynamic Pricing: 65
Slots gain: 43.0
Slots gain %: 195.45


Unnamed: 0,journey_id,day,price,slots,proposed_price,forecasted_slots
0,2,21,10504,3.0,4581,12
1,2,22,10642,2.0,5182,12
2,2,23,10651,3.0,5561,9
3,2,24,10883,2.0,6205,6
4,2,25,11190,2.0,6954,6
5,2,26,11600,2.0,7873,4
6,2,27,11660,2.0,8198,4
7,2,28,11789,2.0,8211,4
8,2,29,11806,2.0,8615,4
9,2,30,11837,2.0,8757,4


In [None]:
# Optimise journey
journey_id = 3
optimise(df, journey_id)

Previous Revenue: 193346.0
New Revenue: 385851
Revenue gain: 192505.0
Revenue gain %: 99.57
Available slots: 63.0
Previously filled: 19.0
Filled after dynamic Pricing: 58
Slots gain: 39.0
Slots gain %: 205.26


Unnamed: 0,journey_id,day,price,slots,proposed_price,forecasted_slots
0,3,21,9155,3.0,4573,10
1,3,22,9324,2.0,5107,9
2,3,23,9539,2.0,5834,8
3,3,24,9901,2.0,6670,6
4,3,25,10271,2.0,7444,6
5,3,26,10676,2.0,7728,4
6,3,27,10709,2.0,8034,4
7,3,28,11063,2.0,8736,4
8,3,29,11223,1.0,8737,4
9,3,30,11692,1.0,9954,3
