In [1]:
using StatsBase, Random, LinearAlgebra, DecisionTree, DataFrames, MLDataUtils, ScikitLearn, CSV, Plots, Gurobi, JuMP, Plots.PlotMeasures

# Scenario Generation

In [2]:
function scenario_generation(y_neighbors,method="knn")
    """
    generate scenarios of cancelation/non-cancelation based on historic data.
    Procedure will depend on whether the prescriptive method is knn or OCT
    
    @param y_neighbors: n x k matrix holding possible cancelation scenarios for each observation
    """
    n,k = size(y_neighbors)
    y_hat = zeros(n) 
    if method == "knn"
        for i=1:n
            p = sum(y_neighbors[i,:])/k
            y_hat[i] = p  #(rand() < p) ? 1 : 0 #1 with probability p, otherwise 0
        end
    end
    return y_hat
end

scenario_generation (generic function with 2 methods)

In [111]:
s = vec(Matrix(CSV.read("s.csv",DataFrame; header=0)));
s = broadcast(x -> Integer(x), s)
e = vec(Matrix(CSV.read("e.csv",DataFrame; header=0)));
e = broadcast(x -> Integer(x), e)
p = vec(Matrix(CSV.read("p.csv",DataFrame; header=0)));
t = vec(Matrix(CSV.read("t.csv",DataFrame; header=0)));
t = broadcast(x -> Integer(x), t)
y_neighbors = (Matrix(CSV.read("y_neighbors.csv",DataFrame; header=0)));
room_capacities = 450
Q = ones(5).*room_capacities #vec([5,5,5,5,5,5]);
M = 10^5;
f = 50;
q = 500;

In [112]:
X_new = (Matrix(CSV.read("X_new.csv",DataFrame; header=0)));
y_new = vec(Matrix(CSV.read("y_new.csv",DataFrame; header=0)));
y_new = broadcast(x -> Integer(x), y_new);
y_hat =  scenario_generation(y_neighbors);

# Baseline Approach

The baseline approach will never allow a scenario where overbooking could happen.

In [113]:
function prescribe_bookings_baseline(Q,q,f,p,s,e,t)
    """
    A function that receives n booking requests over a time period 1,...,T and returns
    a decision on each booking (accept or reject) so as to maximize revenue.
    
    @param Q: vector length J. J is the number of room types we have (1 person, 2 person, etc.) Q_j is 
    the number of rooms available we have of type j 
    @param q: constant. this is the cost incurred if a customer is overbooked 
    @param f: constant. Money received if a booking is canceled 
    @param p: vector length n. The price of each booking 
    @param s: vector length n. The starting day of each booking
    @param e: vector length n. The ending day of each booking
    @param t: vector length n. This gives the number of people that the ith booking is for
    @param y_neighbors: binary matrix n X K. This is the historical neighbour data,
                        whether the person canceled or not
    @param M: big M constraints
    @param y_hat: estimate of cancelation probabilities 
    """
    J = length(Q)
    n = length(s)
    
    gurobi_env=Gurobi.Env(); #to suppress some Gurobi outputs
    model = Model(with_optimizer(Gurobi.Optimizer, gurobi_env))
    set_optimizer_attribute(model, "OutputFlag", 0)
    #set_optimizer_attribute(model, "MIPGap", 0.1) #needed to ensure convergence

    
    @variable(model,z[i=1:n],Bin) #accept booking i or not

    #STRATEGY: Never allow num_people_booked > capacity 
    for i=1:n
        booked_people = 0 
        for i2=1:n
            if (s[i2] <= s[i]) && (e[i2] > s[i]) && (i2 !== i) && (t[i2] == t[i])
                booked_people += 1*z[i2]
            end
        end            
        @constraint(model,booked_people <= Q[t[i]])
    end
    
    @objective(model,Max, sum(z[i]*p[i] for i=1:n))
    optimize!(model)
    return objective_value(model) , value.(z)
end

prescribe_bookings_baseline (generic function with 1 method)

In [114]:
obj_baseline, z_baseline = prescribe_bookings_baseline(Q,q,f,p,s,e,t);

Academic license - for non-commercial use only - expires 2022-08-18


# kNN-based prescriptions

In [115]:
function prescribe_bookings_knn_new(Q,q,f,p,s,e,t,y_neighbors,y_hat)
    """
    A function that receives n booking requests over a time period 1,...,T and returns
    a decision on each booking (accept or reject) so as to maximize revenue.
    
    @param Q: vector length J. J is the number of room types we have (1 person, 2 person, etc.) Q_j is 
    the number of rooms available we have of type j 
    @param q: constant. this is the cost incurred if a customer is overbooked 
    @param f: constant. Money received if a booking is canceled 
    @param p: vector length n. The price of each booking 
    @param s: vector length n. The starting day of each booking
    @param e: vector length n. The ending day of each booking
    @param t: vector length n. This gives the number of people that the ith booking is for
    @param y_neighbors: binary matrix n X K. This is the historical neighbour data,
                        whether the person canceled or not
    @param M: big M constraints
    @param y_hat: estimate of whether people will cancel or not 
    """
    J = length(Q)
    n,K = size(y_neighbors)
    s = s .+ 1
    T = Int(maximum(s))
    
    gurobi_env=Gurobi.Env(); #to suppress some Gurobi outputs
    model = Model(with_optimizer(Gurobi.Optimizer, gurobi_env))
    set_optimizer_attribute(model, "OutputFlag", 0)
    #set_optimizer_attribute(model, "MIPGap", 0.1) #needed to ensure convergence

    
    @variable(model,z[i=1:n],Bin) #accept booking i or not
    @variable(model,m[time=1:T,j=1:J]) #model ith min (for penalty terms)
    @variable(model,exp_occupancy[time=1:T,j=1:J]) #expected occupancy at time t for room size j
    
    #STRATEGY: multiply expected surplus by q to penalise overbooking in objective

    obj = 0
    for time=1:T
        for j=1:J
            expected_people = 0
            for i=1:n
                if (s[i] <= time) && (e[i] > time) && (t[i] == j)
                    expected_people += z[i]*(1-y_hat[i])*1
                end
            end   
            @constraint(model, exp_occupancy[time,j] == expected_people)
            @constraint(model, m[time,j] <= 0)
            @constraint(model, m[time,j] <= Q[j] - exp_occupancy[time,j])
            #objective_term = min(Q[t[i]] - expected_people,0)*(q/n)
            if length(p[s.==time]) >= 1
                day_avg =  mean(p[s.==time])
            else
                day_avg = 0
            end
            obj += m[time,j]*(q+day_avg) #(q/n)
        end
    end
    
    obj += sum((1/K)*z[i]*(y_neighbors[i,k]*f + (1-y_neighbors[i,k])*p[i]) for k=1:K for i=1:n)
    
    @objective(model,Max, obj)
    optimize!(model)
    return objective_value(model) , value.(z), value.(m), value.(exp_occupancy)
end

prescribe_bookings_knn_new (generic function with 1 method)

In [132]:
function prescribe_bookings_knn(Q,q,f,p,s,e,t,y_neighbors,y_hat,alpha=1)
    """
    A function that receives n booking requests over a time period 1,...,T and returns
    a decision on each booking (accept or reject) so as to maximize revenue.
    
    @param Q: vector length J. J is the number of room types we have (1 person, 2 person, etc.) Q_j is 
    the number of rooms available we have of type j 
    @param q: constant. this is the cost incurred if a customer is overbooked 
    @param f: constant. Money received if a booking is canceled 
    @param p: vector length n. The price of each booking 
    @param s: vector length n. The starting day of each booking
    @param e: vector length n. The ending day of each booking
    @param t: vector length n. This gives the number of people that the ith booking is for
    @param y_neighbors: binary matrix n X K. This is the historical neighbour data,
                        whether the person canceled or not
    @param M: big M constraints
    @param y_hat: estimate of cancelation probabilities 
    """
    J = length(Q)
    n,K = size(y_neighbors)
    
    gurobi_env=Gurobi.Env(); #to suppress some Gurobi outputs
    model = Model(with_optimizer(Gurobi.Optimizer, gurobi_env))
    set_optimizer_attribute(model, "OutputFlag", 0)
    #set_optimizer_attribute(model, "MIPGap", 0.1) #needed to ensure convergence

    
    @variable(model,z[i=1:n],Bin) #accept booking i or not
    #@variable(model,b[i=1:n],Bin) #person i is overbooked or not
    #@variable(model,exp[i=1:n,j=1:J]>=0,Int) #number rooms occupied of type j when person i arrives
    
    #@constraint(model,[i=1:n,j=1:J],M*b[i]>=w[i,j]-Q[j]+1)

    #STRATEGY: Say that the expected number of guests cannot exceed capacity
    for i=1:n
        expected_people = 0 
        for i2=1:n
            if (s[i2] <= s[i]) && (e[i2] > s[i]) && (i2 !== i) && (t[i2] == t[i])
                expected_people += z[i2]*(1-y_hat[i2])*1
            end
        end            
        @constraint(model,expected_people <= alpha*Q[t[i]])
    end
    
    @objective(model,Max, sum(z[i]*(y_neighbors[i,k]*f + (1-y_neighbors[i,k])*p[i]) for k=1:K for i=1:n))
    optimize!(model)
    return objective_value(model) , value.(z)
end

prescribe_bookings_knn (generic function with 2 methods)

In [117]:
obj_knn, z_knn, m_knn, exp_knn = prescribe_bookings_knn_new(Q,q,f,p,s,e,t,y_neighbors,y_hat);

Academic license - for non-commercial use only - expires 2022-08-18


In [119]:
obj_knn2, z_knn2 = prescribe_bookings_knn(Q,q,f,p,s,e,t,y_neighbors,y_hat);

Academic license - for non-commercial use only - expires 2022-08-18


# Evaluate Choice

In [122]:
function eval_choice(z,y,Q,q,f,p,s,e,t)
    """
    s/z has to be sorted by arrival data
    """
    J = length(Q)
    n,K = size(y_neighbors)
    total_profit = 0 
    num_overbooked = 0
    num_canceled = 0
    bookings_sold = 0
    for i=1:n
        if z[i] == 1
            if y[i] == 1
                total_profit += f
                num_canceled += 1
            else
                num_visitors = 0 
                for i2=1:(i-1)
                    if (e[i2] > s[i]) && (t[i]==t[i2]) 
                        num_visitors += z[i2]*(1-y[i2])
                    end
                end
                #println("num visitors: ",num_visitors)
                if num_visitors < Q[t[i]]
                    total_profit += p[i]
                    bookings_sold += 1
                else
                    num_overbooked += 1
                    total_profit -= q
                end
            end
        end
    end
    
    return total_profit,num_overbooked, num_canceled, bookings_sold
end

eval_choice (generic function with 1 method)

In [123]:
total_profit_baseline,num_overbooked_baseline, num_canceled_baseline, bookings_sold_baseline = eval_choice(z_baseline,y_new,Q,q,f,p,s,e,t)

(929970.3300000009, 0, 953, 1771)

In [124]:
total_profit_knn,num_overbooked_knn, num_canceled_knn, bookings_sold_knn = eval_choice(z_knn,y_new,Q,q,f,p,s,e,t)

(929970.3300000009, 0, 953, 1771)

In [125]:
total_profit_knn2,num_overbooked_knn2, num_canceled_knn2, bookings_sold_knn2 = eval_choice(z_knn2,y_new,Q,q,f,p,s,e,t)

(929970.3300000009, 0, 953, 1771)

In [18]:
Q

5-element Vector{Float64}:
 100.0
 100.0
 100.0
 100.0
 100.0

In [19]:
sum(z_baseline)

1441.0

# Experiments 

In [22]:
function capacity_experiment(Qs,q,f,p,s,e,t,y_neighbors,y_hat,y_new)
    """
    run baseline model as well as prescriptive model for varying hotel capacity levels and 
    compare revenue.
    
    @param Qs: list of capacities to test out
    """
    N = length(Qs)
    baseline_revenue = zeros(N)
    baseline_overbookings = zeros(N)
    baseline_cancelations = zeros(N)
    baseline_successful_bookings = zeros(N)
    knn_revenue = zeros(N)
    knn_overbookings = zeros(N)
    knn_cancelations = zeros(N)
    knn_successful_bookings = zeros(N)
    
    for i=1:N
        Q = ones(5).*Qs[i]
        
        obj_baseline, z_baseline = prescribe_bookings_baseline(Q,q,f,p,s,e,t);
        obj_knn, z_knn = prescribe_bookings_knn(Q,q,f,p,s,e,t,y_neighbors,y_hat);
        
        total_profit_baseline,num_overbooked_baseline, num_canceled_baseline, bookings_sold_baseline = eval_choice(z_baseline,y_new,Q,q,f,p,s,e,t)
        total_profit_knn,num_overbooked_knn, num_canceled_knn, bookings_sold_knn = eval_choice(z_knn,y_new,Q,q,f,p,s,e,t)
        
        baseline_revenue[i] = total_profit_baseline
        baseline_overbookings[i] = num_overbooked_baseline
        baseline_cancelations[i] = num_canceled_baseline
        baseline_successful_bookings[i] = bookings_sold_baseline
                
        knn_revenue[i] = total_profit_knn
        knn_overbookings[i] = num_overbooked_knn
        knn_cancelations[i] = num_canceled_knn
        knn_successful_bookings[i] = bookings_sold_knn
    end
    
    return (baseline_revenue, baseline_overbookings, baseline_cancelations, baseline_successful_bookings,
            knn_revenue, knn_overbookings, knn_cancelations, knn_successful_bookings)
end

capacity_experiment (generic function with 1 method)

In [126]:
Qs = 25:25:450
(baseline_revenue, baseline_overbookings, baseline_cancelations, baseline_successful_bookings,
 knn_revenue, knn_overbookings, knn_cancelations, knn_successful_bookings) = capacity_experiment(Qs,q,f,p,s,e,t,y_neighbors,y_hat,y_new);

Academic license - for non-commercial use only - expires 2022-08-18
Academic license - for non-commercial use only - expires 2022-08-18
Academic license - for non-commercial use only - expires 2022-08-18
Academic license - for non-commercial use only - expires 2022-08-18
Academic license - for non-commercial use only - expires 2022-08-18
Academic license - for non-commercial use only - expires 2022-08-18
Academic license - for non-commercial use only - expires 2022-08-18
Academic license - for non-commercial use only - expires 2022-08-18
Academic license - for non-commercial use only - expires 2022-08-18
Academic license - for non-commercial use only - expires 2022-08-18
Academic license - for non-commercial use only - expires 2022-08-18
Academic license - for non-commercial use only - expires 2022-08-18
Academic license - for non-commercial use only - expires 2022-08-18
Academic license - for non-commercial use only - expires 2022-08-18
Academic license - for non-commercial use only -

In [137]:
plot(Qs,[baseline_revenue,knn_revenue], label = ["baseline" "kNN prescription"],
   size=(2500,2500), xtickfontsize=22,ytickfontsize=22, xlabel = "Capacity", ylabel = "Revenue (EUR)",
   labelfontsize=28, left_margin=15mm, legendfontsize=24, title="Revenue versus Hotel Capacity",
   titlefontsize=38,legend = :topleft)
savefig("hotel_rev_vs_capacity.pdf")

In [138]:
plot(Qs,[baseline_cancelations,knn_cancelations], label = ["baseline" "kNN prescription"],
   size=(2500,2500), xtickfontsize=22,ytickfontsize=22, xlabel = "Capacity", ylabel = "Cancelations",
   labelfontsize=28, left_margin=15mm, legendfontsize=24, title="Bookings Canceled versus Hotel Capacity",
   titlefontsize=38,legend = :topleft)
savefig("hotel_cancel_vs_capacity.pdf")

In [139]:
plot(Qs,[baseline_overbookings,knn_overbookings], label = ["baseline" "kNN prescription"],
   size=(2500,2500), xtickfontsize=22,ytickfontsize=22, xlabel = "Capacity", ylabel = "Overbookings",
   labelfontsize=28, left_margin=15mm, legendfontsize=24, title="Overbookings versus Hotel Capacity",
   titlefontsize=38,legend = :topleft)
savefig("overbooking_vs_capacity.pdf")

In [140]:
plot(Qs,[baseline_successful_bookings,knn_successful_bookings], label = ["baseline" "kNN prescription"],
   size=(2500,2500), xtickfontsize=22,ytickfontsize=22, xlabel = "Capacity", ylabel = "Successful Bookings",
   labelfontsize=28, left_margin=15mm, legendfontsize=24, title="Successful Bookings Hotel Capacity",
   titlefontsize=38,legend = :topleft)
savefig("successful_vs_capacity.pdf")

In [None]:
# function validate_alpha(Q,q,f,p,s,e,t,y_neighbors,y_hat,alphas)
    
# end