In [1]:
using DataFrames, XLSX, DataStructures, Random

In [None]:
xf = XLSX.readxlsx("velib_data_simulation.xlsx")
data = xf["Feuille1"]

parser = x -> if x isa String parse(Float64, x) elseif x isa Number Float64(x) else NaN end

# NOTE that the stations will be shifted : stations are 3:7 in the sheet, but 1:5 in the code
# station 1 in the code correspond to station 3 in the sheets, ...
# General data
routing = parser.(data["B4:F8"])

# Express everything in minutes : average number of departures per minute  
departures = vec(parser.(data["B13:F13"])) ./ 60

traversal_times = convert.(Float64, parser.(data["B18:F22"]))

# Initial condiditons
docks_per_station = vec(data["B26:F26"]) # Should not be used

initial_bikes = vec(data["B30:F30"])
sum(initial_bikes) # Souldn't it be 100 ? Oh well anyways the initial state should not matter anyways

traversal_times

5×5 Matrix{Float64}:
 0.0  3.0  5.0  7.0  7.0
 2.0  0.0  2.0  5.0  5.0
 4.0  2.0  0.0  3.0  3.0
 8.0  6.0  4.0  0.0  2.0
 7.0  7.0  5.0  2.0  0.0

In [3]:
# Sampling sanity check 
SAMPLES = 1000000


# Test using StatsBase
using StatsBase
counts_1 = zeros(Int, 5)
@time begin
    for _ ∈ 1:SAMPLES
        res = sample(1:5, Weights(@view routing[1, :]))
        counts_1[res] += 1
    end
end

# Using a custom sampler
"""
Given a sorted vector of thresholds that represent [p1, p1+p2, ..., p1 + ... + pn-1, p1 + ... + pn = 1],
LAST ELEMENT HAS TO BE 1
Return a random int in 1:n with probabilities of p1 for 1, ...
"""
function custom_sample(thresholds :: Vector{Float64})
    u = rand()
    res = findfirst(x -> (x > u), thresholds)
    return res
end

counts_2 = zeros(Int, 5)
thresholds = [sum(routing[1, 1:i]) for i ∈ 1:5]

@time begin
    for _ ∈ 1:SAMPLES
        res = custom_sample(thresholds)
        counts_2[res] += 1
    end
end

# Sanity check
println("Draw frequencies 1 : $(counts_1 ./ SAMPLES)")
println("Draw frequencies 2 : $(counts_2 ./ SAMPLES)")

# Fastest is custom sampler with precomputed thresholds !

  1.382369 seconds (8.15 M allocations: 220.909 MiB, 6.39% gc time, 9.54% compilation time)
  0.320757 seconds (4.99 M allocations: 91.475 MiB, 6.13% gc time)
Draw frequencies 1 : [0.0, 0.200718, 0.300762, 0.199303, 0.299217]
Draw frequencies 2 : [0.0, 0.199975, 0.300348, 0.200226, 0.299451]


In [4]:
# Exponential draw sanity check
SAMPLES = 1000000

# Test using Distributions.jl
using Distributions
distrib = Exponential(1 / departures[1])
res_1 = []
@time begin
    for _ ∈ 1:SAMPLES
        res = rand(distrib)
        push!(res_1, res)
    end
end

# Test using a custom function
function draw_exponential(lambda :: Float64)
    u = rand()
    return (-1 / lambda) * log(u)
end

res_2 = []
@time begin
    for _ ∈ 1:SAMPLES
        res = draw_exponential(departures[1])
        push!(res_2, res)
    end
end

# Sanity check
println("Average for first distrib : $(sum(res_1) / SAMPLES), for second : $(sum(res_2) / SAMPLES) ")

# We see that the custom hand made one is a little bit faster, or near enough. Will will thus only use custom function !

  0.646343 seconds (4.01 M allocations: 104.877 MiB, 9.45% gc time, 1.59% compilation time)
  0.649722 seconds (5.00 M allocations: 119.838 MiB, 6.84% gc time)
Average for first distrib : 21.449430983976317, for second : 21.421730410870303 


In [11]:
# Next departure in at t, with time = t + X where X ~ Exp(lambda)
struct Departure
    time :: Float64
    origin :: Int
end

# At a departure time, draw the destination using the routing matrix, and set the arrival to t + travel_time[origin, dest]
# Or should travel time also be drawn with an exponential ? Might be ?
struct Arrival
    time :: Float64
    origin :: Int
    destination :: Int
end

Event = Union{Departure, Arrival}    

function simulator(
    departures :: Vector{Float64},
    routing_probas :: Matrix{Float64},
    traversal_times :: Matrix{Float64},
    initial_conditions :: Vector{Int},
    Tmax :: Float64,
    verbose :: Bool = false,
    seed :: Int = 42,
)
    #Random.seed!(seed)
    t = 0
    N = sum(initial_conditions)
    M = length(initial_conditions)

    event_queue = PriorityQueue{Event, Float64}(Base.Order.Forward)

    # Initial departures
    for i ∈ 1:M
        # Draws a random time with the given distribution
        t = draw_exponential(departures[i])
        event_queue[Departure(t, i)] = t
    end

    # Pre-build the threshold vectors used for sampling destinations
    routing_thresholds = [[sum(routing[i, 1:j]) for j ∈ 1:M] for i ∈ 1:M]
    # Correct possible floating point errors where probas might not exactly add to 1.0
    for i ∈ 1:M
        routing_thresholds[i][end] = 1.0
    end

    state = copy(initial_conditions)
    journeys_in_progress = zeros(Int, (M,M))

    iter = 0

    while t < Tmax

        if verbose && mod(iter, 100) == 0
            println("$iter events treated, time is $t")
        end

        event = dequeue!(event_queue)
    
        if event isa Departure
            t = event.time
            origin = event.origin
            if state[origin] > 0
                # Pick the destination and create the arrival event
                dest = custom_sample(routing_thresholds[origin])
                t_arrival = t + traversal_times[origin, dest]
                event_queue[Arrival(t_arrival, origin, dest)] = t_arrival
                journeys_in_progress[origin, dest] += 1
                state[origin] -= 1
            else
                # Journey is lost, do something ?
            end

            # In any case, draw the next departure event
            t_next = t + draw_exponential(departures[origin])
            event_queue[Departure(t_next, origin)] = t_next

        elseif event isa Arrival
            t = event.time
            # Simply update the states (We do not consider station capacity !)
            journeys_in_progress[event.origin, event.destination] -= 1
            state[event.destination] += 1
        end

        iter += 1
    end

    if verbose
        println("Reached Tmax = $Tmax, $iter events treated")
    end
    
end

simulator(
    departures,
    routing,
    traversal_times,
    20 * ones(Int, 5),
    1000.0,
    true
)

0 events treated, time is 14.000254759510609
100 events treated, time is 140.96455570981476
200 events treated, time is 289.2435785140965
300 events treated, time is 475.81465372792854
400 events treated, time is 614.2853211755706
500 events treated, time is 765.1859693292406
600 events treated, time is 914.5404170897347
Reached Tmax = 1000.0, 675 events treated
