In [1]:
using DataFrames, XLSX, DataStructures, Random

In [None]:
xf = XLSX.readxlsx("velib_data_simulation.xlsx")
data = xf["Feuille1"]

parser = x -> if x isa String parse(Float64, x) elseif x isa Number Float64(x) else NaN end

# NOTE that the stations will be shifted : stations are 3:7 in the sheet, but 1:5 in the code
# station 1 in the code correspond to station 3 in the sheets, ...
# General data
routing = parser.(data["B4:F8"])

# Express everything in minutes : average number of departures per minute  
departures = vec(parser.(data["B13:F13"])) ./ 60

M = length(departures)

traversal_times = convert.(Float64, parser.(data["B18:F22"]))

# Initial condiditons
docks_per_station = vec(data["B26:F26"]) # Should not be used

initial_bikes = vec(data["B30:F30"])
sum(initial_bikes) # Souldn't it be 100 ? Oh well anyways the initial state should not matter anyways

traversal_times


5

In [3]:
# Sampling sanity check 
SAMPLES = 1000000


# Test using StatsBase
using StatsBase
counts_1 = zeros(Int, 5)
@time begin
    for _ ∈ 1:SAMPLES
        res = sample(1:5, Weights(@view routing[1, :]))
        counts_1[res] += 1
    end
end

# Using a custom sampler
"""
Given a sorted vector of thresholds that represent [p1, p1+p2, ..., p1 + ... + pn-1, p1 + ... + pn = 1],
LAST ELEMENT HAS TO BE 1
Return a random int in 1:n with probabilities of p1 for 1, ...
"""
function custom_sample(thresholds :: Vector{Float64})
    u = rand()
    res = findfirst(x -> (x > u), thresholds)
    return res
end

counts_2 = zeros(Int, 5)
thresholds = [sum(routing[1, 1:i]) for i ∈ 1:5]

@time begin
    for _ ∈ 1:SAMPLES
        res = custom_sample(thresholds)
        counts_2[res] += 1
    end
end

# Sanity check
println("Draw frequencies 1 : $(counts_1 ./ SAMPLES)")
println("Draw frequencies 2 : $(counts_2 ./ SAMPLES)")

# Fastest is custom sampler with precomputed thresholds !

  1.061600 seconds (8.15 M allocations: 220.910 MiB, 8.71% gc time, 9.34% compilation time)
  0.218059 seconds (4.99 M allocations: 91.475 MiB, 5.37% gc time)
Draw frequencies 1 : [0.0, 0.20073, 0.299568, 0.199807, 0.299895]
Draw frequencies 2 : [0.0, 0.200479, 0.300122, 0.199978, 0.299421]


In [4]:
# Exponential draw sanity check
SAMPLES = 1000000

# Test using Distributions.jl
using Distributions
distrib = Exponential(1 / departures[1])
res_1 = []
@time begin
    for _ ∈ 1:SAMPLES
        res = rand(distrib)
        push!(res_1, res)
    end
end

# Test using a custom function
function draw_exponential(lambda :: Float64)
    u = rand()
    return (-1 / lambda) * log(u)
end

res_2 = []
@time begin
    for _ ∈ 1:SAMPLES
        res = draw_exponential(departures[1])
        push!(res_2, res)
    end
end

# Sanity check
println("Average for first distrib : $(sum(res_1) / SAMPLES), for second : $(sum(res_2) / SAMPLES) ")

# We see that the custom hand made one is a little bit faster, or near enough. Will will thus only use custom function !

  0.380615 seconds (4.01 M allocations: 104.877 MiB, 6.87% gc time, 1.79% compilation time)
  0.457494 seconds (5.00 M allocations: 119.837 MiB, 5.44% gc time)
Average for first distrib : 21.397709671619417, for second : 21.42954594414596 


In [5]:
# Next departure in at t, with time = t + X where X ~ Exp(lambda)
struct Departure
    time :: Float64
    origin :: Int
end

# At a departure time, draw the destination using the routing matrix, and set the arrival to t + travel_time[origin, dest]
# Or should travel time also be drawn with an exponential ? Might be ?
struct Arrival
    time :: Float64
    origin :: Int
    destination :: Int
end

Event = Union{Departure, Arrival};

In [20]:

function simulator(
    departures :: Vector{Float64},
    routing_probas :: Matrix{Float64},
    traversal_times :: Matrix{Float64},
    initial_conditions :: Vector{Int},
    Tmax :: Float64,
    verbose :: Bool = false,
    seed :: Int = 42,
)
    Random.seed!(seed)
    t = 0
    N = sum(initial_conditions)
    M = length(initial_conditions)

    event_queue = PriorityQueue{Event, Float64}(Base.Order.Forward)

    # Initial departures
    for i ∈ 1:M
        # Draws a random time with the given distribution
        t_dep = draw_exponential(departures[i])
        event_queue[Departure(t_dep, i)] = t_dep
    end

    # Pre-build the threshold vectors used for sampling destinations
    routing_thresholds = [[sum(routing[i, 1:j]) for j ∈ 1:M] for i ∈ 1:M]
    # Correct possible floating point errors where probas might not exactly add to 1.0
    for i ∈ 1:M
        routing_thresholds[i][end] = 1.0
    end

    state = copy(initial_conditions)
    journeys_in_progress = zeros(Int, (M,M))

    iter = 0

    # Statistics 
    
    # Counting with Poisson arrivals
    empty_when_we_try_departure = zeros(Int, M) # Count the number of times a station is empty before we try to depart from it
    # Counting with average
    last_departures = zeros(Float64, M) # Keep track of the last time there was a departure from each station.
    time_spent_at_zero = zeros(Float64, M) # When a bike arrives at an empty station, we thus know how long it spent empty
    departures_per_station = zeros(Int, M) # Count the number of departures we try at each station
    

    while t < Tmax

        # if verbose && mod(iter, 100) == 0
        #     println("$iter events treated, time is $t")
        # end

        event = dequeue!(event_queue)
    
        if event isa Departure
            t = event.time
            origin = event.origin
            departures_per_station[origin] += 1
            if state[origin] > 0
                # Pick the destination and create the arrival event
                dest = custom_sample(routing_thresholds[origin])
                t_arrival = t + traversal_times[origin, dest]
                event_queue[Arrival(t_arrival, origin, dest)] = t_arrival
                journeys_in_progress[origin, dest] += 1
                state[origin] -= 1
                last_departures[origin] = t
            else
                # Journey is lost, do something ?
                empty_when_we_try_departure[origin] += 1
            end

            # In any case, draw the next departure event
            t_next = t + draw_exponential(departures[origin])
            event_queue[Departure(t_next, origin)] = t_next

        elseif event isa Arrival
            t = event.time
            ori, dest = event.origin, event.destination
            # If we arrive at empty station, the station spent (t - last_departures[dest]) minutes empty !
            if state[dest] == 0
                time_spent_at_zero[dest] += (t - last_departures[dest])
            end
            # Simply update the states (We do not consider station capacity !)
            journeys_in_progress[ori, dest] -= 1
            state[dest] += 1
        end

        iter += 1
    end

    lost_prop = empty_when_we_try_departure ./ departures_per_station
    average_at_zero = time_spent_at_zero ./ t

    if verbose
        println("Reached Tmax = $Tmax, $iter events treated")
        println("Sanity check : number of bikes is $(sum(state) + sum(journeys_in_progress))")
        println("----------------------------")
        println("Proportion of departures lost due to empty station : $lost_prop")
        println("Proportion of the time spent empty : $average_at_zero")
        println("Gap : $(abs.(lost_prop - average_at_zero))")
    end
    
    return state, lost_prop, average_at_zero
end


simulator (generic function with 3 methods)

In [23]:
Tmax_Single = 5e8
initial_state = ones(Int, M)

simulator(
    departures,
    routing,
    traversal_times,
    initial_state,
    Tmax_Single,
    true
);

Reached Tmax = 5.0e8, 251436627 events treated
Sanity check : number of bikes is 5
----------------------------
Proportion of departures lost due to empty station : [0.42677313871331746, 0.49232575434273734, 0.5606050381681621, 0.4914860111320359, 0.4796832930750249]
Proportion of the time spent empty : [0.42669268554719164, 0.4924012072873502, 0.5604727659297177, 0.4914801466076327, 0.47970569652622713]
Gap : [8.045316612581566e-5, 7.545294461286378e-5, 0.0001322722384443642, 5.8645244032096144e-6, 2.2403451202246405e-5]


In [25]:
Tmax_Multiple = 1e5
N_simus = 10000

simus_ending_at_zero = zeros(Int, M)

Threads.@threads for i ∈ 1:N_simus
    state, _, __ = simulator(   
        departures,
        routing,
        traversal_times,
        initial_state,
        Tmax_Multiple,
        false,
        i
    )
    for (i, s) in enumerate(state)
        if s == 0
            simus_ending_at_zero[i] += 1
        end
    end
end

println("Proportion of simulation where each station ends with zero bikes : $(simus_ending_at_zero ./ N_simus)")

Proportion of simulation where each station ends with zero bikes : [0.4307, 0.4905, 0.5652, 0.4983, 0.4797]
