In [None]:
using Glob
using JSON
using LargeScaleAnalysis
using ProgressMeter
using PyCall
using PyPlot
using Statistics
using StatsBase

In [None]:
# Load shared notebook helpers from the ParsimoniousMonitoring project checkout.
# NOTE(review): `save_thesis`, called further down, is presumably defined there — confirm.
include("../../ParsimoniousMonitoring/notebooks/thesis.jl")

### Helpers

In [None]:
# Parse the anchoring-mesh snapshot and derive the lookup table that
# `measurement_pairs` indexes by the measurement id parsed from a ping file name.
# NOTE(review): the arguments (4, "ping", "traceroute") suggest an IPv4
# ping -> traceroute measurement-id mapping — confirm against LargeScaleAnalysis.
mesh = parsefile(AnchoringMesh, "../data/mesh_20200520.json")
ping_traceroute = measurement_mapping(mesh, 4, "ping", "traceroute");

In [None]:
"""
    parsename(f)

Extract the two integers of a `<int>_<int>.<ext>` file name and return them as a
`Vector{Int}`. Callers in this notebook read the result as `(msm_id, prb_id)`.

Only the last path component of `f` is inspected, so digits that appear in a
directory name (e.g. `run_1_2.0/`) are never mistaken for the identifiers.

# Throws
- `ArgumentError` if the file name does not contain the `<int>_<int>.` pattern.
"""
function parsename(f)
    m = match(r"(\d+)_(\d+)\.", basename(f))
    if m === nothing
        throw(ArgumentError("cannot parse `<int>_<int>.` identifiers from \"$f\""))
    end
    return [parse(Int, c) for c in m.captures]
end;

In [None]:
"""
    measurement_pairs(d1, d2)

Pair every `*.ndjson` file found in directory `d1` with the path of its
counterpart in directory `d2`.

The counterpart name is the file's base name with its leading measurement id
replaced by `ping_traceroute[msm_id]`. Files whose measurement id has no entry in
`ping_traceroute` are skipped, and `#<msm_id> ` is printed for each of them.

Returns a `Vector{Tuple{String,String}}` of `(file_in_d1, path_in_d2)`. The
second path is only constructed; its existence is not checked here.
"""
function measurement_pairs(d1, d2)
    result = Tuple{String,String}[]
    for file in glob("*.ndjson", d1)
        msm_id, _ = parsename(file)
        if !haskey(ping_traceroute, msm_id)
            print("#$(msm_id) ")
            continue
        end
        # `count = 1`: only the leading measurement id may be rewritten. Without
        # it, a probe id that contains the same digits would be altered as well.
        renamed = replace(
            basename(file),
            string(msm_id) => string(ping_traceroute[msm_id]);
            count = 1,
        )
        push!(result, (file, joinpath(d2, renamed)))
    end
    return result
end;

### Traceroutes

In [None]:
# How many AS paths are associated with a given state?
# Extreme case => 1 new state per timestep = exactly 1 AS/IP path per state
# How many states are associated with a given AS path?
# Extreme case => a single state for the whole series = exactly 1 state per AS/IP path
# => We want these two conditions to be as close as possible (= good segmentation)
# TODO: 3 levels: IP, router (alias resolution), AS

In [None]:
# ping_traceroute_pairs = measurement_pairs(
#     "/blobs/datasets/thesis/ping_v4_1580511600_1581116400_noself_pairs/",
#     "/blobs/datasets/thesis/traceroute_v4_1580511600_1581116400_noself_pairs/"
# );

In [None]:
# Build the (ping file, traceroute file) path pairs from the two dataset directories.
# NOTE(review): the two directory names end in different timestamps
# (1581116400 vs 1581118200) — confirm this is intended.
ping_traceroute_pairs = measurement_pairs(
    "/blobs/datasets/thesis/ping_v4_1580511600_1581116400_01_pairs/",
    "/blobs/datasets/thesis/traceroute_v4_1580511600_1581118200_01_pairs/"
);

#### IP Path

In [None]:
# For every ping/traceroute pair, relate the segments of the ping segmentation
# model to the segments of the labelled traceroute series, and collect the size
# of each entry of the resulting mapping, in both directions.
#   counts_ab: sizes taken from the first element of `bidirectional_mapping`
#   counts_ba: sizes taken from the second element
# NOTE(review): `labelize(traceroute)` with no field argument presumably labels
# by IP-level path — confirm against LargeScaleAnalysis.
counts_ab = Int[]
counts_ba = Int[]
@showprogress for (ping_file, traceroute_file) in ping_traceroute_pairs
    try
        ping_file = "$(ping_file).model.json"
        traceroute_file = "$(splitext(traceroute_file)[1]).processed.json"

        model = parsefile(DataSegmentationModel, ping_file)
        traceroute = parsefile(Vector{TracerouteRecord}, traceroute_file)

        ping_segments = segments(model)
        traceroute_segments = segments(labelize(traceroute))

        # Compute the mapping once and read both directions from it.
        mapping = bidirectional_mapping(ping_segments, traceroute_segments)
        res_ab = map(length, values(reduce(mapping[1])))
        res_ba = map(length, values(reduce(mapping[2])))

        # Append only after both directions succeeded, so the two vectors stay
        # in step. `append!` also copes with an empty result, unlike splatting.
        append!(counts_ab, res_ab)
        append!(counts_ba, res_ba)
    catch e
        # A `SystemError` (e.g. a derived file that cannot be opened) means this
        # pair is skipped. Anything else is a real failure: propagate it with
        # its original backtrace.
        e isa SystemError || rethrow()
    end
end

In [None]:
# Two side-by-side bar charts of the IP-level association counts, exported
# through `save_thesis`.
fig, axs = subplots(ncols = 2, figsize = (8, 3))

# Left panel: occurrences of each value found in `counts_ab`.
d = counts(counts_ab, maximum(counts_ab))
# d *= 100 / sum(d)
let ax = axs[1]
    ax.bar(1:length(d), d)
    ax.set_xlim(xmax = 18.5)
    ax.set_title(L"Nombre d'états\\associés à $x$ chemins IP")
    ax.grid()
end

# Right panel: occurrences of each value found in `counts_ba`.
d = counts(counts_ba, maximum(counts_ba))
# d *= 100 / sum(d)
let ax = axs[2]
    ax.bar(1:length(d), d)
    ax.set_xlim(xmax = 12)
    ax.set_title(L"Nombre de chemins IP\\associés à $x$ états")
    ax.grid()
end

save_thesis(
    "atlas_state_ip_mapping",
    axw = raw"0.5\linewidth",
    axh = "6cm",
    extra_axis_params = ["align=center"],
)

In [None]:
# mean(counts_ab), mean(counts_ba)

#### AS Path

In [None]:
# Same computation as the IP-path cell, but the traceroute series is labelled by
# its `:hops_asn` field before being segmented.
#   counts_ab: sizes taken from the first element of `bidirectional_mapping`
#   counts_ba: sizes taken from the second element
counts_ab = Int[]
counts_ba = Int[]
@showprogress for (ping_file, traceroute_file) in ping_traceroute_pairs
    try
        ping_file = "$(ping_file).model.json"
        traceroute_file = "$(splitext(traceroute_file)[1]).processed.json"

        model = parsefile(DataSegmentationModel, ping_file)
        traceroute = parsefile(Vector{TracerouteRecord}, traceroute_file)

        ping_segments = segments(model)
        traceroute_segments = segments(labelize(traceroute, :hops_asn))

        # Compute the mapping once and read both directions from it.
        mapping = bidirectional_mapping(ping_segments, traceroute_segments)
        res_ab = map(length, values(reduce(mapping[1])))
        res_ba = map(length, values(reduce(mapping[2])))

        # Append only after both directions succeeded, so the two vectors stay
        # in step. `append!` also copes with an empty result, unlike splatting.
        append!(counts_ab, res_ab)
        append!(counts_ba, res_ba)
    catch e
        # A `SystemError` (e.g. a derived file that cannot be opened) means this
        # pair is skipped. Anything else is a real failure: propagate it with
        # its original backtrace.
        e isa SystemError || rethrow()
    end
end

In [None]:
# Two side-by-side bar charts of the AS-level association counts, exported
# through `save_thesis`.
fig, axs = subplots(ncols = 2, figsize = (8, 3))

# Left panel: occurrences of each value found in `counts_ab`.
d = counts(counts_ab, maximum(counts_ab))
# d *= 100 / sum(d)
let ax = axs[1]
    ax.bar(1:length(d), d)
    ax.set_xlim(xmax = 18.5)
    ax.set_title(L"Nombre d'états\\associés à $x$ chemins AS")
    ax.grid()
end

# Right panel: occurrences of each value found in `counts_ba`.
d = counts(counts_ba, maximum(counts_ba))
# d *= 100 / sum(d)
let ax = axs[2]
    ax.bar(1:length(d), d)
    ax.set_xlim(xmax = 12)
    ax.set_title(L"Nombre de chemins AS\\associés à $x$ états")
    ax.grid()
end

save_thesis(
    "atlas_state_as_mapping",
    axw = raw"0.5\linewidth",
    axh = "6cm",
    extra_axis_params = ["align=center"],
)

In [None]:
# Mean of each count vector, returned as a `(counts_ab, counts_ba)` tuple.
mean.((counts_ab, counts_ba))

In [None]:
# Hypothesis: under congestion, does an IP path change cause a significant delay change?
# (decongestion)
# When a state is associated with several paths, should the delay variance within that state be low?

### Plots

In [None]:
using Dates
# Plot a segmentation model on `ax` (defaults to the current axes):
#   - `model.data` against `model.index` converted from Unix time, with missing
#     samples replaced by NaN;
#   - one translucent vertical band per closed segment, coloured through the
#     `tab20` colormap according to the segment's state.
# Returns `nothing`.
function PyPlot.plot(model::DataSegmentationModel; ax = gca())
    # One colour per distinct state, resolved once up front.
    palette = Dict(state => plt.cm.tab20(state) for state in sort(unique(model.state)))

    timestamps = unix2datetime.(model.index)
    samples = coalesce.(model.data, NaN)
    ax.plot(timestamps, samples)

    for seg in segments(model, closed = true)
        t_start = unix2datetime(seg.range.start)
        t_stop = unix2datetime(seg.range.stop)
        ax.axvspan(t_start, t_stop, alpha = 0.3, color = palette[seg.state])
    end
    return nothing
end

In [None]:
# Pick one ping/traceroute pair at random; `@show` echoes the index and the two
# paths so that an interesting draw can be noted down.
@show idx = rand(1:length(ping_traceroute_pairs))
@show ping_file, traceroute_file = ping_traceroute_pairs[idx]
# Derive the model / processed file names the same way as the mapping cells above.
ping_file = "$(ping_file).model.json"
traceroute_file = "$(splitext(traceroute_file)[1]).processed.json"
model = parsefile(DataSegmentationModel, ping_file)
traceroute = parsefile(Vector{TracerouteRecord}, traceroute_file)

# Top axis: ping series with its segmentation. Bottom axis: traceroute segments.
fig, axs = subplots(nrows = 2, figsize = (10, 4))
plot(model, ax = axs[1])

# One band per closed segment of the traceroute series labelled by `:hops`.
# NOTE(review): the `plot` method above converts segment bounds with
# `unix2datetime`, whereas the bounds are passed as-is here — confirm that the
# two x-axes are meant to be comparable.
for segment in segments(labelize(traceroute, :hops), closed = true)
    axs[2].axvspan(segment.range.start, segment.range.stop, color = plt.cm.tab20(segment.state))
end

In [None]:
# Show that we get a good correspondence on this trace.
# (ping_file, traceroute_file) = ping_traceroute_pairs[idx] = ("/blobs/datasets/thesis/ping_v4_1580511600_1581116400_01_pairs/1402085_6624.ndjson", "/blobs/datasets/thesis/traceroute_v4_1580511600_1581118200_01_pairs/1402084_6624.ndjson")

In [None]:
# (ping_file, traceroute_file) = ping_traceroute_pairs[idx] = ("/blobs/datasets/thesis/ping_v4_1580511600_1581116400_01_pairs/17661127_6381.ndjson", "/blobs/datasets/thesis/traceroute_v4_1580511600_1581118200_01_pairs/17661126_6381.ndjson")

In [None]:
# Label the traceroute series selected above by its `:hops_asn` field.
d = labelize(traceroute, :hops_asn);

In [None]:
# Closed segments computed from the label sequence of `d`.
segs = segments(d.label, closed = true)

In [None]:
# `hops_asn` of the first record selected by the first segment.
d.data[segs[1]][1].hops_asn

In [None]:
# `hops_asn` of the first record selected by the second segment, for comparison.
d.data[segs[2]][1].hops_asn

In [None]:
# `hops_asn` of record 1, read from a freshly labelled series.
labelize(traceroute, :hops_asn).data[1].hops_asn

In [None]:
# `hops_asn` of record 4, read from a freshly labelled series.
labelize(traceroute, :hops_asn).data[4].hops_asn