In [37]:
import pandas as pd
import matplotlib.pyplot as plt
import scienceplots
from itertools import product
import matplotlib.lines as mlines
from matplotlib.ticker import MaxNLocator

In [41]:
# Dataset whose measurements are loaded below. Only one dataset is processed
# per run; the others that were previously selected here are
# "ia-enron-email-all" and "fb-wosn-friends".
DATASET = "comm-linux-kernel-reply"

# The measurements live in a CSV file named after the dataset. Later cells read
# the columns "k", "phi", "timestamp", "vertex" and "ball_size" from it.
FILE_NAME = f"explicit-ball-size/{DATASET}.csv"
df = pd.read_csv(FILE_NAME)

In [42]:
parameters = []

print(f"Preprocess {DATASET} ...", end="")

data = df

# Distinct values found in each column of the loaded measurements.
ks = data["k"].unique()
phis = data["phi"].unique()
timestamps = data["timestamp"].unique()
vertices = data["vertex"].unique()

# Reference ball sizes: the rows with k == 0 and phi == 0.0, averaged per
# (vertex, timestamp) pair.
is_exact_row = (data["k"] == 0) & (data["phi"] == 0.0)
exact_ball_size = data[is_exact_row]
exact_ball_size = exact_ball_size.groupby(["vertex", "timestamp"])["ball_size"].mean()

# For every (k, phi) combination with a non-zero phi, divide the averaged ball
# size by the reference value (aligned on vertex and timestamp) and summarise
# the resulting ratios per timestamp.
covering = {}
for k, phi in product(ks, phis):
    if phi == 0:
        continue

    selected_rows = data[(data["k"] == k) & (data["phi"] == phi)]
    mean_ball_size = selected_rows.groupby(["vertex", "timestamp"])["ball_size"].mean()
    ratio_by_timestamp = (mean_ball_size / exact_ball_size).groupby("timestamp")

    covering[k, phi] = {
        "mean": ratio_by_timestamp.mean(),
        "std": ratio_by_timestamp.std(),
    }

print("done")

# Display one entry as a quick sanity check of the computed statistics.
covering[0, 0.25]

Preprocess comm-linux-kernel-reply ...done


{'mean': timestamp
 50531     0.970670
 54418     0.968216
 58305     0.963480
 62192     0.956440
 66079     0.968977
 69966     0.964136
 73853     0.964619
 77740     0.956066
 81627     0.953745
 85514     0.944319
 89401     0.964391
 93288     0.958782
 97175     0.963853
 101062    0.957233
 104949    0.947894
 108836    0.941706
 112723    0.931084
 116610    0.935239
 120497    0.938249
 124384    0.963038
 128271    0.961591
 132158    0.953238
 136045    0.946958
 139932    0.939666
 143819    0.947517
 147706    0.938190
 151593    0.931479
 155480    0.927454
 159367    0.922176
 163254    0.914908
 167141    0.909842
 171028    0.923103
 174915    0.917429
 178802    0.921845
 182689    0.921674
 186576    0.915647
 190463    0.906890
 194350    0.901383
 198237    0.901356
 202124    0.929307
 206011    0.950916
 209898    0.946759
 213785    0.939334
 217672    0.931416
 221559    0.924603
 225446    0.917645
 229333    0.911313
 233220    0.907282
 237107    0.901572
 

In [43]:
# Draw one panel per value in KS. For every phi in PHIS a panel shows the
# sub-sampled per-timestamp mean stored in covering[k, phi]["mean"] together
# with a dashed horizontal reference line at 1 - phi / (1 + phi). The finished
# figure is written to a PDF named after DATASET.
with plt.style.context(["science", "grid", "ieee"]):

    PHIS = [0.1, 0.5, 1.0]
    KS = [0, 2, 4, 8]

    # One column per entry of KS. squeeze=False always yields a 2-D array of
    # axes, so taking row 0 gives a 1-D sequence whatever the length of KS.
    fig, axis = plt.subplots(
        1, len(KS), figsize=(4, 2), sharey=True, sharex=True, squeeze=False
    )
    axis = axis[0]
    plt.subplots_adjust(hspace=0.2, wspace=0.05)

    # Colour and marker used for each phi (0.75 and 1.0 share the same style).
    colours = {
        0.1: "tab:blue",
        0.5: "tab:orange",
        0.75: "tab:green",
        1.0: "tab:green",
    }

    markers = {
        0.1: "o",
        0.5: "^",
        0.75: "x",
        1.0: "x",
    }

    markers_size = 4.5

    for i, k in enumerate(KS):
        ax = axis[i]
        for phi in PHIS:
            mean_series = covering[k, phi]["mean"]
            # Plot only every step-th point so the markers stay readable.
            # max(1, ...) prevents a zero slice step (ValueError) when the
            # series holds fewer than five entries.
            step = max(1, len(mean_series) // 5)
            x = mean_series[::step]

            ax.plot(
                x.index,
                x,
                color=colours[phi],
                linestyle="-",
                marker=markers[phi],
                markersize=markers_size,
            )

            # Dashed reference level for this phi, drawn in the same colour.
            ax.axhline(
                y=1 - phi / (1 + phi),
                color=colours[phi],
                linestyle="--",
            )

        # Pad both ends of the x-axis by 30% of the first plotted timestamp.
        # `x` is the sub-sampled series of the last phi drawn in this panel.
        x_padding = int(0.3 * x.index[0])
        ax.set_xlim(x.index[0] - x_padding, x.index[-1] + x_padding)

        # Ticks at the first, middle and last plotted timestamp.
        ax.set_xticks([x.index[0], x.index[len(x) // 2], x.index[-1]])
        # Raw strings: "\%" is not a valid Python escape sequence, and the
        # backslash has to reach the text renderer unchanged (presumably the
        # selected style renders text with LaTeX — confirm).
        # NOTE(review): the labels are fixed strings rather than derived from
        # the tick positions — confirm they match the plotted timestamps.
        ax.set_xticklabels(
            [r"20\%", r"50\%", r"100\%"],
            rotation=60,
            fontsize=7,
        )
        ax.tick_params("x", pad=0)

        ax.set_ylim(0.48, 1.02)
        # Alternative tighter range: ax.set_ylim(0.78, 1.02)

        ax.set_title(f"$k = {k}$")

        # Only the last panel carries the legend entry for the dashed lines.
        if i == len(KS) - 1:
            theoretical_err = mlines.Line2D(
                [],
                [],
                color="black",
                linestyle="--",
                label=r"$\left(1-\frac{\varphi}{1+\varphi}\right)$",
            )
            ax.legend(
                handles=[theoretical_err],
                loc="lower right",
                bbox_to_anchor=(0.98, 0.05),
            )

    # Figure-level legend with one entry per phi, placed above the panels.
    fig.legend(
        handles=[
            mlines.Line2D(
                [0],
                [0],
                label=f"$\\varphi={phi}$",
                color=colours[phi],
                marker=markers[phi],
            )
            for phi in PHIS
        ],
        loc="upper center",
        ncol=len(PHIS),
        bbox_to_anchor=(0.5, 1.13),
        fontsize=9,
    )

    plt.savefig(f"explicit-ball-size/covering_{DATASET}_vertical.pdf", bbox_inches="tight")