In [None]:
import os

import matplotlib.lines as mlines
import matplotlib.pyplot as plt
import numpy as np

from tqdm import tqdm

In [None]:
# Fix the global NumPy RNG state so that every run of the notebook draws the same samples
np.random.seed(seed=123)

In [None]:
# Figure text parameters: 20pt font, text rendered through LaTeX with the packages below
plt.rcParams.update({
    'font.size': 20,
    'text.usetex': True,
    'text.latex.preamble': r'\usepackage{amsmath,amssymb,bm,bbm,lmodern}',
})

In [None]:
# Figure parameters (every dict below is keyed by the internal method id)
name_mapping = dict(  # Legend label of each method
    method_1='No Bandwagon',
    method_2='Weak Bandwagon',
    method_3='Strong Bandwagon',
)
colours = dict(method_1="lime", method_2="red", method_3="black")
markers = dict(method_1='x', method_2='^', method_3='v')
styles = dict.fromkeys(name_mapping, '-')  # All methods share a solid line
linewidths = dict.fromkeys(name_mapping, 3.5)  # All methods share one line width
zorders = dict(method_1=3, method_2=2, method_3=1)  # Higher values are drawn on top

num_points_plotted = 50  # Too many points leads to jitter
alpha = 0.4  # Opacity

In [None]:
# Base file name (without extension) used when saving the figure and its legend
graph_name = 'bw_level_comparison'

In [None]:
# Simulation settings
N = 1_000  # Number of independent runs
n = 1_000_000  # Number of ratings in each run
p = 0.4  # True relevance

In [None]:
# Plotting helper variables
x_values = np.arange(n) + 1  # 1-based rating counts: 1, 2, ..., n
# Log-spaced rating counts, truncated to integers and de-duplicated, then
# shifted by one so they can be used as 0-based array indices.
log_spaced_counts = np.geomspace(1, n, num_points_plotted).astype(int)
plot_idx = (np.unique(log_spaced_counts) - 1).tolist()

In [None]:
# No, weak and strong bandwagon parameters: (a, b)
bandwagons_all = [(1, 1), (0.6, 0.9), (0.1, 0.95)]

# For each setting, lambda_i = a + (1 - a) * b**i for i = 0, ..., n-1
exponents = np.arange(n)
lambdas_all = []
for a, b in bandwagons_all:
    lambdas_all.append(a + (1 - a) * b**exponents)

In [None]:
# Generate the data by iterating over Equation 4 for each setting.
# Only the running sample mean is stored: the individual ratings are consumed
# immediately by the mean update, so keeping a second (N, n) array for them
# would double the memory footprint without being read anywhere.
M_all = []
for lambdas in lambdas_all:
    M = np.zeros((N,n))  # Sample mean of every run (rows) after every rating (columns)
    ri = np.random.rand(N) < p  # First rating sampled from Bernoulli(p)
    M[:,0] = ri  # After one rating the sample mean is the rating itself
    m = ri

    # Iterate over timesteps (all runs in parallel)
    for i in tqdm(range(1,n)):
        pri = lambdas[i]*p + (1 - lambdas[i])*m  # Calculate P(r_n=1|\bar{p}_{n-1})
        ri = np.random.rand(N) < pri  # Sample new rating
        m = (m*i + ri)/(i + 1)  # Update sample mean (i ratings seen so far, one added)
        M[:,i] = m  # Record the new sample mean
    M_all.append(M)

In [None]:
# Key the simulated sample means by method id: method_1 is the first setting, and so on
include_all = {f"method_{pos}": M_all[pos - 1] for pos in (1, 2, 3)}

In [None]:
# Mean and 5th-95th percentile band across runs, evaluated at the plotted timesteps only.
data_temp, ci_temp = {}, {}
for method, runs in include_all.items():
    at_plotted = runs[:, plot_idx]
    data_temp[method] = at_plotted.mean(axis=0)
    ci_temp[method] = (
        np.percentile(at_plotted, 5, axis=0),
        np.percentile(at_plotted, 100 - 5, axis=0),
    )

# Scatter the summaries into full-length arrays. Indices that are not plotted keep
# placeholder values: 0 for the mean and the lower bound, 1 for the upper bound.
data, ci = {}, {}
for k in data_temp:
    data[k] = np.zeros(n)
    data[k][plot_idx] = data_temp[k]
    lower, upper = np.zeros(n), np.ones(n)
    lower[plot_idx], upper[plot_idx] = ci_temp[k]
    ci[k] = [lower, upper]

In [None]:
# Per-method line properties; used for the plot colours/z-order and as Line2D kwargs for the legend
legend_info = {
    method: dict(
        linestyle=styles[method],
        color=colours[method],
        markersize=12,
        fillstyle='none',
        label=name_mapping[method],
        linewidth=linewidths[method],
        zorder=zorders[method],
    )
    for method in data
}

In [None]:
# Plot
fig = plt.figure(figsize=(7.38/1.03, 1.25*2), linewidth=0.5)
fig.tight_layout()
plt.ioff()
plt.xscale('log')
plt.gca().yaxis.set_ticks_position('both')
plt.gca().xaxis.set_ticks_position('both')
plt.xlim(1, n)
plt.xticks(10**np.arange(np.log10(n)+1))  # One tick per power of ten up to n
plt.ylim(0, 1.)

# Shaded band between the lower and upper percentile curves of each method,
# outlined with faint lines in that method's own colour.
for _k in data:
    _y_min, _y_max = ci[_k]
    _colour = legend_info[_k]["color"]
    plt.fill_between(
        x_values[plot_idx], _y_min[plot_idx], _y_max[plot_idx],
        alpha=alpha,
        color=_colour,
        zorder=legend_info[_k]["zorder"]
    )
    plt.plot(x_values[plot_idx], _y_min[plot_idx], color=_colour, alpha=0.3)
    plt.plot(x_values[plot_idx], _y_max[plot_idx], color=_colour, alpha=0.3)

# Vertical markers at the first plotted rating count where the upper bound of a
# method drops below a threshold (solid: 0.45, dash-dotted: 0.41).
# The upper bound is 1 at every index that is not plotted, so only plotted
# indices can satisfy the comparison.
_marker_colours = {
    "method_1": tuple(x/255 for x in [115,131,76]),
    "method_2": tuple(x/255 for x in [180,97,94]),
}
for _threshold, _linestyle in [(0.45, "-"), (0.41, "-.")]:
    for _k, _colour in _marker_colours.items():
        _below = ci[_k][1] < _threshold
        if not _below.any():
            continue  # Threshold never reached: argmax would return 0 and draw a bogus marker
        # argmax yields a 0-based index; map it to the 1-based x coordinate of the curves
        when = x_values[np.argmax(_below)]
        plt.plot([when,when], [0,1], color=_colour, alpha=1, linestyle=_linestyle)

plt.savefig('./%s.pdf' % (graph_name), bbox_inches='tight', pad_inches=0)

In [None]:
# Stand-alone legend, saved as its own PDF next to the main figure
figlegend = plt.figure(figsize=(0.5,0.5))
ncol = 3
legend = figlegend.legend(
    handles=[mlines.Line2D([], [], **line_kwargs) for line_kwargs in legend_info.values()],
    fontsize=18,
    loc='center',
    ncol=ncol,
    frameon=False,
    borderaxespad=0,
    borderpad=0,
    labelspacing=0.2,
    columnspacing=1.)

# Matplotlib 3.7 renamed ``Legend.legendHandles`` to ``legend_handles`` and the old
# name was removed in 3.9; fall back to the old attribute on earlier releases.
handles = getattr(legend, "legend_handles", None)
if handles is None:
    handles = legend.legendHandles

# Override the handle colours with explicit RGB values, in legend order
legend_colours = [[115,131,76], [180,97,94], [153,153,153]]
for handle, rgb in zip(handles, legend_colours):
    handle.set_color(tuple(x/255 for x in rgb))

figlegend.savefig(f'./{graph_name}_legend.pdf',
                bbox_inches='tight')