In [87]:
import pandas as pd
import os
from matplotlib import pyplot as plt
import seaborn as sns
from ReadersourcingParameters import ReadersourcingParameters

# Turn matplotlib interactive mode off: the plotting cells in this notebook
# write each figure to disk with savefig() and then close it explicitly.
plt.ioff()

def build_df_shuffle(quantity_label, seed, shuffle_perc, identifiers_perc):
    """Gather one quantity across several shuffles into a long-format DataFrame.

    For every selected shuffle index ``i`` the file
    ``{seed.result_folder_base_path}/shuffle/shuffle_{i}/quantities.json`` is
    read, the row whose "Quantity" column equals ``quantity_label`` is picked,
    and one output row is produced per identifier of that row.

    Parameters
    ----------
    quantity_label : str
        Value of the "Quantity" column to select (e.g. "Paper Score").
    seed : object
        Must expose ``shuffle_amount`` and ``result_folder_base_path``.
    shuffle_perc : int or float
        Percentage of ``seed.shuffle_amount`` shuffles to read, counted from
        shuffle 0.
    identifiers_perc : int or float
        Percentage of identifiers to keep, taken from the start of the
        "Identifiers" list. ``0`` keeps every identifier.

    Returns
    -------
    pandas.DataFrame
        Columns "Shuffle" (int), "Identifier" (int) and "Quantity".

    Raises
    ------
    KeyError
        If ``quantity_label`` is not present in a quantities file.
    """
    # Rows are accumulated in a plain list and turned into a DataFrame once at
    # the end: DataFrame.append was removed in pandas 2.0 and copying the frame
    # for every row is quadratic.
    records = []
    shuffle_amount = round((seed.shuffle_amount * shuffle_perc) / 100)
    for index_shuffle in range(shuffle_amount):
        # Progress is reported relative to the total number of shuffles of the
        # seed, not to the (smaller) number of shuffles actually read.
        percentage = 100 * index_shuffle / seed.shuffle_amount
        if percentage % 5 == 0:
            print("{}/{} ({}/100%)".format(int(index_shuffle), seed.shuffle_amount, int(percentage)))
        quantities_path = "{}/shuffle/shuffle_{}/quantities.json".format(seed.result_folder_base_path, index_shuffle)
        quantities = pd.read_json(quantities_path)
        row = quantities.loc[quantities["Quantity"] == quantity_label].reset_index()
        if row.empty:
            raise KeyError("quantity {!r} not found in {}".format(quantity_label, quantities_path))
        identifiers = row.at[0, "Identifiers"]
        if identifiers_perc != 0:
            identifiers_amount = round((len(identifiers) * identifiers_perc) / 100)
            identifiers = identifiers[:identifiers_amount]
        values = row.at[0, "Values"]
        # Values are looked up by position so a "Values" list shorter than the
        # identifiers still fails loudly instead of being silently truncated.
        records.extend(
            {"Shuffle": index_shuffle, "Identifier": identifier, "Quantity": values[position]}
            for position, identifier in enumerate(identifiers)
        )
    quantity_df = pd.DataFrame(records, columns=["Shuffle", "Identifier", "Quantity"])
    quantity_df["Shuffle"] = quantity_df["Shuffle"].astype(int)
    quantity_df["Identifier"] = quantity_df["Identifier"].astype(int)
    print("{}/{} (100/100%)".format(seed.shuffle_amount, seed.shuffle_amount))
    return quantity_df

def build_df(quantity_label, seed, identifiers_perc):
    """Load one quantity of a (non-shuffled) run into a long-format DataFrame.

    Reads ``{seed.result_folder_base_path}/quantities.json``, displays the raw
    table in the notebook, picks the row whose "Quantity" column equals
    ``quantity_label`` and emits one output row per identifier of that row.

    Parameters
    ----------
    quantity_label : str
        Value of the "Quantity" column to select (e.g. "Paper Score").
    seed : object
        Must expose ``result_folder_base_path`` and ``shuffle_amount`` (the
        latter is only used by the final progress message).
    identifiers_perc : int or float
        Percentage of identifiers to keep, taken from the start of the
        "Identifiers" list. ``0`` keeps every identifier.

    Returns
    -------
    pandas.DataFrame
        Columns "Identifier" (int) and "Quantity".

    Raises
    ------
    KeyError
        If ``quantity_label`` is not present in the quantities file.
    """
    quantities_path = "{}/quantities.json".format(seed.result_folder_base_path)
    quantities = pd.read_json(quantities_path)
    # `display` is the notebook built-in; the raw table is shown on every call.
    display(quantities)
    row = quantities.loc[quantities["Quantity"] == quantity_label].reset_index()
    if row.empty:
        raise KeyError("quantity {!r} not found in {}".format(quantity_label, quantities_path))
    identifiers = row.at[0, "Identifiers"]
    if identifiers_perc != 0:
        identifiers_amount = round((len(identifiers) * identifiers_perc) / 100)
        identifiers = identifiers[:identifiers_amount]
    values = row.at[0, "Values"]
    # Build all rows first and create the frame once: DataFrame.append was
    # removed in pandas 2.0 and re-copies the whole frame for every row.
    records = [
        {"Identifier": identifier, "Quantity": values[position]}
        for position, identifier in enumerate(identifiers)
    ]
    quantity_df = pd.DataFrame(records, columns=["Identifier", "Quantity"])
    quantity_df["Identifier"] = quantity_df["Identifier"].astype(int)
    # Same completion message format as build_df_shuffle.
    print("{}/{} (100/100%)".format(seed.shuffle_amount, seed.shuffle_amount))
    return quantity_df

In [68]:
# ------------------------------
# ---------- EXP 1-A -----------
# ------------------------------
#
# Analyze how the scores of three "special" readers evolve across X shuffles
# of the ratings of a given seed:
#   - Reader 1 always rates in the "exact" way (on the mean);
#   - Reader 2 always rates at mid-scale relative to 1001;
#   - Reader 3 always rates at the extremes.
# The desired ordering of the scores is Reader 1 -> Reader 2 -> Reader 3
# (ascending order).

seed = ReadersourcingParameters(
    dataset_name="seed_shuffle_1_special",
    dataset_folder_path="../data/{}/",
    data_shuffled=True,
    current_shuffle=0,
    shuffle_amount=100,
)

# Plot parameters: figure size (set globally through rcParams) and the line
# width that the plotting cell applies through rc_context.
plt.rcParams["figure.figsize"] = (6, 6)
linesize = 0.5

# Output folders for this experiment, created if they do not exist yet.
result_folder_experiments_path = "{}/experiments/".format(seed.result_folder_base_path)
result_folder_exp_1_a_path = "{}/experiment_1-a/".format(result_folder_experiments_path)
os.makedirs(result_folder_experiments_path, exist_ok=True)
os.makedirs(result_folder_exp_1_a_path, exist_ok=True)

In [72]:
shuffle_perc = 5
papers_perc = 5
# identifiers_perc=0 keeps every reader of each shuffle.
df = build_df_shuffle(
    quantity_label="Reader Score",
    seed=seed,
    shuffle_perc=shuffle_perc,
    identifiers_perc=0,
)

# Number of readers = number of rows belonging to the first shuffle.
readers_number = len(df[df["Shuffle"] == 0])

# Keep only the three readers with the highest identifiers.
df = df[df["Identifier"].isin([readers_number - 1, readers_number - 2, readers_number - 3])]

0/100 (0/100%)
100/100 (100/100%)


In [73]:
# Destination of the rendered figure.
path = "{}/special-reader-score-lineplot.pdf".format(result_folder_exp_1_a_path)

# One line per shuffle, drawn with the line width configured for this experiment.
with plt.rc_context({"lines.linewidth": linesize}):
    ax = sns.pointplot(
        data=df,
        x="Identifier",
        y="Quantity",
        hue="Shuffle",
        order=[readers_number - 3, readers_number - 2, readers_number - 1],
    )
    ax.set_title(f"shuffle_perc: {shuffle_perc}%")

# Save the figure and release it.
fig = ax.get_figure()
fig.savefig(path, bbox_inches="tight")
plt.close(fig)

In [55]:
# ------------------------------
# ---------- EXP 1-B -----------
# ------------------------------
#
# Analyze the quantities computed by Readersourcing for X shuffles of the
# ratings of a given seed. For each quantity a plot is built that shows the
# values of that quantity for each shuffle.

seed_shuffle_1 = ReadersourcingParameters(
    dataset_name="seed_shuffle_1",
    dataset_folder_path="../data/{}/",
    data_shuffled=True,
    current_shuffle=0,
    shuffle_amount=100,
)

# Plot parameters: line width applied by the plotting cells through rc_context.
linesize = 0.75

# Output folders for this experiment, created if they do not exist yet.
result_folder_experiments_path = "{}/experiments/".format(seed_shuffle_1.result_folder_base_path)
result_folder_exp_1_b_path = "{}/experiment_1-b/".format(result_folder_experiments_path)
os.makedirs(result_folder_experiments_path, exist_ok=True)
os.makedirs(result_folder_exp_1_b_path, exist_ok=True)

In [56]:
shuffle_perc, papers_perc = 5, 5
df = build_df_shuffle(
    quantity_label="Paper Score",
    seed=seed_shuffle_1,
    shuffle_perc=shuffle_perc,
    identifiers_perc=papers_perc,
)

print("---------- BUILDING PAPER SCORE LINE PLOT ----------")
# Destination of the rendered figure.
path = "{}/paper-score-lineplot.pdf".format(result_folder_exp_1_b_path)
with plt.rc_context({"lines.linewidth": linesize}):
    ax = sns.pointplot(data=df, x="Identifier", y="Quantity", hue="Shuffle")
    ax.set_title(f"shuffle_perc: {shuffle_perc}%, papers_perc: {papers_perc}%")

# Save the figure and release it.
fig = ax.get_figure()
fig.savefig(path, bbox_inches="tight")
plt.close(fig)

0/100 (0/100%)
100/100 (100/100%)
---------- BUILDING PAPER SCORE LINE PLOT ----------


In [57]:
shuffle_perc, papers_perc = 5, 5
df = build_df_shuffle(
    quantity_label="Paper Steadiness",
    seed=seed_shuffle_1,
    shuffle_perc=shuffle_perc,
    identifiers_perc=papers_perc,
)

print("---------- BUILDING PAPER STEADINESS LINE PLOT ----------")
# Destination of the rendered figure.
path = "{}/paper-steadiness-lineplot.pdf".format(result_folder_exp_1_b_path)
with plt.rc_context({"lines.linewidth": linesize}):
    ax = sns.pointplot(data=df, x="Identifier", y="Quantity", hue="Shuffle")
    ax.set_title(f"shuffle_perc: {shuffle_perc}%, papers_perc: {papers_perc}%")

# Save the figure and release it.
fig = ax.get_figure()
fig.savefig(path, bbox_inches="tight")
plt.close(fig)

0/100 (0/100%)
100/100 (100/100%)
---------- BUILDING PAPER STEADINESS LINE PLOT ----------


In [58]:
shuffle_perc, readers_perc = 5, 2
df = build_df_shuffle(
    quantity_label="Reader Score",
    seed=seed_shuffle_1,
    shuffle_perc=shuffle_perc,
    identifiers_perc=readers_perc,
)

print("---------- BUILDING READER SCORE LINE PLOT ----------")
# Destination of the rendered figure.
path = "{}/reader-score-lineplot.pdf".format(result_folder_exp_1_b_path)
with plt.rc_context({"lines.linewidth": linesize}):
    ax = sns.pointplot(data=df, x="Identifier", y="Quantity", hue="Shuffle")
    ax.set_title(f"shuffle_perc: {shuffle_perc}%, readers_perc: {readers_perc}%")

# Save the figure and release it.
fig = ax.get_figure()
fig.savefig(path, bbox_inches="tight")
plt.close(fig)

0/100 (0/100%)
100/100 (100/100%)
---------- BUILDING READER SCORE LINE PLOT ----------


In [60]:
shuffle_perc, readers_perc = 5, 2
df = build_df_shuffle(
    quantity_label="Reader Steadiness",
    seed=seed_shuffle_1,
    shuffle_perc=shuffle_perc,
    identifiers_perc=readers_perc,
)

print("---------- BUILDING READER STEADINESS LINE PLOT ----------")
# Destination of the rendered figure.
path = "{}/reader-steadiness-lineplot.pdf".format(result_folder_exp_1_b_path)
with plt.rc_context({"lines.linewidth": linesize}):
    ax = sns.pointplot(data=df, x="Identifier", y="Quantity", hue="Shuffle")
    ax.set_title(f"shuffle_perc: {shuffle_perc}%, readers_perc: {readers_perc}%")

# Save the figure and release it.
fig = ax.get_figure()
fig.savefig(path, bbox_inches="tight")
plt.close(fig)

0/100 (0/100%)
100/100 (100/100%)
---------- BUILDING READER STEADINESS LINE PLOT ----------


In [90]:
# ------------------------------
# ---------- EXP 1-C -----------
# ------------------------------
#
# The quantities computed by Readersourcing are compared with their
# arithmetic, geometric and Bayesian mean and with the median. The standard
# indexes are computed on the ratings received by each Paper and on the
# ratings given by each Reader.

# NOTE(review): this rebinds `seed`, the same name the EXP 1-A cells use for
# their dataset; re-running those cells after this one picks up this dataset.
seed = ReadersourcingParameters(
    dataset_name="ground_truth_2",
    dataset_folder_path="../data/{}/",
)

# Plot parameters: line width for the plots of this experiment.
linesize = 0.75

# Output folders for this experiment, created if they do not exist yet.
result_folder_experiments_path = "{}/experiments/".format(seed.result_folder_base_path)
result_folder_exp_1_c_path = "{}/experiment_1-c/".format(result_folder_experiments_path)
os.makedirs(result_folder_experiments_path, exist_ok=True)
os.makedirs(result_folder_exp_1_c_path, exist_ok=True)

In [91]:
# Load every paper score (identifiers_perc=100) of the current seed and show it.
df = build_df(
    quantity_label="Paper Score",
    seed=seed,
    identifiers_perc=100,
)
display(df)

Unnamed: 0,Quantity,Identifiers,Values
0,Paper Steadiness,"[0, 1]","[3e-06, 2.769059892324149]"
1,Paper Score,"[0, 1]","[0.5366666666666661, 0.47419407590625506]"
2,Reader Steadiness,"[0, 1, 2]","[2.769062892324149, 2.769062892324149, 2.76906..."
3,Reader Score,"[0, 1, 2]","[0.6620740494702401, 0.32486654997562003, 0.33..."
4,Author Steadiness,"[0, 1]","[2.769062892324149, 2.769062892324149]"
5,Author Score,"[0, 1]","[0.47419414358899803, 0.47419414358899803]"


0/0 (100/100%)


Unnamed: 0,Identifier,Quantity
0,0,0.536667
1,1,0.474194
