In [15]:
import pandas as pd
import os
import json
from matplotlib import pyplot as plt
import seaborn as sns
from ReadersourcingParameters import ReadersourcingParameters

plt.rcParams['figure.figsize']=(9,9)
plt.ioff()

def build_df(quantity_label, seed):
    quantity_df = pd.DataFrame(columns=["Shuffle", "Identifier", "Quantity"])
    for index_shuffle in range(seed.shuffle_amount):
        percentage = 100*index_shuffle/seed.shuffle_amount
        if percentage % 5 == 0:
            print("{}/{} ({}/100%)".format(int(index_shuffle), seed.shuffle_amount, int(percentage)))
        quantities = pd.read_json("{}/shuffle/shuffle_{}/quantities.json".format(seed.result_folder_base_path, index_shuffle))
        row = quantities.loc[quantities["Quantity"] == quantity_label]
        row = row.reset_index()
        identifiers = row.at[0, "Identifiers"]
        quantity = row.at[0, "Values"]
        for index, identifier in enumerate(identifiers):
            quantity_df = quantity_df.append({"Shuffle": index_shuffle, "Identifier": identifier, "Quantity": quantity[index]}, ignore_index=True)
    quantity_df["Shuffle"] = quantity_df["Shuffle"].astype(int)
    quantity_df["Identifier"] = quantity_df["Identifier"].astype(int)
    print("{}/{} (100/100%)".format(seed.shuffle_amount, seed.shuffle_amount))
    return quantity_df

In [16]:
# ------------------------------
# ---------- EXP 1-A -----------
# ------------------------------

# Reader 1 dev essere il migliore, seguito da quello che vota a metà scala e da quello che vota agli estremi

seed_shuffle_1_special = ReadersourcingParameters(
    dataset_name="seed_shuffle_1_special", 
    dataset_folder_path="../data/{}/", 
    data_shuffled=True, 
    current_shuffle = 0,
    shuffle_amount=100
)

df = build_df("Reader Score", seed_shuffle_1_special)

readers_number = df[df["Shuffle"] == 0].shape[0]
df = df[
    (df["Identifier"] == readers_number-1) |
    (df["Identifier"] == readers_number-2) |
    (df["Identifier"] == readers_number-3) 
]

for shuffle_index in range(seed_shuffle_1_special.shuffle_amount):
    df_shuffle = df[df["Shuffle"] == shuffle_index]
    display(df_shuffle)
    reader_1_rating = df_shuffle.at[readers_number-1, "Quantity"]
    reader_2_rating = df_shuffle.at[readers_number-2, "Quantity"]
    reader_3_rating = df_shuffle.at[readers_number-3, "Quantity"]
    print(reader_1_rating, reader_2_rating, reader_3_rating)
    assert False

0/100 (0/100%)
{'Values': [-45.254051230472214, 0.863910569825657, 0.49299976452371685, 0.6644034872831285, 0.3378666984074914, -105.22016726261715, 0.1083881082083535, 0.48514590934072854, 0.2476717243527222, 0.6235055263622776, 0.5053968766215179, 0.9149066826145429, 0.709674286585878, 0.9201507132654091, 0.48068601237988823, 1.0409107850250152, 0.4962902585379341, 0.4372544936396318, 0.5035464755716988, 0.5794770122301093, 0.11786015559401146, 0.4719970797185512, 0.11184980607386595, 0.14863739351599256, 0.22001048550103958, 0.4902920779750107, -44.26664445463439, -56.174812538563664, 0.3473181035395917, 0.4603157896771629, 0.4552257102319284, 0.26329545658696063, -0.04630040448105346, 1.0089034193778825, 0.4107923657359918, 0.6802888201508723, 0.5469151052047103, 0.43544930369771184, 1.0457778100157054, 0.7566512759087833, -0.27281871636255656, 0.0768902323599087, 0.5677467009503885, 0.5972118744278135, 0.14415204321743613, 0.6118453648743446, 0.5272388023166386, 0.131519241578141,

AssertionError: 

In [68]:
# ------------------------------
# ---------- EXP 1-B -----------
# ------------------------------

seed_shuffle_1 = ReadersourcingParameters(
    dataset_name="seed_shuffle_1", 
    dataset_folder_path="../data/{}/", 
    data_shuffled=True, 
    current_shuffle = 0,
    shuffle_amount=100
)

result_folder_experiments_path = "{}/experiments/".format(seed_shuffle_1.result_folder_base_path)
result_folder_exp_1_path = "{}/experiment_1-b/".format(result_folder_experiments_path)

os.makedirs(result_folder_experiments_path, exist_ok=True)

In [69]:
df = build_df("Paper Score", seed_shuffle_1)

print("---------- BUILDING PAPER SCORE SCATTER PLOT ----------")
os.makedirs(result_folder_exp_1_path, exist_ok=True)
ax = sns.scatterplot(x="Identifier", y="Quantity", data=df)
fig = ax.get_figure()
path = "{}/paper-score-scatterplot.pdf".format(result_folder_exp_1_path)
fig.savefig(path, bbox_inches='tight')
plt.close(fig)

print("---------- BUILDING PAPER SCORE LINE PLOT ----------")
df = df[df["Shuffle"] < 6]
df = df[df["Identifier"] < 20]
ax = sns.pointplot(x="Identifier", y="Quantity", hue="Shuffle", data=df)
fig = ax.get_figure()
path = "{}/paper-score-lineplot.pdf".format(result_folder_exp_1_path)
fig.savefig(path, bbox_inches='tight')
plt.close(fig)

0/100 (0/100%)
5/100 (5/100%)
10/100 (10/100%)
15/100 (15/100%)
20/100 (20/100%)
25/100 (25/100%)
30/100 (30/100%)
35/100 (35/100%)
40/100 (40/100%)
45/100 (45/100%)
50/100 (50/100%)
55/100 (55/100%)
60/100 (60/100%)
65/100 (65/100%)
70/100 (70/100%)
75/100 (75/100%)
80/100 (80/100%)
85/100 (85/100%)
90/100 (90/100%)
95/100 (95/100%)
100/100 (100/100%)
---------- BUILDING PAPER SCORE SCATTER PLOT ----------
---------- BUILDING PAPER SCORE LINE PLOT ----------


In [70]:
df = build_df("Paper Steadiness", seed_shuffle_1)

print("---------- BUILDING PAPER STEADINESS SCATTER PLOT ----------")
os.makedirs(result_folder_exp_1_path, exist_ok=True)
ax = sns.scatterplot(x="Identifier", y="Quantity", data=df)
fig = ax.get_figure()
path = "{}/paper-steadiness-scatterplot.pdf".format(result_folder_exp_1_path)
fig.savefig(path, bbox_inches='tight')
plt.close(fig)

print("---------- BUILDING PAPER STEADINESS LINE PLOT ----------")
df = df[df["Shuffle"] < 6]
df = df[df["Identifier"] < 20]
ax = sns.pointplot(x="Identifier", y="Quantity", hue="Shuffle", data=df)
fig = ax.get_figure()
path = "{}/paper-steadiness-lineplot.pdf".format(result_folder_exp_1_path)
fig.savefig(path, bbox_inches='tight')
plt.close(fig)

0/100 (0/100%)
5/100 (5/100%)
10/100 (10/100%)
15/100 (15/100%)
20/100 (20/100%)
25/100 (25/100%)
30/100 (30/100%)
35/100 (35/100%)
40/100 (40/100%)
45/100 (45/100%)
50/100 (50/100%)
55/100 (55/100%)
60/100 (60/100%)
65/100 (65/100%)
70/100 (70/100%)
75/100 (75/100%)
80/100 (80/100%)
85/100 (85/100%)
90/100 (90/100%)
95/100 (95/100%)
100/100 (100/100%)
---------- BUILDING PAPER STEADINESS SCATTER PLOT ----------
---------- BUILDING PAPER STEADINESS LINE PLOT ----------


In [71]:
df = build_df("Reader Score", seed_shuffle_1)

print("---------- BUILDING READER SCORE SCATTER PLOT ----------")
os.makedirs(result_folder_exp_1_path, exist_ok=True)
ax = sns.scatterplot(x="Identifier", y="Quantity", data=df)
fig = ax.get_figure()
path = "{}/reader-score-scatterplot.pdf".format(result_folder_exp_1_path)
fig.savefig(path, bbox_inches='tight')
plt.close(fig)

print("---------- BUILDING READER SCORE LINE PLOT ----------")
df = df[df["Shuffle"] < 6]
df = df[df["Identifier"] < 20]
ax = sns.pointplot(x="Identifier", y="Quantity", hue="Shuffle", data=df)
fig = ax.get_figure()
path = "{}/reader-score-lineplot.pdf".format(result_folder_exp_1_path)
fig.savefig(path, bbox_inches='tight')
plt.close(fig)

0/100 (0/100%)
5/100 (5/100%)
10/100 (10/100%)
15/100 (15/100%)
20/100 (20/100%)
25/100 (25/100%)
30/100 (30/100%)
35/100 (35/100%)
40/100 (40/100%)
45/100 (45/100%)
50/100 (50/100%)
55/100 (55/100%)
60/100 (60/100%)
65/100 (65/100%)
70/100 (70/100%)
75/100 (75/100%)
80/100 (80/100%)
85/100 (85/100%)
90/100 (90/100%)
95/100 (95/100%)
100/100 (100/100%)
---------- BUILDING READER SCORE SCATTER PLOT ----------
---------- BUILDING READER SCORE LINE PLOT ----------


In [72]:
df = build_df("Reader Steadiness", seed_shuffle_1)

print("---------- BUILDING READER STEADINESS SCATTER PLOT ----------")
os.makedirs(result_folder_exp_1_path, exist_ok=True)
ax = sns.scatterplot(x="Identifier", y="Quantity", data=df)
fig = ax.get_figure()
path = "{}/reader-steadiness-scatterplot.pdf".format(result_folder_exp_1_path)
fig.savefig(path, bbox_inches='tight')
plt.close(fig)

print("---------- BUILDING READER STEADINESS LINE PLOT ----------")
df = df[df["Shuffle"] < 6]
df = df[df["Identifier"] < 20]
ax = sns.pointplot(x="Identifier", y="Quantity", hue="Shuffle", data=df)
fig = ax.get_figure()
path = "{}/reader-steadiness-lineplot.pdf".format(result_folder_exp_1_path)
fig.savefig(path, bbox_inches='tight')
plt.close(fig)

0/100 (0/100%)
5/100 (5/100%)
10/100 (10/100%)
15/100 (15/100%)
20/100 (20/100%)
25/100 (25/100%)
30/100 (30/100%)
35/100 (35/100%)
40/100 (40/100%)
45/100 (45/100%)
50/100 (50/100%)
55/100 (55/100%)
60/100 (60/100%)
65/100 (65/100%)
70/100 (70/100%)
75/100 (75/100%)
80/100 (80/100%)
85/100 (85/100%)
90/100 (90/100%)
95/100 (95/100%)
100/100 (100/100%)
---------- BUILDING READER STEADINESS SCATTER PLOT ----------
---------- BUILDING READER STEADINESS LINE PLOT ----------
