In [2]:
import pandas as pd
import os
import json
from matplotlib import pyplot as plt
import seaborn as sns
from ReadersourcingParameters import ReadersourcingParameters

# Default size (width, height) for every figure created in this notebook.
plt.rcParams['figure.figsize']=(9,9)
# Turn off pyplot interactive mode; the cells below save each figure with
# savefig and then close it explicitly.
plt.ioff()

def build_df(quantity_label, seed):
    """Gather one quantity across every shuffle into a single long-format frame.

    For each shuffle index in ``range(seed.shuffle_amount)`` the file
    ``<seed.result_folder_base_path>/shuffle/shuffle_<index>/quantities.json``
    is read, the row whose "Quantity" column equals ``quantity_label`` is
    selected, and its parallel "Identifiers" / "Values" lists are unpacked
    into one output row per identifier.

    Progress is printed whenever the completed percentage is a multiple of 5,
    plus a final 100% line.

    Parameters
    ----------
    quantity_label : str
        Value to look up in the "Quantity" column of each quantities file.
    seed : object
        Any object exposing ``shuffle_amount`` (number of shuffles) and
        ``result_folder_base_path`` (root folder of the results).

    Returns
    -------
    pandas.DataFrame
        Columns "Shuffle" (int), "Identifier" (int) and "Quantity", one row
        per (shuffle, identifier) pair, in reading order.

    Raises
    ------
    KeyError
        If a quantities file has no row for ``quantity_label``.
    """
    # Rows are accumulated in a plain list and the frame is built once at the
    # end: DataFrame.append copied the whole frame on every call and no longer
    # exists in pandas >= 2.0.
    records = []
    for index_shuffle in range(seed.shuffle_amount):
        percentage = 100*index_shuffle/seed.shuffle_amount
        if percentage % 5 == 0:
            print("{}/{} ({}/100%)".format(int(index_shuffle), seed.shuffle_amount, int(percentage)))
        quantities = pd.read_json("{}/shuffle/shuffle_{}/quantities.json".format(seed.result_folder_base_path, index_shuffle))
        row = quantities.loc[quantities["Quantity"] == quantity_label].reset_index()
        if row.empty:
            # Same exception type the positional lookup below would raise,
            # but with a message that names the actual problem.
            raise KeyError("Quantity '{}' not found in shuffle {}".format(quantity_label, index_shuffle))
        identifiers = row.at[0, "Identifiers"]
        quantity = row.at[0, "Values"]
        # Indexing (rather than zip) keeps the IndexError when the two lists
        # do not have matching lengths.
        for index, identifier in enumerate(identifiers):
            records.append({"Shuffle": index_shuffle, "Identifier": identifier, "Quantity": quantity[index]})
    quantity_df = pd.DataFrame(records, columns=["Shuffle", "Identifier", "Quantity"])
    quantity_df["Shuffle"] = quantity_df["Shuffle"].astype(int)
    quantity_df["Identifier"] = quantity_df["Identifier"].astype(int)
    print("{}/{} (100/100%)".format(seed.shuffle_amount, seed.shuffle_amount))
    return quantity_df

In [20]:
# ------------------------------
# ---------- EXP 1-A -----------
# ------------------------------

# Reader 1 should be the best one, followed by the reader who votes at
# mid-scale and then by the one who votes at the extremes of the scale.

seed = ReadersourcingParameters(
    dataset_name="seed_shuffle_1_special",
    dataset_folder_path="../data/{}/",
    data_shuffled=True,
    current_shuffle=0,
    shuffle_amount=28,
)

# Output folders for this experiment, created up front.
result_folder_experiments_path = "{}/experiments/".format(seed.result_folder_base_path)
result_folder_exp_1_a_path = "{}/experiment_1-a/".format(result_folder_experiments_path)
for output_folder in (result_folder_experiments_path, result_folder_exp_1_a_path):
    os.makedirs(output_folder, exist_ok=True)

df = build_df("Reader Score", seed)

# Number of rows belonging to the first shuffle, i.e. one per reader.
readers_number = len(df[df["Shuffle"] == 0])

# Keep only the rows of the three identifiers just below readers_number
# (presumably the hand-crafted "special" readers -- TODO confirm).
df = df[df["Identifier"].isin([readers_number - 1, readers_number - 2, readers_number - 3])]

0/28 (0/100%)
7/28 (25/100%)
14/28 (50/100%)
21/28 (75/100%)
28/28 (100/100%)


In [21]:
# Point plot of the reader score of the selected readers, one line per shuffle.
# NOTE(review): the explicit order assumes the selected identifiers are
# 1000, 1001 and 1002 -- confirm against readers_number.
path = "{}/special-reader-score-lineplot.pdf".format(result_folder_exp_1_a_path)
ax = sns.pointplot(
    data=df,
    x="Identifier",
    y="Quantity",
    hue="Shuffle",
    order=[1000, 1002, 1001],
    dodge=True,
)
fig = ax.get_figure()
fig.savefig(path, bbox_inches='tight')
# Release the figure once it has been written to disk.
plt.close(fig)

In [68]:
# ------------------------------
# ---------- EXP 1-B -----------
# ------------------------------

# Parameters of the dataset used by every EXP 1-B cell below.
seed_shuffle_1 = ReadersourcingParameters(
    dataset_name="seed_shuffle_1", 
    dataset_folder_path="../data/{}/", 
    data_shuffled=True, 
    current_shuffle = 0,
    shuffle_amount=100
)

result_folder_experiments_path = "{}/experiments/".format(seed_shuffle_1.result_folder_base_path)
result_folder_exp_1_b_path = "{}/experiment_1-b/".format(result_folder_experiments_path)
os.makedirs(result_folder_experiments_path, exist_ok=True)
# Create the experiment folder here as well, so that every plotting cell that
# saves into result_folder_exp_1_b_path works regardless of which plotting
# cell is executed first.
os.makedirs(result_folder_exp_1_b_path, exist_ok=True)

In [69]:
# Paper Score of every paper across all shuffles: one scatter plot and one
# point plot (a line per shuffle), both saved as PDF.
df = build_df(quantity_label="Paper Score", seed=seed_shuffle_1)

print("---------- BUILDING PAPER SCORE SCATTER PLOT ----------")
os.makedirs(result_folder_exp_1_b_path, exist_ok=True)
path = "{}/paper-score-scatterplot.pdf".format(result_folder_exp_1_b_path)
ax = sns.scatterplot(data=df, x="Identifier", y="Quantity")
fig = ax.get_figure()
fig.savefig(path, bbox_inches='tight')
plt.close(fig)  # free the figure before drawing the next one

print("---------- BUILDING PAPER SCORE LINE PLOT ----------")
path = "{}/paper-score-lineplot.pdf".format(result_folder_exp_1_b_path)
ax = sns.pointplot(data=df, x="Identifier", y="Quantity", hue="Shuffle")
fig = ax.get_figure()
fig.savefig(path, bbox_inches='tight')
plt.close(fig)

0/100 (0/100%)
5/100 (5/100%)
10/100 (10/100%)
15/100 (15/100%)
20/100 (20/100%)
25/100 (25/100%)
30/100 (30/100%)
35/100 (35/100%)
40/100 (40/100%)
45/100 (45/100%)
50/100 (50/100%)
55/100 (55/100%)
60/100 (60/100%)
65/100 (65/100%)
70/100 (70/100%)
75/100 (75/100%)
80/100 (80/100%)
85/100 (85/100%)
90/100 (90/100%)
95/100 (95/100%)
100/100 (100/100%)
---------- BUILDING PAPER SCORE SCATTER PLOT ----------
---------- BUILDING PAPER SCORE LINE PLOT ----------


In [70]:
# Paper Steadiness of every paper across all shuffles: one scatter plot and
# one point plot (a line per shuffle), both saved as PDF.
df = build_df(quantity_label="Paper Steadiness", seed=seed_shuffle_1)

print("---------- BUILDING PAPER STEADINESS SCATTER PLOT ----------")
path = "{}/paper-steadiness-scatterplot.pdf".format(result_folder_exp_1_b_path)
ax = sns.scatterplot(data=df, x="Identifier", y="Quantity")
fig = ax.get_figure()
fig.savefig(path, bbox_inches='tight')
plt.close(fig)  # free the figure before drawing the next one

print("---------- BUILDING PAPER STEADINESS LINE PLOT ----------")
path = "{}/paper-steadiness-lineplot.pdf".format(result_folder_exp_1_b_path)
ax = sns.pointplot(data=df, x="Identifier", y="Quantity", hue="Shuffle")
fig = ax.get_figure()
fig.savefig(path, bbox_inches='tight')
plt.close(fig)

0/100 (0/100%)
5/100 (5/100%)
10/100 (10/100%)
15/100 (15/100%)
20/100 (20/100%)
25/100 (25/100%)
30/100 (30/100%)
35/100 (35/100%)
40/100 (40/100%)
45/100 (45/100%)
50/100 (50/100%)
55/100 (55/100%)
60/100 (60/100%)
65/100 (65/100%)
70/100 (70/100%)
75/100 (75/100%)
80/100 (80/100%)
85/100 (85/100%)
90/100 (90/100%)
95/100 (95/100%)
100/100 (100/100%)
---------- BUILDING PAPER STEADINESS SCATTER PLOT ----------
---------- BUILDING PAPER STEADINESS LINE PLOT ----------


In [71]:
# Reader Score of every reader across all shuffles: one scatter plot and one
# point plot (a line per shuffle), both saved as PDF.
df = build_df(quantity_label="Reader Score", seed=seed_shuffle_1)

print("---------- BUILDING READER SCORE SCATTER PLOT ----------")
path = "{}/reader-score-scatterplot.pdf".format(result_folder_exp_1_b_path)
ax = sns.scatterplot(data=df, x="Identifier", y="Quantity")
fig = ax.get_figure()
fig.savefig(path, bbox_inches='tight')
plt.close(fig)  # free the figure before drawing the next one

print("---------- BUILDING READER SCORE LINE PLOT ----------")
path = "{}/reader-score-lineplot.pdf".format(result_folder_exp_1_b_path)
ax = sns.pointplot(data=df, x="Identifier", y="Quantity", hue="Shuffle")
fig = ax.get_figure()
fig.savefig(path, bbox_inches='tight')
plt.close(fig)

0/100 (0/100%)
5/100 (5/100%)
10/100 (10/100%)
15/100 (15/100%)
20/100 (20/100%)
25/100 (25/100%)
30/100 (30/100%)
35/100 (35/100%)
40/100 (40/100%)
45/100 (45/100%)
50/100 (50/100%)
55/100 (55/100%)
60/100 (60/100%)
65/100 (65/100%)
70/100 (70/100%)
75/100 (75/100%)
80/100 (80/100%)
85/100 (85/100%)
90/100 (90/100%)
95/100 (95/100%)
100/100 (100/100%)
---------- BUILDING READER SCORE SCATTER PLOT ----------
---------- BUILDING READER SCORE LINE PLOT ----------


In [72]:
# Reader Steadiness of every reader across all shuffles: one scatter plot and
# one point plot (a line per shuffle), both saved as PDF.
df = build_df(quantity_label="Reader Steadiness", seed=seed_shuffle_1)

print("---------- BUILDING READER STEADINESS SCATTER PLOT ----------")
path = "{}/reader-steadiness-scatterplot.pdf".format(result_folder_exp_1_b_path)
ax = sns.scatterplot(data=df, x="Identifier", y="Quantity")
fig = ax.get_figure()
fig.savefig(path, bbox_inches='tight')
plt.close(fig)  # free the figure before drawing the next one

print("---------- BUILDING READER STEADINESS LINE PLOT ----------")
path = "{}/reader-steadiness-lineplot.pdf".format(result_folder_exp_1_b_path)
ax = sns.pointplot(data=df, x="Identifier", y="Quantity", hue="Shuffle")
fig = ax.get_figure()
fig.savefig(path, bbox_inches='tight')
plt.close(fig)

0/100 (0/100%)
5/100 (5/100%)
10/100 (10/100%)
15/100 (15/100%)
20/100 (20/100%)
25/100 (25/100%)
30/100 (30/100%)
35/100 (35/100%)
40/100 (40/100%)
45/100 (45/100%)
50/100 (50/100%)
55/100 (55/100%)
60/100 (60/100%)
65/100 (65/100%)
70/100 (70/100%)
75/100 (75/100%)
80/100 (80/100%)
85/100 (85/100%)
90/100 (90/100%)
95/100 (95/100%)
100/100 (100/100%)
---------- BUILDING READER STEADINESS SCATTER PLOT ----------
---------- BUILDING READER STEADINESS LINE PLOT ----------
