In [2]:
import pandas as pd
import numpy as np
import decimal
import math as m
from matplotlib import pyplot as plt
import os
import time
import json

# Print every float contained in a numpy array with eight decimal places
np.set_printoptions(formatter={'float': "{0:0.8f}".format})

# Starting value of every reader score: it has to be a very small non-zero
# number, otherwise the score updates would divide by zero

epsilon = 0.000001

# Locate and load the CSV files of the chosen dataset

dataset_name = "ground_truth_1"
dataset_folder_path = f"../data/{dataset_name}/"
info_filename = f"{dataset_folder_path}info.csv"
ratings_filename = f"{dataset_folder_path}ratings.csv"
authors_filename = f"{dataset_folder_path}authors.csv"

info = pd.read_csv(info_filename)
# Only the raw row arrays of these two tables are used from here on
paper_authors = pd.read_csv(authors_filename).values
paper_ratings = pd.read_csv(ratings_filename).values

# Initial setup

# The dataset name and the entity counts are read from the first row of info.csv
dataset_name, papers_number, readers_number, ratings_number, authors_number = (
    info[column][0] for column in ("Dataset", "Paper", "Reader", "Rating", "Author")
)

# Identifier ranges 0..count-1 for every kind of entity
papers, readers, ratings, authors = (
    np.arange(count)
    for count in (papers_number, readers_number, ratings_number, authors_number)
)

# State arrays, one slot per entity. They use the object dtype, so each slot
# holds a plain Python number; every quantity starts at zero.
paper_steadiness = np.zeros(papers_number, dtype=object)
paper_score = np.zeros(papers_number, dtype=object)
rating_goodness = np.zeros(ratings_number, dtype=object)
reader_steadiness = np.zeros(readers_number, dtype=object)
reader_score = np.zeros(readers_number, dtype=object)
author_steadiness = np.zeros(authors_number, dtype=object)
author_score = np.zeros(authors_number, dtype=object)

# Reader scores are the only quantity that does not start at zero
reader_score.fill(epsilon)

def get_author(current_paper) :
    """Return the identifiers of the authors who wrote ``current_paper``.

    Scans the module-level ``paper_authors`` table. Each row holds an author
    identifier in position 0 and that author's papers in position 1, written
    as paper identifiers separated by ``;``.

    Parameters:
        current_paper: integer identifier of the paper to look up.

    Returns:
        A numpy array with the identifier of every matching author, in table
        order; the array is empty when no author wrote the paper.
    """
    found_authors = []
    for author_entry in paper_authors :
        current_author = int(author_entry[0])
        # str() keeps this working when the column was parsed as plain numbers
        # (every author has exactly one paper, so no ";" appears in the file).
        # Blank tokens, e.g. from a trailing ";", are skipped instead of
        # crashing int().
        written_papers = {
            int(token)
            for token in str(author_entry[1]).split(";")
            if token.strip()
        }
        if current_paper in written_papers :
            found_authors.append(current_author)
    return np.asarray(found_authors)

# Main pass over the ratings: every rating updates the score and steadiness of
# its paper, its reader and the paper's authors, then the goodness of the
# earlier ratings of the same paper is recomputed and propagated to the
# readers who gave them.

# There are many "print" that you can uncomment if you have to do some debugging
# print("##########")

start_time = time.time()

# get_author() rescans the whole authors table on every call, so the authors
# of each paper are looked up once and remembered here
authors_by_paper = {}

for index, entry in enumerate(paper_ratings) :

    # Example: <1,1,2,0.8,0>
    # At Timestamp 1 Reader 1 gave to Paper 2 a Rating of 0.8
    timestamp = int(entry[0])
    reader = int(entry[1])
    paper = int(entry[2])
    rating = entry[3]
    if paper not in authors_by_paper :
        authors_by_paper[paper] = get_author(paper)
    authors_of_paper = authors_by_paper[paper]

    # Progress report: it only fires when the percentage is an exact multiple of 10
    percentage = 100*index/ratings_number
    if percentage % 10 == 0:
        print(f"{int(index)}/{ratings_number} ({int(percentage)}/100%)")
    # print("---------- CURRENT ENTRY ----------")
    # print(f"TIMESTAMP {timestamp} - READER {reader} - PAPER {paper} - SCORE {rating}")

    # COMPUTATION START: PAPER AND READER SCORE

    # Saving values at time t(i)

    old_paper_steadiness = paper_steadiness[paper]
    old_paper_score = paper_score[paper]
    old_reader_steadiness = reader_steadiness[reader]
    old_reader_score = reader_score[reader]

    # print("---------- PRINTING VALUES AT TIME T(I) ----------")
    # print("PAPER STEADINESS T(I) ", old_paper_steadiness)
    # print("PAPER SCORE T(I) ", old_paper_score)
    # print("READER STEADINESS T(I) ", old_reader_steadiness)
    # print("RATING GOODNESS T(I) ", rating_goodness[timestamp])
    # print("READER SCORE T(I) ", old_reader_score)

    # Updating values at time t(i+1)

    # The paper gains steadiness equal to the reader's score before this rating
    paper_steadiness[paper] = old_paper_steadiness + old_reader_score
    # New paper score: average of the old score and the new rating, weighted
    # by the old paper steadiness and the reader's old score respectively
    paper_score[paper] = ((old_paper_steadiness * old_paper_score) + (old_reader_score * rating)) / paper_steadiness[paper]
    # Goodness is 1 minus the square root of the distance between the rating
    # and the updated paper score
    rating_goodness[timestamp] = (1 - (m.sqrt(abs(rating - paper_score[paper]))))
    reader_steadiness[reader] = (old_reader_steadiness + paper_steadiness[paper])
    reader_score[reader] = (((old_reader_steadiness * old_reader_score) + (paper_steadiness[paper] * rating_goodness[timestamp])) / reader_steadiness[reader])

    # print("---------- PRINTING VALUES AT TIME T(I+1) ----------")
    # print("PAPER STEADINESS T(I+1) ", paper_steadiness[paper])
    # print("PAPER SCORE T(I+1) ", paper_score[paper])
    # print("READER STEADINESS T(I+1) ", reader_steadiness[reader])
    # print("RATING GOODNESS T(I+1) ", rating_goodness[timestamp])
    # print("READER SCORE T(I+1) ", reader_score[reader])

    # COMPUTATION START: AUTHOR SCORE

    for author in authors_of_paper :
        # Saving values at time t(i)

        old_author_steadiness = author_steadiness[author]
        old_author_score = author_score[author]

        # Updating values at time t(i+1)

        # Same weighted-average update used for the paper, applied to each author
        author_steadiness[author] = old_author_steadiness + old_reader_score
        author_score[author] = ((old_author_steadiness * old_author_score) + (old_reader_score * rating)) / author_steadiness[author]

    # COMPUTATION START: PROPAGATING CHANGES TO PREVIOUS READERS

    # Earlier ratings of this same paper given by other readers. A boolean mask
    # over the rows already processed replaces rebuilding a DataFrame on every
    # iteration; for the first rating the slice is empty and nothing happens.
    earlier_ratings = paper_ratings[:index]
    same_paper_other_reader = (
        (earlier_ratings[:, 2] == paper) &
        (earlier_ratings[:, 1] != reader)
    )

    # print(" ----- PREVIOUS PAPER RATINGS -----")

    for previous_entry in earlier_ratings[same_paper_other_reader] :

        # Example: <1,1,2,0.8,0>
        # At Timestamp 1 Reader 1 gave to Paper 2 a Rating of 0.8 written by Author 0
        previous_timestamp = int(previous_entry[0])
        previous_reader = int(previous_entry[1])
        previous_rating = previous_entry[3]

        # print(f"PREVIOUS TIMESTAMP {previous_timestamp} - PREVIOUS READER {previous_reader} - PREVIOUS RATING {previous_rating}")

        # Saving previous values at time t(i)

        old_previous_reader_steadiness = reader_steadiness[previous_reader]
        old_previous_reader_score = reader_score[previous_reader]
        old_previous_rating_goodness = rating_goodness[previous_timestamp]

        # Updating previous values at time t(i+1)

        # The earlier rating is judged again against the updated paper score
        rating_goodness[previous_timestamp] = 1 - (m.sqrt(abs(previous_rating - paper_score[paper])))
        # The earlier reader gains the steadiness the paper has just gained
        reader_steadiness[previous_reader] = (old_previous_reader_steadiness + old_reader_score)
        # Swap the old contribution of that rating (old paper steadiness times
        # old goodness) for the new one, then renormalise
        reader_score[previous_reader] = (
                                            (old_previous_reader_steadiness * old_previous_reader_score) -
                                            (old_paper_steadiness * old_previous_rating_goodness) +
                                            (paper_steadiness[paper] * rating_goodness[previous_timestamp])
                                        ) / reader_steadiness[previous_reader]

    # print(" ----- PREVIOUS PAPER RATINGS END -----")

    # print("---------- PRINTING FINAL VALUES AT TIME T(I+1) ----------")
    # print("PAPER STEADINESS: ", paper_steadiness)
    # print("PAPER SCORE: ", paper_score)
    # print("READER STEADINESS: ", reader_steadiness)
    # print("READER SCORE: ", reader_score)
    # print("##########")

print(f"{int(ratings_number)}/{ratings_number} (100/100%)")
elapsed_time = time.time() - start_time
print("ELAPSED TIME: ", elapsed_time)
    

0/6 (0/100%)
3/6 (50/100%)
6/6 (100/100%)
ELAPSED TIME:  0.013010978698730469


In [3]:
# Summary
#
# Prints the final quantities, then writes three JSON files in the
# "readersourcing" folder of the dataset's model directory: the quantities,
# the reader-by-paper rating matrix and the matching rating goodness matrix.

print("PAPER STEADINESS:  ", paper_steadiness)
print("PAPER SCORE:       ", paper_score)
print("READER STEADINESS: ", reader_steadiness)
print("READER SCORE:      ", reader_score)
print("AUTHOR STEADINESS: ", author_steadiness)
print("AUTHOR SCORE:      ", author_score)

result_folder_path = f"../models/{dataset_name}/"

# Every output file lives in the same folder, so it is created once up front
os.makedirs(f"{result_folder_path}readersourcing/", exist_ok=True)

# Not used below; kept because it is a module-level name
columns = ['Quantity', 'Identifiers','Values']

# One record per quantity: its name, the entity identifiers and the values
dictionary = [
    {'Quantity': quantity, 'Identifiers': identifiers.tolist(), 'Values': values.tolist()}
    for quantity, identifiers, values in (
        ('Paper Steadiness', papers, paper_steadiness),
        ('Paper Score', papers, paper_score),
        ('Reader Steadiness', readers, reader_steadiness),
        ('Reader Score', readers, reader_score),
        ('Author Steadiness', authors, author_steadiness),
        ('Author Score', authors, author_score),
    )
]

quantities_filename = f"{result_folder_path}readersourcing/quantities.json"

print(f"PRINTING QUANTITIES TO .JSON FILE AT PATH {quantities_filename}")

with open(quantities_filename, 'w') as outfile:
    json.dump(dictionary, outfile)

# Reader x paper matrices; cells for pairs without a rating stay at zero
rating_matrix = np.zeros((readers_number, papers_number))
goodness_matrix = np.zeros((readers_number, papers_number))

for entry in paper_ratings :

    # Example: <1,1,2,0.8,0>
    # At Timestamp 1 Reader 1 gave to Paper 2 a Rating of 0.8
    timestamp = int(entry[0])
    reader = int(entry[1])
    paper = int(entry[2])
    rating = entry[3]

    rating_matrix[reader, paper] = rating
    # The goodness of a rating is stored under the rating's timestamp
    goodness_matrix[reader, paper] = rating_goodness[timestamp]

ratings_dataframe = pd.DataFrame(rating_matrix)
goodness_dataframe = pd.DataFrame(goodness_matrix)

# NOTE: from here on ratings_filename is the output JSON path, no longer the
# path of the input ratings CSV
ratings_filename = f"{result_folder_path}readersourcing/ratings.json"
goodness_filename = f"{result_folder_path}readersourcing/goodness.json"

print(f"PRINTING RATING MATRIX TO .JSON FILE AT PATH {ratings_filename}")

ratings_dataframe.to_json(ratings_filename)

print(f"PRINTING RATING GOODNESS MATRIX TO .JSON FILE AT PATH {goodness_filename}")

goodness_dataframe.to_json(goodness_filename)

PAPER STEADINESS:   [1.000003 0.5527874045000422]
PAPER SCORE:        [0.7999988000036001 0.5]
READER STEADINESS:  [1.000003 1.000003 1.5527904045000422 1.5527904045000422 0 0]
READER SCORE:       [0.9989045565281977 0.22540410535324928 0.5011572610252991
 0.9992945301858136 1e-06 1e-06]
AUTHOR STEADINESS:  [1.5527904045000422]
AUTHOR SCORE:       [0.6932003824409205]
PRINTING QUANTITIES TO .JSON FILE AT PATH ../models/ground_truth_1/readersourcing/quantities.json
PRINTING RATING MATRIX TO .JSON FILE AT PATH ../models/ground_truth_1/readersourcing/ratings.json
PRINTING RATING GOODNESS MATRIX TO .JSON FILE AT PATH ../models/ground_truth_1/readersourcing/goodness.json
