In [15]:
import pandas as pd
import math as m
import linecache
from collections import deque
import csv
import numpy as np
import os
import collections
import random as rn
import json
import time

# Parameter setting

dataset_name = "seed_2/p_4_beta_big"
# Number of simulated days the rating stream is split into.
days = 180

# Reader scores start at a very small positive value instead of 0: a paper's
# steadiness is the sum of the scores of the readers who rated it and is used
# as a divisor, so an all-zero start would cause a division by 0.

epsilon = 0.000001

# CSV file parsing

dataset_folder_path = "../data/{}/".format(dataset_name)
info_filename = "{}info.csv".format(dataset_folder_path)
ratings_filename = "{}ratings.csv".format(dataset_folder_path)
authors_filename = "{}authors.csv".format(dataset_folder_path)

info = pd.read_csv(info_filename)
# Both tables are kept as plain numpy arrays (one row per CSV record).
paper_authors = pd.read_csv(authors_filename)
paper_authors = paper_authors.values
paper_ratings = pd.read_csv(ratings_filename)
paper_ratings = paper_ratings.values

# Line number of the first rating row in ratings.csv when the file is addressed
# with linecache (line numbers start at 1 and line 1 is the header).
csv_offset = 2

# Initial setup

# From here on dataset_name is the value stored in info.csv; it replaces the
# relative path used above and names the output folder.
dataset_name = info["Dataset"][0]
papers_number = info["Paper"][0]
readers_number = info["Reader"][0]
ratings_number = info["Rating"][0]
# Ratings processed per simulated day. Clamped to at least 1 because the value
# is later used as a modulus: with fewer ratings than days it would be 0 and
# the main loop would raise ZeroDivisionError.
ratings_number_per_day = max(1, int(ratings_number // days))
authors_number = info["Author"][0]

# Identifier ranges, used as the "Identifiers" columns of the serialized output.
papers = np.arange(papers_number)
readers = np.arange(readers_number)
ratings = np.arange(ratings_number)
authors = np.arange(authors_number)

# Model state, one slot per paper / rating / reader / author.
paper_steadiness = np.zeros(papers_number)
paper_score = np.zeros(papers_number)
rating_goodness = np.zeros(ratings_number)
reader_steadiness = np.zeros(readers_number)
reader_score = np.zeros(readers_number)
reader_score.fill(epsilon)
author_steadiness = np.zeros(authors_number)
author_score = np.zeros(authors_number)

# Reference instant for the elapsed time reported by serialize_result.
start_time = time.time()

def get_author(current_paper, author_table=None):
    """Return the identifiers of the authors who wrote ``current_paper``.

    Parameters
    ----------
    current_paper : int
        Identifier of the paper to look up.
    author_table : iterable of rows, optional
        Rows whose first item is an author identifier and whose second item
        lists that author's papers, either as a ';'-separated string or as a
        single number. Defaults to the module-level ``paper_authors`` table.

    Returns
    -------
    numpy.ndarray
        Matching author identifiers in table order; empty when nobody matches.
    """
    if author_table is None:
        author_table = paper_authors

    found_authors = []

    for author_entry in author_table:
        raw_papers = author_entry[1]
        # A papers cell holding a single identifier may not be a string at all
        # (it can arrive as a number), so only strings are split on ';'.
        if isinstance(raw_papers, str):
            tokens = [token for token in raw_papers.split(";") if token.strip()]
        else:
            tokens = [raw_papers]
        written_papers = {int(token) for token in tokens}
        if current_paper in written_papers:
            found_authors.append(int(author_entry[0]))

    return np.asarray(found_authors)

# Function to output result to file

def serialize_result(day, current_index, verbose):
    """Write the current model state under ``../models/<dataset_name>/readersourcing/day_<day>/``.

    Four files are written in that folder (created if missing):
    ``quantities.json`` (steadiness and score of papers, readers and authors),
    ``ratings.csv`` (reader x paper rating matrix), ``goodness.csv``
    (reader x paper rating-goodness matrix) and ``info.json`` (elapsed time).

    Parameters
    ----------
    day : int
        Day label; only used to build the output folder name.
    current_index : int
        Exclusive upper bound on the line number (as used by ``linecache``)
        of the ratings file: lines ``csv_offset`` .. ``current_index - 1``
        contribute to the goodness matrix.
    verbose : bool
        When true, print the path of each file before it is written.

    Returns
    -------
    float
        Seconds elapsed since the module-level ``start_time``.
    """
    result_folder_path = "../models/{}/".format(dataset_name)
    day_folder_path = "{}readersourcing/day_{}/".format(result_folder_path, day)
    os.makedirs(day_folder_path, exist_ok=True)

    # Quantities output handling

    dictionary = [
        {'Quantity': 'Paper Steadiness', 'Identifiers': papers.tolist(), 'Values': paper_steadiness.tolist()},
        {'Quantity': 'Paper Score', 'Identifiers': papers.tolist(), 'Values': paper_score.tolist()},
        {'Quantity': 'Reader Steadiness', 'Identifiers': readers.tolist(), 'Values': reader_steadiness.tolist()},
        {'Quantity': 'Reader Score', 'Identifiers': readers.tolist(), 'Values': reader_score.tolist()},
        {'Quantity': 'Author Steadiness', 'Identifiers': authors.tolist(), 'Values': author_steadiness.tolist()},
        {'Quantity': 'Author Score', 'Identifiers': authors.tolist(), 'Values': author_score.tolist()},
    ]

    result_quantities_filename = "{}quantities.json".format(day_folder_path)

    if verbose:
        print("PRINTING QUANTITIES TO .JSON FILE AT PATH {}".format(result_quantities_filename))

    # The with-block closes the file; no explicit close() is needed.
    with open(result_quantities_filename, 'w') as result_quantities_file:
        json.dump(dictionary, result_quantities_file)

    # Goodness matrix output handling

    goodness_matrix = np.zeros((readers_number, papers_number))

    for rating_index in range(csv_offset, current_index):

        current_entry = linecache.getline(ratings_filename, rating_index).split(",")

        # Example: <1,1,2,0.8,0>
        # At Timestamp 1 Reader 1 gave to Paper 2 a Rating of 0.8
        current_timestamp = int(current_entry[0])
        current_reader = int(current_entry[1])
        current_paper = int(current_entry[2])

        goodness_matrix[current_reader][current_paper] = rating_goodness[current_timestamp]

    result_ratings_filename = "{}ratings.csv".format(day_folder_path)
    result_goodness_filename = "{}goodness.csv".format(day_folder_path)

    # Rating matrix output handling

    if verbose:
        print("PRINTING RATING MATRIX TO .CSV FILE AT PATH {}".format(result_ratings_filename))

    # NOTE(review): the rating matrix is pivoted from the whole ratings file, so
    # it is the same for every day and ignores current_index - confirm that this
    # is intended.
    paper_ratings_dataframe = pd.read_csv(ratings_filename)
    ratings_matrix = paper_ratings_dataframe.pivot_table(index="Reader", columns="Paper", values="Score")
    ratings_matrix = ratings_matrix.fillna(0)
    ratings_matrix.to_csv(result_ratings_filename, sep=",", header=False, index=False)

    if verbose:
        print("PRINTING RATING GOODNESS MATRIX TO .CSV FILE AT PATH {}".format(result_goodness_filename))

    with open(result_goodness_filename, mode='w', newline='') as result_goodness_file:
        goodness_writer = csv.writer(result_goodness_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        for goodness_entry in goodness_matrix:
            goodness_writer.writerow(goodness_entry)

    # Info output handling

    result_elapsed_time = time.time() - start_time

    dictionary = [{'Time': result_elapsed_time}]

    result_info_filename = "{}info.json".format(day_folder_path)

    if verbose:
        print("PRINTING INFO TO .JSON FILE AT PATH {}".format(result_info_filename))

    with open(result_info_filename, 'w') as result_info_file:
        json.dump(dictionary, result_info_file)

    return result_elapsed_time

# There are many "print" that you can uncomment if you have to do some debugging
# print("##########")

print("0/0 (0/100%)")

# "days" is reused here as the counter of the simulated day being processed.
days = 1
written = False

# Parse the rating rows once, up front (header skipped), instead of re-opening
# and re-parsing the file from the top at every iteration. Row k of this array
# is line k + csv_offset of the ratings file.
with open(ratings_filename) as rating_file:
    rating_lines = rating_file.readlines()[1:ratings_number + 1]
all_ratings = np.array([rating_line.split(",") for rating_line in rating_lines], dtype=float)

for index in range(csv_offset, (ratings_number + csv_offset)):

    entry = linecache.getline(ratings_filename, index).split(",")

    # Example: <1,1,2,0.8,0>
    # At Timestamp 1 Reader 1 gave to Paper 2 a Rating of 0.8
    timestamp = int(entry[0])
    reader = int(entry[1])
    paper = int(entry[2])
    rating = float(entry[3])
    authors_of_paper = get_author(paper)

    # Progress is reported only when the percentage is an exact even number.
    percentage = 100*index/ratings_number
    if percentage % 2 == 0:
        print("{}/{} ({}/100%)".format(int(index), ratings_number, int(percentage)))
    # print("---------- CURRENT ENTRY ----------")
    # print(f"TIMESTAMP {timestamp} - READER {reader} - PAPER {paper} - SCORE {rating}")

    # Day bookkeeping: a new day starts when the line index reaches a multiple of
    # (ratings per day * current day); results are written once per day whose
    # number is a multiple of 5.
    if index % (ratings_number_per_day * days) == 0:
        days += 1
        written = False

    if days % 5 == 0 and not written:
        serialize_result(days, index, verbose=False)
        written = True

    # COMPUTATION START: PAPER AND READER SCORE

    # Saving values at time t(i)

    old_paper_steadiness = paper_steadiness[paper]
    old_paper_score = paper_score[paper]
    old_reader_steadiness = reader_steadiness[reader]
    old_rating_goodness = rating_goodness[timestamp]
    old_reader_score = reader_score[reader]

    # print("---------- PRINTING VALUES AT TIME T(I) ----------")
    # print("PAPER STEADINESS T(I) ", old_paper_steadiness)
    # print("PAPER SCORE T(I) ", old_paper_score)
    # print("READER STEADINESS T(I) ", old_reader_steadiness)
    # print("RATING GOODNESS T(I) ", rating_goodness[timestamp])
    # print("READER SCORE T(I) ", old_reader_score)

    # Updating values at time t(i+1)

    paper_steadiness[paper] = old_paper_steadiness + old_reader_score
    paper_score[paper] = ((old_paper_steadiness * old_paper_score) + (old_reader_score * rating)) / paper_steadiness[paper]
    rating_goodness[timestamp] = (1 - (m.sqrt(abs(rating - paper_score[paper]))))
    reader_steadiness[reader] = (old_reader_steadiness + paper_steadiness[paper])
    reader_score[reader] = (((old_reader_steadiness * old_reader_score) + (paper_steadiness[paper] * rating_goodness[timestamp])) / reader_steadiness[reader])

    # print("---------- PRINTING VALUES AT TIME T(I+1) ----------")
    # print("PAPER STEADINESS T(I+1) ", paper_steadiness[paper])
    # print("PAPER SCORE T(I+1) ", paper_score[paper])
    # print("READER STEADINESS T(I+1) ", reader_steadiness[reader])
    # print("RATING GOODNESS T(I+1) ", rating_goodness[timestamp])
    # print("READER SCORE T(I+1) ", reader_score[reader])

    # COMPUTATION START: AUTHOR SCORE

    for author in authors_of_paper :
        # Saving values at time t(i)

        old_author_steadiness = author_steadiness[author]
        old_author_score = author_score[author]

        # Updating values at time t(i+1)

        author_steadiness[author] = old_author_steadiness + old_reader_score
        author_score[author] = ((old_author_steadiness * old_author_score) + (old_reader_score * rating)) / author_steadiness[author]

    # COMPUTATION START: PROPAGATING CHANGES TO PREVIOUS READERS

    # Rows up to and including the current rating (file lines csv_offset..index),
    # restricted to ratings of the same paper given by other readers.
    previous_ratings = all_ratings[:index - 1]
    previous_ratings = previous_ratings[
         (previous_ratings[:,1]!=float(reader)) &
         (previous_ratings[:,2]==float(paper))
    ]

    # print(" ----- PREVIOUS PAPER RATINGS -----")

    for previous_entry in previous_ratings:

        # Example: <1,1,2,0.8,0>
        # At Timestamp 1 Reader 1 gave to Paper 2 a Rating of 0.8 written by Author 0
        previous_timestamp = int(previous_entry[0])
        previous_reader = int(previous_entry[1])
        previous_paper = int(previous_entry[2])
        previous_rating = previous_entry[3]

        # print(f"PREVIOUS TIMESTAMP {previous_timestamp} - PREVIOUS READER {previous_reader} - PREVIOUS PAPER {previous_paper} - PREVIOUS RATING {previous_rating}")

        # Saving previous values at time t(i)

        old_previous_reader_steadiness = reader_steadiness[previous_reader]
        old_previous_reader_score = reader_score[previous_reader]
        old_previous_rating = previous_rating
        old_previous_rating_goodness = rating_goodness[previous_timestamp]

        # Updating previous values at time t(i+1)

        rating_goodness[previous_timestamp] = 1 - (m.sqrt(abs(old_previous_rating - paper_score[paper])))
        reader_steadiness[previous_reader] = (old_previous_reader_steadiness + old_reader_score)
        reader_score[previous_reader] = (
                                            (old_previous_reader_steadiness * old_previous_reader_score) -
                                            (old_paper_steadiness * old_previous_rating_goodness) +
                                            (paper_steadiness[paper] * rating_goodness[previous_timestamp])
                                        ) / reader_steadiness[previous_reader]

    # print(" ----- PREVIOUS PAPER RATINGS END -----")

    # print("---------- PRINTING FINAL VALUES AT TIME T(I+1) ----------")
    # print("PAPER STEADINESS: ", paper_steadiness)
    # print("PAPER SCORE: ", paper_score)
    # print("READER STEADINESS: ", reader_steadiness)
    # print("READER SCORE: ", reader_score)
    # print("##########")

print("{}/{} (100/100%)".format(int(ratings_number), int(ratings_number))) 
# Every rating has been processed at this point. serialize_result reads the
# rating lines csv_offset .. current_index - 1, and the last rating sits on
# line ratings_number + csv_offset - 1, so the bound that covers all of them is
# ratings_number + csv_offset (a smaller bound drops the last ratings from the
# goodness matrix).
elapsed_time = serialize_result((days-1), ratings_number + csv_offset, verbose=True)
print("ELAPSED TIME: ", elapsed_time)


0/0 (0/100%)
134/6700 (2/100%)
Gay
268/6700 (4/100%)
402/6700 (6/100%)
Gay
536/6700 (8/100%)
Gay
670/6700 (10/100%)
804/6700 (12/100%)
Gay
938/6700 (14/100%)
1072/6700 (16/100%)
Gay
1206/6700 (18/100%)
Gay
1340/6700 (20/100%)
1474/6700 (22/100%)
Gay
1608/6700 (24/100%)
1742/6700 (26/100%)
Gay
1876/6700 (28/100%)
Gay
2010/6700 (30/100%)
2144/6700 (32/100%)
Gay
2278/6700 (34/100%)
2412/6700 (36/100%)
Gay
2546/6700 (38/100%)
Gay
2680/6700 (40/100%)
2814/6700 (42/100%)
Gay
2948/6700 (44/100%)
3082/6700 (46/100%)
Gay
3216/6700 (48/100%)
Gay
3350/6700 (50/100%)
3484/6700 (52/100%)
Gay
3618/6700 (54/100%)
3752/6700 (56/100%)
Gay
3886/6700 (58/100%)
Gay
4020/6700 (60/100%)
4154/6700 (62/100%)
Gay
4288/6700 (64/100%)
4422/6700 (66/100%)
Gay
4556/6700 (68/100%)
Gay
4690/6700 (70/100%)
4824/6700 (72/100%)
Gay
4958/6700 (74/100%)
5092/6700 (76/100%)
Gay
5226/6700 (78/100%)
Gay
5360/6700 (80/100%)
5494/6700 (82/100%)
Gay
5628/6700 (84/100%)
5762/6700 (86/100%)
Gay
5896/6700 (88/100%)
Gay
6030/6700 

In [16]:
# Summary: one labelled line per final quantity array

summary_rows = (
    ("PAPER STEADINESS:  ", paper_steadiness),
    ("PAPER SCORE:       ", paper_score),
    ("READER STEADINESS: ", reader_steadiness),
    ("READER SCORE:      ", reader_score),
    ("AUTHOR STEADINESS: ", author_steadiness),
    ("AUTHOR SCORE:      ", author_score),
)
for summary_label, summary_values in summary_rows:
    print(summary_label, summary_values)


PAPER STEADINESS:   [ 8.65911389  6.43348739  8.93306512 14.63139344  6.3354653   4.60381318
 12.38650395  9.17417006 10.59274655  8.13087666 10.31122603  4.70753227
  9.32295267 12.88541111 10.36961121  6.1325266   7.7639587   8.3313633
  7.57727517  9.27326602 11.41832491  7.54160732  8.99855001 10.81490796
  7.6863949  14.51828326 10.98172265 10.97127775 11.04780878  6.94077717
  7.13503397  9.39496881  9.2429899  12.03054896  6.9393868   9.9490851
  7.74519837  9.2944572  10.1538996   6.4207671  10.99369488  5.38270049
 11.26679051 14.83620467  8.28428346  7.6758088  10.54046388 10.4497946
  7.38332638  8.82391945  7.67925935  4.77518371  7.39728168  7.95340625
 10.33998134  8.61863328  5.67932143 12.35524204  9.70506866  7.71661028
  7.55888208  9.65612552  7.40253009 12.97355792  5.72964102 13.62339982
  9.72590884 13.16478658  9.86648925  9.43842445  6.95956404  6.1945644
 12.37626216  8.36737901 12.30225848  9.15417758  7.01454963  6.64153784
 10.28845927  6.87231432  9.9985800