In [7]:
import pandas as pd
import seaborn as sb
import numpy as np
import math as m
from matplotlib import pyplot as plt

# Show every float inside a printed NumPy array with two decimal places.
np.set_printoptions(formatter={'float': "{0:0.2f}".format})

# ---------------------------------------------------------------------------
# Ground-truth CSV files
# ---------------------------------------------------------------------------
# `info` stays a DataFrame; the authors and ratings tables are converted
# straight to plain NumPy arrays because they are only iterated row by row.

info = pd.read_csv("data/ground_truth/info.csv")
paper_authors = pd.read_csv("data/ground_truth/authors.csv").values
paper_ratings = pd.read_csv("data/ground_truth/ratings.csv").values

# ---------------------------------------------------------------------------
# Dataset dimensions, taken from the first row of info.csv
# ---------------------------------------------------------------------------

dataset_name, papers_number, readers_number, ratings_number = (
    info[column][0] for column in ("Dataset", "Paper", "Reader", "Rating")
)

# Identifier ranges 0..n-1, used as loop domains and as chart x-axes.
papers = np.arange(papers_number)
readers = np.arange(readers_number)
ratings = np.arange(ratings_number)

# ---------------------------------------------------------------------------
# Zero-initialised result arrays
# ---------------------------------------------------------------------------

# One slot per paper.
paper_score = np.zeros(papers_number)

# One slot per reader.
reader_bonus = np.zeros(readers_number)
reader_score = np.zeros(readers_number)

# One slot per rating (three independent arrays).
rating_informativeness, rating_accuracy_loss, rating_bonus = (
    np.zeros(ratings_number) for _ in range(3)
)


def quadratic_loss(a, b):
    """Return the squared difference between ``a`` and ``b`` as a float.

    The result is symmetric in its arguments and never negative.
    """
    difference = a - b
    squared = m.pow(difference, 2)
    return squared


def logistic_function(value):
    """Return the logistic (sigmoid) of ``value``, centred at 0.5.

    Computes ``1 / (1 + e^-(value - 0.5))``. The result lies strictly between
    0 and 1, equals 0.5 when ``value`` is 0.5, and increases with ``value``.

    The denominator is parenthesised on purpose: without the parentheses,
    ``1 / 1 + exp(...)`` is parsed as ``(1 / 1) + exp(...)`` and the function
    would return a number greater than 1 instead of a logistic value.
    """
    return 1 / (1 + m.exp(-(value - 0.5)))


# Per-paper pass.
#
# For every paper this computes the mean of its ratings (the paper score) and,
# for every rating that has at least one earlier and one later rating on the
# same paper, the rating's informativeness, accuracy loss and bonus. Each bonus
# is also added to the total of the reader who gave the rating.
#
# NOTE(review): "past" and "future" follow the row order of paper_ratings, so
# the rows are presumably sorted by timestamp -- confirm against the data.
# NOTE(review): timestamps and reader ids are used directly as array indices;
# confirm they are 0-based and smaller than the sizes of the result arrays.

for current_paper in papers:

    # Keep only the rows that rate the paper under consideration.
    # Example row: <1,1,2,0.8>
    # At Timestamp 1 Reader 1 gave to Paper 2 a Rating of 0.8
    current_paper_ratings = [entry for entry in paper_ratings if int(entry[2]) == current_paper]

    # A paper with no ratings has no mean: leave its paper_score entry untouched
    # rather than dividing by zero below.
    if not current_paper_ratings:
        continue

    ratings_sum = 0

    for index, entry in enumerate(current_paper_ratings):

        timestamp = int(entry[0])
        reader = int(entry[1])
        rating = entry[3]

        # 0 < i < n: the first and last rating of a paper are skipped because
        # they have no past (respectively no future) ratings to average.

        if 0 < index < len(current_paper_ratings) - 1:

            # COMPUTATION START: QI_PAST - mean of the ratings given before this one

            past_scores = [past_entry[3] for past_entry in current_paper_ratings[:index]]
            qi_past_ratings = sum(past_scores) / len(past_scores)

            # COMPUTATION START: QI_FUTURE - mean of the ratings given after this one

            future_scores = [future_entry[3] for future_entry in current_paper_ratings[(index + 1):]]
            qi_future_ratings = sum(future_scores) / len(future_scores)

            # COMPUTATION START: INFORMATIVENESS and ACCURACY LOSS

            rating_informativeness[timestamp] = quadratic_loss(qi_past_ratings, qi_future_ratings)
            rating_accuracy_loss[timestamp] = quadratic_loss(rating, qi_future_ratings)

            # COMPUTATION START: RATING BONUS

            rating_bonus[timestamp] = rating_informativeness[timestamp] * logistic_function(rating_accuracy_loss[timestamp])

            # COMPUTATION START: READER BONUS - it is the sum of the bonus computed for each of its ratings

            reader_bonus[reader] = reader_bonus[reader] + rating_bonus[timestamp]

        # Sum the current rating to compute the mean at the end

        ratings_sum = ratings_sum + rating

    # COMPUTATION START: PAPER SCORE - scores can be aggregated with an index of your choice

    paper_score[current_paper] = ratings_sum / len(current_paper_ratings)

In [None]:
# Summary: print every result array computed by the per-paper pass.

print("RATING INFORMATIVENESS: ", rating_informativeness)
print("RATING ACCURACY LOSS:   ", rating_accuracy_loss)
print("RATING BONUS:           ", rating_bonus)
print("READER BONUS:           ", reader_bonus)
print("PAPER  SCORE:           ", paper_score)

# Charts: one bar chart per result array.
#
# Each entry is (heading, x column name, x values, y column name, y values).
# The reader chart plots reader_bonus: reader_score is only ever allocated as
# zeros in this notebook, so plotting it would always produce an empty chart
# under the "READER BONUS" heading.

charts = [
    ("CHART: RATING / RATING INFORMATIVENESS", 'Rating', ratings, 'Informativeness', rating_informativeness),
    ("CHART: RATING / RATING ACCURACY LOSS", 'Rating', ratings, 'Accuracy Loss', rating_accuracy_loss),
    ("CHART: RATING / RATING BONUS", 'Rating', ratings, 'Bonus', rating_bonus),
    ("CHART: READER / READER BONUS", 'Reader', readers, 'Bonus', reader_bonus),
    ("CHART: PAPER / PAPER SCORE", 'Paper', papers, 'Score', paper_score),
]

for heading, x_column, x_values, y_column, y_values in charts:
    print(heading)
    dataframe = pd.DataFrame({x_column: x_values, y_column: y_values})
    dataframe.plot(x_column, y_column, kind='bar')
    plt.show()
