In [11]:

import pandas as pd
import numpy as np
import math as m
import os
import json
import csv
import random as rn
from random import randint
from scipy.stats import beta

# Dataset location: every input and output file of this notebook lives in the
# "readersourcing" folder of the selected dataset.

dataset_name = "ground_truth_2"
dataset_folder_path = f"../models/{dataset_name}/readersourcing/"
ratings_filename = f"{dataset_folder_path}ratings.csv"
quantities_filename = f"{dataset_folder_path}quantities.json"
stats_filename = f"{dataset_folder_path}stats.csv"

# The ratings file is read without a header row, so columns get integer labels.
ratings_dataframe = pd.read_csv(ratings_filename, header=None)
# Force every column to a numeric dtype; cells that cannot be parsed become NaN.
# `DataFrame.convert_objects` was removed from pandas, `pd.to_numeric` is the
# documented replacement and `errors="coerce"` keeps the same coercion rule.
ratings_dataframe = ratings_dataframe.apply(pd.to_numeric, errors="coerce")

# Indicator matrix: every cell above the threshold is set to 1, all other cells
# keep their original value (so an exact 0 stays 0).
stats_dataframe = ratings_dataframe.copy()
stats_dataframe[stats_dataframe > 0.0000001] = 1

with open(quantities_filename) as f:
    quantities = json.load(f)

# `quantities` is indexed positionally: identifiers are read from entries
# 0 (papers), 2 (readers) and 4 (authors); values are read from entries 0..5 as
# steadiness/score pairs for papers, readers and authors respectively.
papers = quantities[0]["Identifiers"]
readers = quantities[2]["Identifiers"]
authors = quantities[4]["Identifiers"]

paper_steadiness = quantities[0]["Values"]
paper_score = quantities[1]["Values"]
reader_steadiness = quantities[2]["Values"]
reader_score = quantities[3]["Values"]
author_steadiness = quantities[4]["Values"]
author_score = quantities[5]["Values"]

print("---------- PRINTING PARSED RATING MATRIX (Part of it) ----------")
display(ratings_dataframe.head(10))

print("---------- PRINTING PARSED IDENTIFIERS ----------")
print("PAPER:  ", papers)
print("READER: ", readers)
print("AUTHOR: ", authors)

print("---------- PRINTING PARSED QUANTITIES ----------")
print("PAPER STEADINESS:  ", paper_steadiness)
print("PAPER SCORE:       ", paper_score)
print("READER STEADINESS: ", reader_steadiness)
print("READER SCORE:      ", reader_score)
print("AUTHOR STEADINESS: ", author_steadiness)
print("AUTHOR SCORE:      ", author_score)

---------- PRINTING PARSED RATING MATRIX (Part of it) ----------
---------- PRINTING PARSED IDENTIFIERS ----------
PAPER:   [0, 1]
READER:  [0, 1, 2]
AUTHOR:  [0, 1]
---------- PRINTING PARSED QUANTITIES ----------
PAPER STEADINESS:   [3e-06, 2.7690598923241496]
PAPER SCORE:        [0.5366666666666666, 0.4741940759062552]
READER STEADINESS:  [2.7690628923241496, 2.7690628923241496, 2.7690628923241496]
READER SCORE:       [0.6622330287505745, 0.3247363794008299, 0.33352160805166764]
AUTHOR STEADINESS:  [2.7690628923241496, 2.7690628923241496]
AUTHOR SCORE:       [0.4741941435889982, 0.4741941435889982]


For all other conversions use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.


Unnamed: 0,0,1
0,0.51,0.36
1,0.51,0.93
2,0.0,0.0


In [12]:

# Stats file generation: summarise how many ratings each paper received and
# write the summary to `stats_filename`.
#
# NOTE: this cell reassigns `ratings_dataframe` (transposed) and
# `stats_dataframe` (replaced by the one-row summary), so it is not idempotent.
# Re-run the loading cell before running this cell again.

print("---------- STATS GENERATION STARTED ----------")

# Column-wise sum of the indicator matrix built in the loading cell, i.e. one
# total per column of the ratings file.
sums = stats_dataframe.sum(axis=0)
sums_dataframe = pd.DataFrame(sums)

# Each aggregate is a one-element Series, hence the `.values[0]` below.
max_ratings_paper = sums_dataframe.max()
min_ratings_paper = sums_dataframe.min()
mean_ratings_paper = sums_dataframe.mean(axis=0)

print("MAX NUMBER OF RATINGS FOR A PAPER: ", int(max_ratings_paper.values[0]))
print("MIN NUMBER OF RATINGS FOR A PAPER: ", int(min_ratings_paper.values[0]))
print("MEAN NUMBER OF RATINGS FOR A PAPER: ", mean_ratings_paper.values[0])

# After the transpose every row holds the values of one original column; a row
# is counted when all of its values are identical (a single distinct value).
ratings_dataframe = ratings_dataframe.T
counter = sum(
    1
    for _, paper_ratings in ratings_dataframe.iterrows()
    if len(np.unique(paper_ratings)) == 1
)

print("NUMBER OF PAPERS WITH UNIQUE RATING: ", counter)

# Build the one-row summary directly: `DataFrame.append` was removed in
# pandas 2.0. The explicit `columns` list fixes the column order of the CSV.
stats_dataframe = pd.DataFrame(
    [
        {
            "Dataset": dataset_name,
            "Max Rating Paper": int(max_ratings_paper.values[0]),
            "Min Rating Paper": int(min_ratings_paper.values[0]),
            "Mean Rating Paper": mean_ratings_paper.values[0],
        }
    ],
    columns=["Dataset", "Max Rating Paper", "Min Rating Paper", "Mean Rating Paper"],
)
stats_dataframe.to_csv(stats_filename, index=False)

print("---------- STATS GENERATION COMPLETED ----------")


---------- STATS GENERATION STARTED ----------
MAX NUMBER OF RATINGS FOR A PAPER:  2
MIN NUMBER OF RATINGS FOR A PAPER:  2
MEAN NUMBER OF RATINGS FOR A PAPER:  2.0
NUMBER OF PAPERS WITH UNIQUE RATING:  0
---------- STATS GENERATION COMPLETED ----------
