In [64]:
import pandas as pd
import seaborn as sb
import numpy as np
import math as m
import random as rn
from matplotlib import pyplot as plt
import scipy as sp
from scipy.stats import truncnorm as tn

papers_number = 2
readers_number = 20
ratings_number = readers_number * papers_number
authors_number = 6

papers = np.arange(papers_number)
readers = np.arange(readers_number)
ratings = np.arange(ratings_number)
authors = np.arange(authors_number)

# Seed folder path

dataset_name = "seed_1"
seed_folder_path = f"data/seed/{dataset_name}/"
info_filename = f"{seed_folder_path}info.csv"
ratings_filename = f"{seed_folder_path}ratings.csv"
authors_filename = f"{seed_folder_path}authors.csv"

# Info file generation

info_dataframe = pd.DataFrame(columns=["Dataset", "Paper", "Reader", "Rating", "Author"])
info_dataframe = info_dataframe.append(
    {
        "Dataset": dataset_name.capitalize(), 
        "Paper": papers_number, 
        "Reader": readers_number, 
        "Rating": ratings_number, 
        "Author": authors_number
    }, ignore_index=True)
info_dataframe.to_csv(info_filename, index=False)

# Ratings file generation

ratings_dataframe = pd.DataFrame(columns=["Timestamp", "Reader", "Paper", "Score", "Author"])
tuples = []
print("---------- RANDOM RATINGS GENERATION STARTED ----------")
generated_ratings = 0
for reader in readers:
    for index, paper in enumerate(papers):
        percentage = 100*generated_ratings/(readers_number*papers_number)
        if percentage % 10 == 0:
            print(f"{int(percentage)}/100%", end=" ")
        current_tuple = {
            "Reader": reader, 
            "Paper": paper, 
            "Score": (tn(0, 1, scale=1).rvs(1))[0], 
            "Author": np.random.choice(authors_number, 1)[0]
        }
        generated_ratings+=1
        tuples.append(current_tuple)
print("100/100%")
print("---------- RANDOM RATINGS GENERATION ENDED ----------")
print("---------- RATINGS APPENDING STARTED ----------")
for index, current_tuple in enumerate(tuples):
    percentage = round((100*index/len(tuples)), 2)
    if percentage % 10 == 0:
        print(f"{int(percentage)}/100%", end=" ")
    ratings_dataframe = ratings_dataframe.append(
        {
            "Timestamp": index,
            "Reader": current_tuple["Reader"],
            "Paper": current_tuple["Paper"], 
            "Score": current_tuple["Score"], 
            "Author": current_tuple["Author"]
        }, ignore_index=True)
print("100/100%")
print("---------- RATINGS APPENDING ENDED ----------")
ratings_dataframe["Timestamp"] = ratings_dataframe["Timestamp"].astype(int)
ratings_dataframe["Reader"] = ratings_dataframe["Reader"].astype(int)
ratings_dataframe["Paper"] = ratings_dataframe["Paper"].astype(int)
ratings_dataframe["Author"] = ratings_dataframe["Author"].astype(int)
ratings_dataframe.to_csv(ratings_filename, index=False)

print("---------- RANDOM AUTHORS GENERATION STARTED ----------")
print("---------- RATINGS APPENDING STARTED ----------")
authors_dataframe = pd.DataFrame(columns=["Author", "Papers"])
for author in authors:
    # An author writes a number of paper between 1 and paper_fraction
    author_papers_number = rn.randint(0, (papers_number-1))
    papers_written = np.random.choice(papers, author_papers_number).tolist()
    papers_written = set(papers_written)
    papers_written = map(str, list(papers_written))
    papers_written = ";".join(papers_written)
    authors_dataframe = authors_dataframe.append(
        {
            "Author": author,
            "Papers": papers_written
        }, ignore_index=True)
print("---------- RANDOM AUTHORS GENERATION ENDED ----------")
print("---------- RATINGS APPENDING ENDED ----------")
authors_dataframe.to_csv(authors_filename, index=False)

---------- RANDOM RATINGS GENERATION STARTED ----------
0/100% 10/100% 20/100% 30/100% 40/100% 50/100% 60/100% 70/100% 80/100% 90/100% 100/100%
---------- RANDOM RATINGS GENERATION ENDED ----------
---------- RATINGS APPENDING STARTED ----------
0/100% 10/100% 20/100% 30/100% 40/100% 50/100% 60/100% 70/100% 80/100% 90/100% 100/100%
---------- RATINGS APPENDING ENDED ----------
---------- RANDOM AUTHORS GENERATION STARTED ----------
---------- RATINGS APPENDING STARTED ----------
---------- RANDOM AUTHORS GENERATION ENDED ----------
---------- RATINGS APPENDING ENDED ----------


In [63]:
# Summary

print("RANDOM INFO:       ", info_dataframe)
print("RANDOM RATINGS:  ", ratings_dataframe.head(10))
print("RANDOM AUTHORS:  ", authors_dataframe.head(10))

RANDOM INFO:          Dataset Paper Reader Rating Author
0  Seed_1     2     20     40      2
RANDOM RATINGS:      Timestamp  Reader  Paper     Score  Author
0          0       0      0  0.455301       1
1          1       0      1  0.022488       0
2          2       1      0  0.917206       0
3          3       1      1  0.086421       1
4          4       2      0  0.073629       1
5          5       2      1  0.201673       0
6          6       3      0  0.075789       0
7          7       3      1  0.427372       0
8          8       4      0  0.011814       0
9          9       4      1  0.778760       0
RANDOM AUTHORS:     Author Papers
0      0       
1      1      1
