In [None]:
import os, sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import zipfile
import shutil
from pathlib import Path

In [None]:
import warnings

# Install a global "ignore" filter: every warning raised after this cell runs
# is suppressed for the rest of the kernel session.
warnings.filterwarnings(action="ignore")

### Loading packages

In [None]:
import sys
from pathlib import Path

# Make the project-local packages importable from this notebook.
# `repo_path` is the directory two levels above the current working directory
# (assumed to be the repository root -- confirm against the repo layout).
here_path = Path().resolve()
repo_path = here_path.parents[1]

# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if str(repo_path) not in sys.path:
    sys.path.append(str(repo_path))

In [None]:
from py.utils import verifyDir, verifyFile, verifyType

In [None]:
from py.config import Config

# Project settings object; the path constants defined further down are built
# from cfg.DATA_PATH.
cfg = Config()

# Seed NumPy's global random generator from the configured random state.
np.random.seed(seed=cfg.RANDOM_STATE)

# Show the configured data and model locations as the cell output.
(cfg.DATA_PATH, cfg.MODEL_PATH)

In [None]:
# Directory constants for the "pp2" dataset, all built by appending to
# cfg.DATA_PATH. Each value ends with "/", so file names can be appended
# directly without another separator.
# NOTE(review): this assumes cfg.DATA_PATH itself ends with a path separator -- confirm.
RAW_PATH=f"{cfg.DATA_PATH}pp2/raw_data/"
IMAGES_PATH = f"{cfg.DATA_PATH}pp2/images/"
QRANKS_PATH = f"{cfg.DATA_PATH}pp2/Qranks/"

In [None]:
# Run the project helper on the output directory before anything is written to it.
# NOTE(review): presumably verifyDir creates the directory when it is missing --
# confirm against py.utils.
verifyDir(QRANKS_PATH)

### Loading data

In [None]:
from py.datasets import PlacePulse

# Dataset helper object; the studies cell below calls pp.map_category() on each
# study question to obtain its category name.
pp = PlacePulse()

### Studies/categories evaluated

In [None]:
# Build the study lookup table in a single chain:
#   1. read the raw tab-separated studies file and rename "_id" to "study_id";
#   2. derive "category" by passing every study question through pp.map_category;
#   3. keep only the id, category and vote count (this drops owner, study_name,
#      study_public and study_question);
#   4. order the studies from most to fewest votes.
studies_df = (
    pd.read_csv(f"{RAW_PATH}studies.tsv", sep="\t")
    .rename(columns={"_id": "study_id"})
    .assign(category=lambda frame: frame["study_question"].apply(pp.map_category))
    .loc[:, ["study_id", "category", "num_votes"]]
    .sort_values(by="num_votes", ascending=False)
)
# NOTE: exporting this table is currently disabled:
# studies_df.to_csv(f"{PROCESSED_PATH}studies.csv", index=False)
studies_df

### Rank Scores (Qranks)

In [None]:
# Build the long-format score table in a single chain. The steps run in the
# same order as the individual statements they replace.
qranks_df = (
    pd.read_csv(f"{RAW_PATH}qscores.tsv", sep="\t")
    # Replace the dotted TrueSkill column names with snake_case ones.
    .rename(columns={"trueskill.score": "trueskill_score",
                     "trueskill.stds.-1": "trueskill_std"})
    # Attach each row's category through its study id; the left join keeps
    # every score row even when no matching study is found.
    .merge(studies_df, how="left", on="study_id")
    .loc[:, ["location_id", "trueskill_score", "trueskill_std", "category"]]
    .rename(columns={"location_id": "image_id"})
    .sort_values(by=["image_id"])
)
qranks_df

In [None]:
def _flatten_column(col):
    """Collapse one two-level column label into a flat string.

    ``col`` is a ``(metric, category)`` tuple. When both parts are non-empty
    the result is ``"<category>_<suffix>"``, where ``<suffix>`` is the second
    ``"_"``-separated piece of the metric name (the ``"score"`` in
    ``"trueskill_score"``). Otherwise the parts are simply concatenated,
    which leaves a label such as ``("image_id", "")`` as ``"image_id"``.
    """
    metric, category = col[0], col[1]
    if len(category) >= 1 and len(metric) >= 1:
        return "_".join([category, metric.split("_")[1]])
    return "".join(col)


# Reshape to a wide table: one row per image, one column per
# (metric, category) pair. No aggfunc is passed, so pandas' default
# aggregation (mean) is applied if an image/category pair occurs more than once.
pp2_qranks_df = qranks_df.pivot_table(
    index=["image_id"],
    columns=["category"],
).reset_index()

# Replace the two-level column index with flat string names.
pp2_qranks_df.columns = [_flatten_column(col) for col in pp2_qranks_df.columns]
pp2_qranks_df

### Saving final file

In [None]:
%%time
# Write the wide rank table to ranks.csv inside QRANKS_PATH, using ";" as the
# field separator and leaving out the DataFrame index.
pp2_qranks_df.to_csv(f"{QRANKS_PATH}ranks.csv", sep=";", index=False)

In [None]:
%%time
zip_path = f"{QRANKS_PATH}/ranks.zip"
csv_path = f"{QRANKS_PATH}/ranks.csv"

with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_LZMA) as zipObj:
    zipObj.write(csv_path, arcname="ranks.csv")
    zipObj.close()