In [None]:
import os, sys
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import zipfile
import shutil
from pathlib import Path

In [None]:
import warnings
# Suppress every warning for the rest of the kernel session so library
# warnings do not clutter the cell outputs.
# NOTE(review): this is a blanket filter; it also hides deprecation and
# dtype warnings that may matter when debugging.
warnings.filterwarnings('ignore')

### Loading packages

In [None]:
import sys
from pathlib import Path

# Resolve the current working directory and take its grandparent
# (parents[1]) as the location to add to the import path.
# NOTE(review): presumably the notebook lives two levels below the repository
# root, so that the project's `py` package becomes importable -- confirm.
here_path = Path().resolve()
repo_path = here_path.parents[1]

# Only append once: re-running this cell must not keep growing sys.path
# with duplicate entries.
if str(repo_path) not in sys.path:
    sys.path.append(str(repo_path))

In [None]:
from py.utils import verifyDir, verifyFile, verifyType

In [None]:
from py.config import Config

# Project configuration object; the rest of the notebook reads its
# RANDOM_STATE, DATA_PATH, MODEL_PATH, SCORING_METHOD and PLACE_LEVEL attributes.
cfg = Config()

# Seed NumPy's global random generator with the configured random state.
np.random.seed(cfg.RANDOM_STATE)
# Trailing expression: shows the configured data and model paths as the cell output.
cfg.DATA_PATH, cfg.MODEL_PATH

In [None]:
# All locations used by this notebook live under "<DATA_PATH>pp2/".
# The paths are assembled by plain string formatting, so cfg.DATA_PATH is
# expected to already end with a path separator.
_pp2_root = f"{cfg.DATA_PATH}pp2/"

# Output folder for the scores, keyed by scoring method and place level.
QSCORE_PATH = f"{_pp2_root}{cfg.SCORING_METHOD}/{cfg.PLACE_LEVEL}/"
# Folder that is searched for the image files.
IMAGES_PATH = f"{_pp2_root}images/"
# Folder holding the zipped pairwise comparisons.
COMPARISONS_PATH = f"{_pp2_root}comparisons/"

In [None]:
# Check the output folder for this scoring method / place level.
# NOTE(review): verifyDir is a project helper not shown here; presumably it
# creates the directory when it is missing -- confirm in py/utils.
verifyDir(QSCORE_PATH)

### Loading data

In [None]:
%%time
comparisons_df = pd.read_csv(zipfile.ZipFile(f"{COMPARISONS_PATH}comparisons.zip").open("comparisons.csv"), sep=";", low_memory=False)
comparisons_df

#### Calculate scores

In [None]:
from py.datasets.comparisons import Comparisons

# Wrap the raw comparisons table in the project's Comparisons helper,
# configured with the scoring method and place level taken from cfg.
comp = Comparisons(
    comparisons_df,
    method_name=cfg.SCORING_METHOD,
    place_level=cfg.PLACE_LEVEL,
)

In [None]:
# Metrics reported by the Comparisons object; the cells below iterate over
# them and use each one as a folder name and as a score column name.
metrics = comp.get_metrics()
metrics

In [None]:
# Columns retained from each per-metric results table; any column that a given
# table does not contain is skipped when the selection is applied.
columns_to_keep = [
    # image identifier and location
    "image_id",
    "lat",
    "long",
    "city",
    "country",
    "continent",
    # AHP columns
    "AHPweight",
    "AHPScore",
    # Elo columns
    "EloRating",
    "EloScore",
    # Q-score column
    "Qscore",
]

In [None]:
%%time
summary_df = pd.DataFrame()

for metric in tqdm(metrics):
    verifyDir(f"{QSCORE_PATH}{metric}/")

    columns_change = {"AHPweight": f"{metric}_AHPweight", 
                      "AHPScore": f"{metric}",
                      "EloRating": f"{metric}_EloRating", 
                      "EloScore": f"{metric}",
                      "Qscore": metric,
                     }

    name_file = ""
    if "ahp" in cfg.SCORING_METHOD.lower():
        name_file = "weights"
    elif "elo" in cfg.SCORING_METHOD.lower():
        name_file = "ratings"
    else:
        name_file = "matches"

    if verifyFile(f"{QSCORE_PATH}{metric}/{name_file}.csv"):
        results_df = pd.read_csv(f"{QSCORE_PATH}{metric}/{name_file}.csv", sep=";", low_memory=False)

    else:
        if "ahp" in cfg.SCORING_METHOD.lower():
            comp.calculate(metric=metric, method="dict")
        else:
            comp.calculate(metric=metric)
        
        if "ahp" in cfg.SCORING_METHOD.lower() or "elo" in cfg.SCORING_METHOD.lower():
            comp.normalize(min_range=0, max_range=10)
        results_df = comp.get_scores()
        results_df.to_csv(f"{QSCORE_PATH}{metric}/{name_file}.csv", sep=";", index=False)

    results_df = results_df[[c for c in columns_to_keep if c in results_df.columns]].copy()
    results_df.rename(columns=columns_change, inplace=True)
    if len(summary_df)==0:
        summary_df = results_df.copy()
    else:
        summary_df = pd.merge(summary_df, results_df, how="outer", on=["image_id", "lat", "long", "city", "country", "continent"])

#### Adding image paths

In [None]:
# Collect every "<subfolder>/<name>.JPG" file under IMAGES_PATH, stored as a
# path relative to IMAGES_PATH and sorted by that relative path.
base_path = Path(IMAGES_PATH)
img_path_list = np.sort([str(p.relative_to(base_path)) for p in base_path.glob('*/*.JPG')])

# Map the bare file name (without folder and extension) to its relative path.
# The key is taken with Path.stem rather than by splitting on "/" and removing
# ".JPG" by text replacement: that stays correct when the OS uses a different
# path separator and only ever strips the final extension.
image_path_dict = {Path(str(path)).stem: path for path in img_path_list}

In [None]:
%%time
summary_df["image_path"] = summary_df["image_id"].map(image_path_dict)
summary_df = summary_df[~summary_df["image_path"].isna()].copy()
summary_df.fillna(0., inplace=True)
summary_df

In [None]:
# Write the per-image score table to "scores.csv" in the output folder
# (semicolon separated, without the index column).
summary_df.to_csv(f"{QSCORE_PATH}scores.csv", sep=";", index=False)

#### Aggregate information

In [None]:
# Per-city statistics: number of images plus the mean and the standard
# deviation of every metric score.
#
# The metric columns come from `metrics` (the same list that drives the output
# column order below) instead of a hard-coded list, so the aggregation and the
# final column selection cannot disagree. A single groupby replaces the two
# near-identical pivot tables; unlike pivot_table it does not drop a statistic
# column that is entirely NaN (e.g. the std when every city has one image),
# which previously made the final column selection fail.
group_keys = ["city", "country", "continent"]
metric_names = list(metrics)

# Number of rows (images) in each city group.
num_images = summary_df.groupby(group_keys).size().rename("num_images")
scores_by_city = summary_df.groupby(group_keys)[metric_names]

# Both tables are indexed by (city, country, continent, num_images).
aggregate_mean_df = (
    scores_by_city.mean()
    .add_suffix("_mean")
    .join(num_images)
    .set_index("num_images", append=True)
)
# pandas computes the sample standard deviation, so a group with a single
# image gets NaN here.
aggregate_std_df = (
    scores_by_city.std()
    .add_suffix("_std")
    .join(num_images)
    .set_index("num_images", append=True)
)

aggregate_df = aggregate_mean_df.join(aggregate_std_df).reset_index()

# Output layout: the group columns first, then mean/std pairs per metric.
new_columns = ["city", "country", "continent", "num_images"]
for metric in metric_names:
    new_columns.append(f"{metric}_mean")
    new_columns.append(f"{metric}_std")

aggregate_df = aggregate_df[new_columns].copy()
aggregate_df.to_csv(f"{QSCORE_PATH}aggregate_statistics.csv", sep=";", index=False)