# Metrics Reading Tools
for paper "Transformer-Powered Content-Aware Collaborative Filtering with Cross-System Contrastive Learning"

Author: Weizhe Lin

Created: 10/10/2021 for GitHub Release

In [None]:
import os
import json
from pprint import pprint
from easydict import EasyDict
import numpy as np
from tqdm import tqdm
import pandas as pd
from tensorboard.backend.event_processing import event_accumulator

In [None]:
def read_json(file_path):
    """Load a JSON document from ``file_path``.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON content, or ``None`` when ``file_path`` does not
        exist.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid
            JSON.
    """
    if not os.path.exists(file_path):
        return None
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

In [None]:
def read_tb(tb_file):
    """Load TensorBoard event data into an ``EventAccumulator``.

    Args:
        tb_file: Path handed unchanged to ``EventAccumulator``.

    Returns:
        The accumulator, after ``Reload()`` has been called on it.
    """
    # Per-category caps on how many events the accumulator retains.
    # TensorBoard documents a size of 0 as "keep every event", so the full
    # scalar history stays available.
    guidance = {
        event_accumulator.COMPRESSED_HISTOGRAMS: 500,
        event_accumulator.IMAGES: 4,
        event_accumulator.AUDIO: 4,
        event_accumulator.SCALARS: 0,
        event_accumulator.HISTOGRAMS: 1,
    }
    accumulator = event_accumulator.EventAccumulator(tb_file, size_guidance=guidance)
    accumulator.Reload()
    return accumulator
def find_best_performance(ea, to_find='test/recall_at_20'):
    """Report every scalar metric at the step where ``to_find`` peaks.

    Args:
        ea: Object exposing ``Scalars(tag)`` (a sequence of events with
            ``step`` and ``value`` fields) and ``scalars.Keys()`` (the
            available scalar tags), such as the value returned by
            ``read_tb``.
        to_find: Scalar tag whose maximum value defines the best step.

    Returns:
        A dict mapping each scalar tag to ``{'best': value, 'epoch': step}``,
        where ``value`` is that tag's value at the best step. Tags that have
        no event at the best step are left out.

    Raises:
        KeyError: Propagated from ``ea.Scalars`` when ``to_find`` is not a
            recorded tag (behaviour of the underlying accumulator).
    """
    reference = pd.DataFrame(ea.Scalars(to_find))
    # idxmax() returns the row label of the first maximum; a Series only has
    # axis 0, so no axis argument is passed.
    best_epoch = reference.loc[reference['value'].idxmax(), 'step']

    best_performance = {}
    for metric in ea.scalars.Keys():
        events = pd.DataFrame(ea.Scalars(metric))
        values_at_best = events.loc[events['step'] == best_epoch, 'value']
        if values_at_best.empty:
            # This metric was not logged at the best step; skip it rather
            # than fail the whole log file.
            continue
        best_performance[metric] = {
            # If a step was logged more than once, the first event wins.
            'best': values_at_best.iloc[0],
            'epoch': best_epoch,
        }
    return best_performance
    

In [None]:
# Root folders to scan; every entry inside each root is treated as one
# experiment, named after that entry.
EXPERIMENT_FOLDERS = [
    '/path/to/project/Experiments',
]
# Root folders holding TensorBoard logs; an experiment's logs are looked up in
# a subfolder with the same name as the experiment.
TB_FOLDERS = [
    '/path/to/project/Data_TB/tb_logs',
]
# Scalar tag whose maximum selects the "best" step; all other metrics are
# read at that step.
READ_METRICS_AT_BEST = 'test/recall_at_100'

In [None]:
# Collect, for every experiment found under EXPERIMENT_FOLDERS, the contents
# of its metrics.json merged with the TensorBoard scalars read at the best
# step (as selected by READ_METRICS_AT_BEST).
experiments = {}
for exp_folder in EXPERIMENT_FOLDERS:
    if not os.path.exists(exp_folder):
        print('this path not exist', exp_folder)
        continue
    for exp_name in tqdm(os.listdir(exp_folder)):
        exp_path = os.path.join(exp_folder, exp_name)
        # NOTE(review): config_path is computed but not used in the visible cells.
        config_path = os.path.join(exp_path, 'train', 'config.jsonnet')
        metrics_path = os.path.join(exp_path, 'train', 'saved_model', 'metrics.json')

        # Use the first TensorBoard root that has a folder for this experiment.
        tb_path = ''
        for tb_folder in TB_FOLDERS:
            test_tb_path = os.path.join(tb_folder, exp_name)
            if os.path.exists(test_tb_path):
                tb_path = test_tb_path
                break

        best_perf = None
        if tb_path:
            tb_files = os.listdir(tb_path)
            # Best effort: stop at the first log file that can be read and
            # contains the reference metric; otherwise try the next one.
            for tb_file in tb_files:
                tb_file_path = os.path.join(tb_path, tb_file)
                try:
                    ea = read_tb(tb_file_path)
                    best_perf = find_best_performance(ea, READ_METRICS_AT_BEST)
                    break
                except Exception as err:
                    # Catch Exception rather than everything, so that Ctrl-C
                    # still interrupts a long scan, and report the cause.
                    print('reading failed at', tb_file_path, repr(err))

        metrics = read_json(metrics_path)
        # TensorBoard values are merged only when metrics.json exists; an
        # experiment without it keeps metrics = None.
        if metrics is not None and best_perf is not None:
            metrics.update(best_perf)
        print(exp_name)
        experiments[exp_name] = {
            'exp_name': exp_name,
            'path': exp_path,
            'metrics': metrics,
        }
# EasyDict gives attribute-style access (e.g. ``exp_data.metrics``).
experiments = EasyDict(experiments)

In [None]:
# Drop experiments whose metrics are missing (``metrics is None``).
filtered_experiments = {}
for exp_name, exp_data in experiments.items():
    if exp_data.metrics is None:
        continue
    filtered_experiments[exp_name] = exp_data

In [None]:
# Build one flat record per retained experiment, keyed by a running integer
# id: the experiment name plus the ``best`` value of each of its metrics.
exp_data_dict = {}

for exp_id, (exp_name, exp_entry) in enumerate(filtered_experiments.items()):
    gather = {'exp_name': exp_name}
    gather.update(
        (metrics_name, metrics_data.best)
        for metrics_name, metrics_data in exp_entry.metrics.items()
    )
    exp_data_dict[exp_id] = gather

In [None]:
# from_dict() puts each experiment id in a column; transposing yields one row
# per experiment and one column per metric.
exp_pd_data = pd.DataFrame.from_dict(exp_data_dict).transpose()

# Amazon Dataset

In [None]:
# Amazon experiments to display. Each dataset cell assigns
# `select_experiments`, so the cell run last decides what is shown.
select_experiments = [
    'NRMS_BERT_Amazon_LR_0.0001_Layer_3_History_10_NoAttMask',
    #...
]

# Movie Dataset
## 19 days

In [None]:
# Movie (19 days) experiments to display. Each dataset cell assigns
# `select_experiments`, so the cell run last decides what is shown.
select_experiments = [
    "NRMS_BERT_Movie_19d_ColdStart_LR_0.0005_Layer_1_History_30_NoAttMask",
    #...
]

## 39 days

In [None]:
# Movie (39 days) experiments to display. The list is empty as written, so
# running this cell as-is selects no experiments; fill in names before use.
select_experiments = [
    #...
]

# Get Metrics

In [None]:
# Show complete tables: no limit on rows or columns, no fixed display width,
# and no truncation of cell contents.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
# None disables truncation; the former -1 value was deprecated in pandas 1.0
# and is rejected by newer releases.
pd.set_option('display.max_colwidth', None)

In [None]:
# Keep the rows whose experiment name is in `select_experiments`, ordered
# from highest to lowest 'test/recall_at_20'.
is_selected = exp_pd_data['exp_name'].isin(select_experiments)
show_df = exp_pd_data.loc[is_selected].sort_values(by='test/recall_at_20', ascending=False)

In [None]:
# Display the full table of selected experiments, with every metric column.
show_df

In [None]:
# Metric columns without the 'test/' prefix.
# NOTE(review): presumably these come from metrics.json - confirm.
unprefixed_columns = [
    'exp_name',
    'recall_at_20', 'recall_at_60', 'recall_at_100',
    'ndcg_at_20', 'ndcg_at_60', 'ndcg_at_100',
    'hit_ratio_at_20', 'hit_ratio_at_60', 'hit_ratio_at_100',
]
show_df[unprefixed_columns]

In [None]:
# The 'test/...' scalar columns (recall, NDCG, hit ratio at 20/60/100).
test_columns = [
    'exp_name',
    'test/recall_at_20', 'test/recall_at_60', 'test/recall_at_100',
    'test/ndcg_at_20', 'test/ndcg_at_60', 'test/ndcg_at_100',
    'test/hit_ratio_at_20', 'test/hit_ratio_at_60', 'test/hit_ratio_at_100',
]
show_df[test_columns]

In [None]:
# Overall and cold-start recall/NDCG at 20, shown together.
cold_start_columns = [
    'exp_name',
    'recall_at_20', 'cold_start_recall_at_20',
    'ndcg_at_20', 'cold_start_ndcg_at_20',
]
show_df[cold_start_columns]