In [1]:
import dpp
import numpy as np
import torch
import torch.nn as nn
import torch.distributions as td
from copy import deepcopy
import matplotlib.pyplot as plt
import seaborn as sns
torch.set_default_tensor_type(torch.cuda.FloatTensor)
import matplotlib
import pandas as pd
import pickle
from sklearn.metrics import f1_score
from sklearn.metrics import top_k_accuracy_score, f1_score
from dpp.eval_funcs import *

# matplotlib.rcParams['text.usetex'] = True
# plt.style.use(['science', 'ieee'])



# load dataset and model

In [2]:
# Run configuration shared by the cells below: compute device and the two
# thresholds that appear in the dataset / log directory names.
device = 'cuda:0'
time_threshold, trip_threshold = 15, 5

In [3]:
def calculate_correct_total_prediction(logits, true_y):
    """Count top-k hits and accumulate ranking metrics for one batch.

    Args:
        logits: 2-D tensor of per-class scores, one row per sample; a larger
            score means a more likely class.
        true_y: 1-D tensor with the true class index of every row.

    Returns:
        A 3-tuple ``(result, true_y_cpu, top1)`` where

        * ``result`` is a float32 array
          ``[correct@1, correct@3, correct@5, correct@10, sum_rr, sum_ndcg, total]``
          (sums over the batch, not averages),
        * ``true_y_cpu`` is ``true_y`` moved to the CPU,
        * ``top1`` is the 1-D CPU tensor of top-1 predicted classes.
    """
    num_classes = logits.shape[-1]
    top1 = []
    result_ls = []
    for k in (1, 3, 5, 10):
        # torch.topk cannot return more entries than there are classes.
        k = min(k, num_classes)
        prediction = torch.topk(logits, k=k, dim=-1).indices
        if k == 1:
            # Drop only the trailing k-dimension. A bare squeeze() would also
            # drop the batch dimension when the batch holds a single sample,
            # leaving a 0-d tensor that callers cannot iterate over.
            top1 = prediction.squeeze(-1).cpu()

        # A sample is a hit when its true class appears among the k predictions.
        hits = torch.eq(true_y[:, None], prediction).any(dim=1).sum().cpu().numpy()
        result_ls.append(hits)

    # Summed reciprocal rank and summed NDCG over the batch.
    result_ls.append(get_mrr(logits, true_y))
    result_ls.append(get_ndcg(logits, true_y))

    # Number of samples, so callers can turn the sums into averages.
    result_ls.append(true_y.shape[0])

    return np.array(result_ls, dtype=np.float32), true_y.cpu(), top1


def get_mrr(prediction, targets):
    """
    Sum the reciprocal rank of the true class over a batch.

    Args:
        prediction (B x C): per-class scores; a larger score ranks higher.
        targets (B): index of the true class for every row.

    Returns:
        0-d numpy array holding the summed (not averaged) reciprocal ranks.
        Divide by the number of samples to obtain the MRR.
    """
    # Class indices of every row, ordered from best to worst score.
    order = torch.argsort(prediction, dim=-1, descending=True)
    is_target = order == targets.unsqueeze(-1).expand_as(order)

    # The last column of nonzero() is the 0-based position inside the ranking.
    positions = is_target.nonzero()[:, -1]
    reciprocal_ranks = (positions + 1).float().reciprocal()

    return reciprocal_ranks.sum().cpu().numpy()


def get_ndcg(prediction, targets, k=10):
    """
    Sum the NDCG@k contribution of the true class over a batch.

    Args:
        prediction (B x C): per-class scores; a larger score ranks higher.
        targets (B): index of the true class for every row.
        k: cut-off rank; a true class ranked below ``k`` contributes 0.

    Returns:
        The summed (not averaged) gain ``1 / log2(rank + 1)`` of all samples.
    """
    # Class indices of every row, ordered from best to worst score.
    order = torch.argsort(prediction, dim=-1, descending=True)
    is_target = order == targets.unsqueeze(-1).expand_as(order)

    # 1-based rank of the true class, moved to numpy for the log2 below.
    ranks = (is_target.nonzero()[:, -1] + 1).float().cpu().numpy()

    gains = 1 / np.log2(ranks + 1)
    # Ranks beyond the cut-off earn nothing.
    gains[ranks > k] = 0

    return np.sum(gains)


def get_performance_dict(return_dict):
    """Turn summed batch counts into percentage metrics.

    Args:
        return_dict: mapping with the keys ``correct@1``, ``correct@3``,
            ``correct@5``, ``correct@10``, ``rr``, ``ndcg``, ``f1`` and
            ``total``.

    Returns:
        A new dict with those eight entries copied over, plus ``acc@1``,
        ``acc@5``, ``acc@10`` and ``mrr`` as percentages of ``total``. The
        ``ndcg`` entry is overwritten with its percentage of ``total``.
    """
    copied_keys = (
        "correct@1",
        "correct@3",
        "correct@5",
        "correct@10",
        "rr",
        "ndcg",
        "f1",
        "total",
    )
    perf = {key: return_dict[key] for key in copied_keys}

    # Accuracy is reported for k = 1, 5 and 10 only.
    for k in (1, 5, 10):
        perf[f"acc@{k}"] = perf[f"correct@{k}"] / perf["total"] * 100

    perf["mrr"] = perf["rr"] / perf["total"] * 100
    perf["ndcg"] = perf["ndcg"] / perf["total"] * 100

    return perf


def get_top_k_values(arr, k):
    """Return the ``k`` largest entries of a 1-D array and their indices.

    Args:
        arr: 1-D array-like of comparable values.
        k: number of entries to keep. A ``k`` larger than ``len(arr)`` returns
            every entry; ``k <= 0`` returns two empty arrays.

    Returns:
        ``(top_values, top_indices)``; both are ordered from the smallest to
        the largest of the selected values.
    """
    arr = np.asarray(arr)
    # Sort ascending and keep the index order.
    sorted_indices = np.argsort(arr)
    # Indices of the k largest values. A plain ``[-k:]`` slice would return
    # the whole array for k == 0 because ``-0 == 0``.
    top_indices = sorted_indices[-k:] if k > 0 else sorted_indices[:0]
    # The k largest values themselves.
    top_values = arr[top_indices]
    return top_values, top_indices

In [4]:
# One list per reported metric; the evaluation loop appends one value per run.
ACC_1, ACC_5, ACC_10 = [], [], []
F1s, MRRs, NDCGs = [], [], []

In [5]:
import os

# Pick the run directories to evaluate: the first five entries, in sorted
# order, of the log folder for this model.
mode_ = 'TrajTPP'
# Build the folder name from the configured thresholds so it always matches
# the path the evaluation loop loads from (previously hard-coded as "15-5").
folder_path = f'./log/{mode_}-{time_threshold}-{trip_threshold}/'
file_names = np.sort(os.listdir(folder_path))[:5]

file_names

array(['log-2024-04-08-16-29-01', 'log-2024-04-08-16-34-44',
       'log-2024-04-08-16-40-35', 'log-2024-04-08-16-46-57',
       'log-2024-04-08-16-52-53'], dtype='<U23')

In [6]:
# Decoder used for the inter-event time distribution; it only affects which
# output normalization statistics are computed further down in this cell.
decoder_name = 'LogNormMix' # other: ['RMTPP', 'FullyNeuralNet', 'Exponential', 'SOSPolynomial', 'DeepSigmoidalFlow']
batch_size = 1024

## General data config
# The dataset directory name encodes the two thresholds from the config cell.
dataset_name = './dataset/geolife'
dataset_name = f'{dataset_name}-{time_threshold}-{trip_threshold}/'

# Only the train split's second return value (num_drivers) is kept; it sizes
# the per-user tables in the evaluation loop.
d_train, num_drivers = dpp.data.load_dataset(f'{dataset_name}train', device=device)
d_val, _ = dpp.data.load_dataset(f'{dataset_name}val', device=device)
d_test, _ = dpp.data.load_dataset(f'{dataset_name}test', device=device)

# Calculate mean and std of the input inter-event times and normalize only input.
# The statistics come from the train split and are applied to all three splits.
mean_in_train, std_in_train = d_train.get_mean_std_in()
std_out_train = 1.0
d_train.normalize(mean_in_train, std_in_train, std_out_train)
d_val.normalize(mean_in_train, std_in_train, std_out_train)
d_test.normalize(mean_in_train, std_in_train, std_out_train)

# Break down long train sequences for faster batch training and create torch DataLoaders.
# Validation and test loaders use batch_size=1 and keep the original order.
d_train.break_down_long_sequences(128)
collate = dpp.data.collate
dl_train = torch.utils.data.DataLoader(d_train, batch_size=batch_size, shuffle=True, collate_fn=collate, generator=torch.Generator(device=device))
dl_val = torch.utils.data.DataLoader(d_val, batch_size=1, shuffle=False, collate_fn=collate, generator=torch.Generator(device=device))
dl_test = torch.utils.data.DataLoader(d_test, batch_size=1, shuffle=False, collate_fn=collate, generator=torch.Generator(device=device))

# Set the parameters for affine normalization layer depending on the decoder (see Appendix D.3 in the paper)
if decoder_name in {'RMTPP', 'FullyNeuralNet', 'Exponential'}:
    _, std_out_train = d_train.get_mean_std_out()
    mean_out_train = 0.0
else:
    mean_out_train, std_out_train = d_train.get_log_mean_std_out()
    
# Bounds on the inter-event time, mapped into the normalized input space with
# the train statistics.
# NOTE(review): 1440 looks like minutes per day, and taking np.log assumes the
# inputs were log-transformed before normalization — TODO confirm both.
upper_inter_time = 1440
lower_inter_time = 1e-2
upper_boundary = (np.log(upper_inter_time)-mean_in_train) / std_in_train
lower_boundary = (np.log(lower_inter_time)-mean_in_train) / std_in_train

In [7]:
# Evaluate every selected run: load its stored predictions, compute ranking
# and classification metrics, print the sklearn figures, and append the
# headline numbers to the accumulators (ACC_1, ACC_5, ACC_10, F1s, MRRs, NDCGs).
#
# Every loop below has its own variable name; the original reused `i` for the
# outer loop and three inner loops.

# Loop invariants.
mode = f'{mode_}-{time_threshold}-{trip_threshold}'
seed = 3407
top_k_list = [1, 5, 10]
metric_columns = ['Precision', 'Recall', 'F1', 'MRR', 'MAP', 'NDCG']
result_columns = [
    "correct@1",
    "correct@3",
    "correct@5",
    "correct@10",
    "rr",
    "ndcg",
    "total",
]

for log_name in file_names:
    np.random.seed(seed)
    torch.manual_seed(seed)

    # NOTE(security): pickle.load can execute arbitrary code from the file;
    # only point this at logs produced by your own runs.
    with open(f'./log/{mode}/{log_name}/evaluation_tpp.pkl', 'rb') as f:
        data = pickle.load(f)

    y_mark = data['y_mark']
    y_mark_hat = data['y_mark_hat']
    y_mark_hat_prob = data['y_mark_hat_prob']
    # Number of candidate classes, read from the score matrix instead of the
    # previously hard-coded 1187.
    num_classes = np.shape(y_mark_hat_prob)[-1]

    true_ls = []
    top1_ls = []
    time_ls = []

    result_arr = np.zeros(len(result_columns), dtype=np.float32)
    # Per-user accumulators. Nothing below fills them, so the per-user table
    # built from them contains only zeros.
    result_dict = {}
    count_user = {}
    for user_id in range(1, num_drivers):
        result_dict[user_id] = np.zeros(len(result_columns), dtype=np.float32)
        count_user[user_id] = 0

    logits = torch.tensor(y_mark_hat_prob)
    y = torch.tensor(y_mark)

    # All predictions of the run are scored as one batch.
    batch_result_arr, batch_true, batch_top1 = calculate_correct_total_prediction(logits, y)
    result_arr += batch_result_arr
    true_ls.extend(batch_true.numpy())
    top1_ls.extend(batch_top1.numpy())
    f1 = f1_score(true_ls, top1_ls, average="weighted")

    return_dict = {
        "correct@1": result_arr[0],
        "correct@3": result_arr[1],
        "correct@5": result_arr[2],
        "correct@10": result_arr[3],
        "f1": f1,
        "rr": result_arr[4],
        "ndcg": result_arr[5],
        "total": result_arr[6]
        }
    result_arr_user = result_dict

    performance = get_performance_dict(return_dict)
    performance["type"] = "test"
    # print(performance)

    result_user_df = pd.DataFrame(result_arr_user).T
    result_user_df.columns = result_columns
    result_user_df.index.name = "user"

    # Cross-check with scikit-learn's own top-k accuracy and weighted F1.
    for k in top_k_list:
        acc_ = top_k_accuracy_score(
            y_true=y_mark,
            y_score=y_mark_hat_prob,
            labels=np.arange(num_classes),
            k=k
        )

        print(f'acc@{k}: {acc_:.3f}')

    f1_score_ = f1_score(y_true=y_mark, y_pred=y_mark_hat, average='weighted')
    print(f'f1_score: {f1_score_:.3f}')

    # Ranking metrics from the project's `top_k` helper, keyed "<Metric>@<k>".
    result = {}
    total = y_mark.shape[0]

    for topk in top_k_list:
        hit, rank, dcg = top_k(loc_pred=y_mark_hat_prob, loc_true=y_mark, topk=topk)

        precision = hit / (total * topk)
        # Recall@k doubles as the accuracy@k that is accumulated below.
        recall = hit / total
        if precision + recall == 0:
            f1_at_k = 0.0
        else:
            f1_at_k = (2 * precision * recall) / (precision + recall)

        # Insertion order matters: the table below is filled in this order.
        result[f'Precision@{topk}'] = precision
        result[f'Recall@{topk}'] = recall
        result[f'F1@{topk}'] = f1_at_k
        result[f'MRR@{topk}'] = rank / total
        # NOTE(review): MAP@k is computed from the same quantity as MRR@k;
        # presumably fine with one relevant item per sample — confirm.
        result[f'MAP@{topk}'] = rank / total
        result[f'NDCG@{topk}'] = dcg / total

    # Reshape the flat result dict into a (k x metric) table.
    n_metrics = len(metric_columns)
    df = np.zeros(shape=(len(top_k_list), n_metrics))

    for flat_idx, key in enumerate(result):
        row, col = divmod(flat_idx, n_metrics)
        df[row, col] = result[key]

    df = pd.DataFrame(df, columns=metric_columns, index=top_k_list)

    ACC_1.append(result['Recall@1'])
    ACC_5.append(result['Recall@5'])
    ACC_10.append(result['Recall@10'])
    F1s.append(f1_score_)
    MRRs.append(result['MRR@10'])
    NDCGs.append(result['NDCG@10'])

acc@1: 0.334
acc@5: 0.574
acc@10: 0.613
f1_score: 0.256
acc@1: 0.337
acc@5: 0.587
acc@10: 0.629
f1_score: 0.245
acc@1: 0.347
acc@5: 0.584
acc@10: 0.625
f1_score: 0.246
acc@1: 0.343
acc@5: 0.590


acc@10: 0.635
f1_score: 0.238
acc@1: 0.330
acc@5: 0.581
acc@10: 0.606
f1_score: 0.231


In [8]:
# Convert the per-run metric lists into NumPy arrays so the cells below can
# call .mean() and .std() on them.
ACC_1, ACC_5, ACC_10, F1s, MRRs, NDCGs = (
    np.array(values) for values in (ACC_1, ACC_5, ACC_10, F1s, MRRs, NDCGs)
)

In [9]:
# Top-1 accuracy over all evaluated runs: mean then standard deviation, in percent.
print('----------ACC_1:----------')
print(*(100 * stat for stat in (ACC_1.mean(), ACC_1.std())))

----------ACC_1:----------
33.84793292859208 0.6239223340949963


In [10]:
# Top-5 accuracy over all evaluated runs: mean then standard deviation, in percent.
print('----------ACC_5:----------')
print(*(100 * stat for stat in (ACC_5.mean(), ACC_5.std())))

----------ACC_5:----------
58.32899681989014 0.5474932900267485


In [11]:
# Top-10 accuracy over all evaluated runs: mean then standard deviation, in percent.
print('----------ACC_10:----------')
print(*(100 * stat for stat in (ACC_10.mean(), ACC_10.std())))

----------ACC_10:----------
62.1740387395201 1.0848951706581522


In [12]:
# Weighted F1 over all evaluated runs: mean then standard deviation, in percent.
print('----------F1s:----------')
print(*(100 * stat for stat in (F1s.mean(), F1s.std())))

----------F1s:----------
24.310745167745797 0.8364300844545187


In [13]:
# MRR@10 over all evaluated runs: mean then standard deviation, in percent.
print('----------MRRs:----------')
print(*(100 * stat for stat in (MRRs.mean(), MRRs.std())))

----------MRRs:----------
44.32104195634105 0.6580350245919784


In [14]:
# NDCG@10 over all evaluated runs: mean then standard deviation, in percent.
print('----------NDCGs:----------')
print(*(100 * stat for stat in (NDCGs.mean(), NDCGs.std())))

----------NDCGs:----------
48.71933044542166 0.7178390328452898
