## Imports and Setup

In [None]:
# Notebook parameters.
# Repository directory inside the mounted Google Drive; the next cell chdir()s into it.
target_directory = '/content/drive/My Drive/Thesis_Repo/Code'
# Dataset name; forwarded as --dataset_name when the argument list is built further down.
target_dataset = 'Googlemap_CT'

In [None]:
import os
from google.colab import drive
# Mount Google Drive under /content/drive so target_directory becomes reachable.
drive.mount('/content/drive')
# Work from the repository directory: the project imports (models.*, utils.*) and
# the relative '../DyLink_Datasets/...' paths used below are resolved from here.
os.chdir(target_directory)

Mounted at /content/drive


In [None]:
!pip install bert-score

Collecting bert-score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bert-score
Successfully installed bert-score-0.3.13


In [None]:
import logging
import time
import sys
import os
from tqdm import tqdm
import numpy as np
import warnings
import shutil
import json
import torch
import torch.nn as nn
from bert_score import score as bert_sim_score
import pandas as pd
import pickle
from transformers import (AutoConfig, AutoModel, AutoModelForSeq2SeqLM,
                          AutoTokenizer, GenerationConfig, LlamaForCausalLM,
                          LlamaTokenizer, pipeline, AutoModelForCausalLM)
import yaml
from huggingface_hub import login

from models.TGAT import TGAT
from models.MemoryModel import MemoryModel, compute_src_dst_node_time_shifts
from models.CAWN import CAWN
from models.TCL import TCL
from models.GraphMixer import GraphMixer
from models.DyGFormer import DyGFormer
from models.modules import MergeLayer, MLPClassifier, MLPClassifier_edge, NMergeLayer
from utils.utils import set_random_seed, convert_to_gpu, get_parameter_sizes, create_optimizer
from utils.utils import get_neighbor_sampler
from evaluate_models_utils import evaluate_model_edge_classification
from utils.metrics import get_edge_classification_metrics
from utils.DataLoader import Data, get_idx_data_loader, get_edge_classification_data
from utils.EarlyStopping import EarlyStopping
from utils.load_configs import get_edge_classification_args

In [None]:
# Emulate the command line of train_edge_classification.py so the project's
# argument loader can be reused from the notebook. The argument vector is built
# as a list rather than by splitting one string on spaces, so a dataset name
# that contains whitespace stays a single argument.
sys.argv = [
    "train_edge_classification.py",
    "--dataset_name", target_dataset,
    "--model_name", "DyGFormer",
    "--patch_size", "2",
    "--max_input_sequence_length", "64",
    "--num_runs", "1",
    "--num_epochs", "8",
    "--test_interval_epochs", "1",
    "--gpu", "0",
    "--use_feature", "Bert",
]
# Flat string form, kept because earlier versions of this cell defined it.
input_args = " ".join(sys.argv)
args = get_edge_classification_args(is_evaluation=False)

## [Run Once Per Dataset] Sentiment Data Setup

In [None]:
# This step can be skipped if it was already run in Run_DTGB_Train_Edge_Retrieval.ipynb

from get_pretrained_embeddings import get_pretrained_embeddings

# Run the project's embedding-preparation step for the selected dataset.
# NOTE(review): presumably computes and caches the pretrained text embeddings on
# disk — confirm in get_pretrained_embeddings.py.
get_pretrained_embeddings(args.dataset_name)

In [None]:
import os
import warnings

warnings.simplefilter("ignore")

# SECURITY: a Hugging Face access token used to be hard-coded on this line.
# Any token that was committed must be revoked. The token is now read from the
# HF_TOKEN environment variable; when it is unset, login() falls back to its
# interactive prompt.
login(token=os.environ.get("HF_TOKEN"))

# Which labeller to build: 'siebert' (binary sentiment classifier) or 'mistral'
# (few-shot prompted text generation).
model_type = 'siebert' #'mistral'
pipe = None
# Query template; '[TGT]' is replaced by each review text in get_data().
prompt = '[TGT]'

if model_type == 'mistral':
    model_path = "mistralai/Mistral-7B-Instruct-v0.2"
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=10,
        temperature=0.1,
        repetition_penalty=1.15,
        device=args.device
    )

    prompt = """[INST] Classify the sentiment of the following review for a google maps store as either positive, negative, or neutral. Complete the last label with the word positive, neutral, or negative.

    Review: a real gem. Amazing food!
    Label: positive

    Review: Rude staff!
    Label: negative

    Review: Pretty expensive, but food is decent
    Label: neutral

    Review: [TGT]
    Label: [/INST]"""

    # Example query for a manual smoke test. It is stored under its own name so
    # that `prompt` keeps the '[TGT]' placeholder: overwriting `prompt` here made
    # every later query classify this one example review instead of the data.
    example_query = prompt.replace('[TGT]', "A good breakfast place. There aren't many diners where I can get a Texas chili omelette. This happens to be one of those places.")

else:
    pipe = pipeline("sentiment-analysis",model="siebert/sentiment-roberta-large-english",device=args.device,batch_size=16)

config.json:   0%|          | 0.00/687 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/256 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

In [None]:
import warnings

# Suppress Python warnings emitted by the code below.
warnings.simplefilter("ignore")

# Display names for the integer class ids produced by outputToClass
# (-1 is its "could not parse" result).
class_id_to_name = {0: 'negative', 1: 'positive', 2: 'neutral', -1: 'unknown'}
def outputToClass(optTxt):
    """Translate a model output string into an integer sentiment class id.

    The text is lower-cased and only its first 25 characters are inspected.
    Keywords are tried in the order negative, positive, neutral, so when
    several occur in that window the earlier one in this order wins.

    Returns:
        0 for negative, 1 for positive, 2 for neutral, -1 when no keyword
        is found.
    """
    head = optTxt.lower()[:25]
    for class_id, keyword in enumerate(('negative', 'positive', 'neutral')):
        if keyword in head:
            return class_id
    return -1  # failure case

def get_data(tgt_texts, prompt):
    """Lazily yield one classifier query per target text.

    Each text is encoded with the module-level pipeline's tokenizer, truncated
    to at most 400 tokens, decoded back to a string and substituted for the
    '[TGT]' placeholder in ``prompt``.

    Args:
        tgt_texts: iterable of texts (each is passed through ``str()``).
            The input is no longer modified in place.
        prompt: query template containing the '[TGT]' placeholder.

    Yields:
        str: the prompt with the truncated text filled in.
    """
    tokenizer = pipe.tokenizer
    for text in tgt_texts:
        token_ids = tokenizer.encode(str(text), truncation=True, max_length=400)
        # Drop the special tokens that encode() added instead of slicing the
        # decoded string with [3:-4]: that slice only matches a "<s>...</s>"
        # wrapper and cuts real characters when the tokenizer wraps differently.
        truncated_text = tokenizer.decode(token_ids, skip_special_tokens=True)
        yield prompt.replace('[TGT]', truncated_text)

def _extract_label_text(result):
    """Return the text to classify from a single pipeline result.

    Classification pipelines yield a dict with a 'label' key; text-generation
    pipelines yield a list of dicts with a 'generated_text' key.
    """
    if isinstance(result, (list, tuple)):
        result = result[0]
    if 'label' in result:
        return result['label']
    return result['generated_text']


def get_edge_sentiment_data(edge_texts, args, model_type, pipe, prompt, num_train = 5000, num_val = 1000, num_test = 1000):
    """Build an edge-classification dataset whose labels are predicted sentiments.

    Loads the dataset splits through ``get_edge_classification_data``, keeps a
    random subset of each split, runs ``pipe`` over the text of every kept edge,
    overwrites ``labels`` with the class id from ``outputToClass`` and drops
    edges whose output could not be parsed (class id -1).

    Args:
        edge_texts: DataFrame with an edge-id column 'i' and a 'text' column.
        args: parsed arguments; ``dataset_name``, ``val_ratio`` and
            ``test_ratio`` are read here and ``args`` is forwarded to the loader.
        model_type: 'mistral' selects the text-generation result format and
            3 classes; any other value means 2 classes.
        pipe: Hugging Face pipeline used for labelling.
        prompt: query template containing the '[TGT]' placeholder.
        num_train, num_val, num_test: maximum number of edges sampled per split.

    Returns:
        A 10-tuple: node features, edge features, unmodified copy of the full
        data, unmodified copy of the training data, relabelled train / val /
        test subsets, ``None``, ``None`` (new-node splits are not produced) and
        the number of classes.
    """
    node_raw_features, edge_raw_features, full_data, train_data, val_data, test_data, _, _, _ = get_edge_classification_data(dataset_name=args.dataset_name, val_ratio=args.val_ratio, test_ratio=args.test_ratio, args = args)

    # Keep untouched copies before the splits are subsampled and relabelled.
    train_data_no_labels = train_data.copy()
    full_data_no_labels = full_data.copy()

    # Index the texts by edge id once instead of once per split.
    text_by_edge_id = edge_texts.set_index('i')['text']

    pipe_kwargs = {'batch_size': 16}
    if model_type == 'mistral':
        # Generation pipelines echo the prompt by default; the few-shot prompt
        # itself contains all three label words, so only the completion may be
        # handed to outputToClass.
        pipe_kwargs['return_full_text'] = False

    for data, n_samples in ((train_data, num_train), (val_data, num_val), (test_data, num_test)):

        # Select the desired number of entries at random (global numpy RNG).
        shuffledIndices = np.arange(len(data.labels))
        np.random.shuffle(shuffledIndices)
        mask = np.zeros(len(data.labels), dtype=bool)
        mask[shuffledIndices[:n_samples]] = True
        data.apply_mask(mask)

        # Calculate the new labels
        tgt_texts = [str(txt) for txt in text_by_edge_id.loc[data.edge_ids].tolist()]

        queries = get_data(tgt_texts, prompt)
        for i, result in enumerate(tqdm(pipe(queries, **pipe_kwargs), total=len(tgt_texts))):
            data.labels[i] = outputToClass(_extract_label_text(result))

        # Remove all unknown data labels
        mask = ~(data.labels == -1)
        data.apply_mask(mask)

    new_node_val_data = None
    new_node_test_data = None
    cat_num = 3 if model_type == 'mistral' else 2
    return  node_raw_features, edge_raw_features, full_data_no_labels, train_data_no_labels, train_data, val_data, test_data, new_node_val_data, new_node_test_data, cat_num


# Build the sentiment-labelled dataset once and cache it next to the raw data.
dataset_dir = f'../DyLink_Datasets/{args.dataset_name}'
edge_texts = pd.read_csv(f'{dataset_dir}/relation_text.csv')

# Fix the seed so the random subsampling inside get_edge_sentiment_data repeats.
set_random_seed(seed=0)
processed_data = list(get_edge_sentiment_data(edge_texts, args, model_type, pipe, prompt, 200000, 25000, 25000))
(node_raw_features, edge_raw_features, full_data_no_labels, train_data_no_labels,
 train_data, val_data, test_data, new_node_val_data, new_node_test_data, cat_num) = processed_data

with open(f'{dataset_dir}/sentiment_data_250k.pkl', 'wb') as file:
    pickle.dump(processed_data, file)
print("Done")

get pretrained features
The dataset has 1000000 interactions, involving 167116 different nodes
The training dataset has 557562 interactions, involving 108947 different nodes
The validation dataset has 149963 interactions, involving 78222 different nodes
The test dataset has 149876 interactions, involving 77224 different nodes
The new node validation dataset has 95297 interactions, involving 61189 different nodes
The new node test dataset has 103478 interactions, involving 63435 different nodes
16711 nodes were used for the inductive testing, i.e. are never seen during training


100%|██████████| 200000/200000 [1:04:38<00:00, 51.56it/s]
100%|██████████| 25000/25000 [06:23<00:00, 65.26it/s]
100%|██████████| 25000/25000 [06:13<00:00, 66.96it/s]


Done


## Setup and Train Model

In [None]:
# Load the cached sentiment-labelled dataset written by the "Sentiment Data Setup" section.
# NOTE: unpickling executes arbitrary code; only load files produced by this notebook.
with open('../DyLink_Datasets/' + args.dataset_name + '/sentiment_data_250k.pkl', 'rb') as file:
    processed_data = pickle.load(file)
node_raw_features, edge_raw_features, full_data_no_labels, train_data_no_labels, train_data, val_data, test_data, new_node_val_data, new_node_test_data, cat_num = processed_data

# Run label balancing (b/c default dataset has ~85% positive samples).
# Every negative (label 0) is kept together with the first n_neg positives
# (label 1) in stored order; entries with any other label are dropped.
for data in [train_data, val_data, test_data]:
    n_neg = np.sum(data.labels == 0)
    print(f"Original imbalance: {int(100 - 100 * float(n_neg) / len(data.labels))}% positive")
    positive_indices = np.where(data.labels == 1)[0]
    positive_mask = np.zeros_like(data.labels, dtype=bool)
    positive_mask[positive_indices[:n_neg]] = True
    full_mask = (data.labels == 0) | positive_mask
    data.apply_mask(full_mask)

# Create in-distribution validation/test. Only done for edge classification (the recommendation side segments by time), just to demonstrate POC
all_data = train_data.calc_merged(val_data).calc_merged(test_data)
n_total = all_data.labels.size
# 0 = train, 1 = validation, 2 = test
new_split = np.zeros_like(all_data.labels, dtype=int)
# Use a dedicated, fixed-seed generator: the global numpy RNG is unseeded when
# the notebook is started from this section, which made the 80/10/10 split
# differ between sessions. A local generator also leaves global RNG state alone.
split_rng = np.random.default_rng(0)
val_and_test_indices = split_rng.choice(n_total, size=int(0.2 * n_total), replace=False)
half = len(val_and_test_indices) // 2
new_split[val_and_test_indices[:half]] = 1
new_split[val_and_test_indices[half:]] = 2
train_data = all_data.copy()
val_data = all_data.copy()
test_data = all_data.copy()
train_data.apply_mask(new_split == 0)
val_data.apply_mask(new_split == 1)
test_data.apply_mask(new_split == 2)

# Disabled alternative: pull the original labels rather than the sentiment labels.
#
# # get data for training, validation and testing
# node_raw_features, edge_raw_features, full_data, train_data, val_data, test_data, new_node_val_data, new_node_test_data, cat_num = \
#     get_edge_classification_data(dataset_name=args.dataset_name, val_ratio=args.val_ratio, test_ratio=args.test_ratio, args = args)
# full_data_no_labels = full_data.copy()
# train_data_no_labels = train_data.copy()

# initialize training neighbor sampler to retrieve temporal graph
train_neighbor_sampler = get_neighbor_sampler(data=train_data_no_labels, sample_neighbor_strategy=args.sample_neighbor_strategy,
                                              time_scaling_factor=args.time_scaling_factor, seed=0)

# initialize validation and test neighbor sampler to retrieve temporal graph
full_neighbor_sampler = get_neighbor_sampler(data=full_data_no_labels, sample_neighbor_strategy=args.sample_neighbor_strategy,
                                              time_scaling_factor=args.time_scaling_factor, seed=1)
# get data loaders
train_idx_data_loader = get_idx_data_loader(indices_list=list(range(len(train_data.src_node_ids))), batch_size=args.batch_size, shuffle=True)
val_idx_data_loader = get_idx_data_loader(indices_list=list(range(len(val_data.src_node_ids))), batch_size=args.batch_size, shuffle=True)
test_idx_data_loader = get_idx_data_loader(indices_list=list(range(len(test_data.src_node_ids))), batch_size=args.batch_size, shuffle=True)

Original imbalance: 84% positive
Original imbalance: 83% positive
Original imbalance: 82% positive


## Evaluate Model

Note: The ranking-evaluation cell at the end of this section loads `processed_data.pkl`, so it requires first running the Initial Data Setup section of Run_DTGB_Train_Link_Recommendation.ipynb.

In [None]:
# Train and evaluate the edge classifier args.num_runs times (seed = run index).
# For each run: set up logging, build DyGFormer + MLP classifier, train with
# early stopping on the validation metrics, reload the best checkpoint, evaluate
# on validation and test, and write the metrics to a JSON file.
# Per-run metric dicts are collected in the *_all_runs lists.
val_metric_all_runs, new_node_val_metric_all_runs, test_metric_all_runs = [], [], []

for run in range(args.num_runs):

    set_random_seed(seed=run)

    args.seed = run
    args.save_model_name = f'edge_classification_{args.model_name}_seed{args.seed}{args.use_feature}'

    # set up logger (handlers are attached to the root logger)
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    os.makedirs(f"./logs/{args.model_name}/{args.dataset_name}/{args.save_model_name}/", exist_ok=True)
    # create file handler that logs debug and higher level messages
    fh = logging.FileHandler(f"./logs/{args.model_name}/{args.dataset_name}/{args.save_model_name}/{str(time.time())}.log")
    fh.setLevel(logging.DEBUG)
    # create console handler with a higher log level
    ch = logging.StreamHandler()
    ch.setLevel(logging.WARNING)
    # create formatter and add it to the handlers
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)
    # add the handlers to logger
    logger.addHandler(fh)
    logger.addHandler(ch)

    run_start_time = time.time()
    logger.info(f"********** Run {run + 1} starts. **********")

    logger.info(f'configuration is {args}')

    logger.info(f'node feature size {node_raw_features.shape}')
    logger.info(f'edge feature size {edge_raw_features.shape}')
    logger.info(f'node feature example {node_raw_features[1][:5]}')
    logger.info(f'edge feature example {edge_raw_features[1][:5]}')

    # create model (the backbone is always DyGFormer here, regardless of args.model_name)
    dynamic_backbone = DyGFormer(node_raw_features=node_raw_features, edge_raw_features=edge_raw_features, neighbor_sampler=train_neighbor_sampler,
                                  time_feat_dim=args.time_feat_dim, channel_embedding_dim=args.channel_embedding_dim, patch_size=args.patch_size,
                                  num_layers=args.num_layers, num_heads=args.num_heads, dropout=args.dropout,
                                  max_input_sequence_length=args.max_input_sequence_length, device=args.device)

    # NMergeLayer(input_dims=[node_raw_features.shape[1]] * 4, hidden_dim=node_raw_features.shape[1] * 4, output_dim=2) #
    edge_classifier =  MLPClassifier_edge(input_dim=node_raw_features.shape[1], dropout=args.dropout, cat_num=cat_num)
    # model[0] is the temporal backbone, model[1] the edge classifier
    model = nn.Sequential(dynamic_backbone, edge_classifier)
    logger.info(f'model -> {model}')
    # the size estimate multiplies the parameter count by 4 (assumes 4 bytes per parameter)
    logger.info(f'model name: {args.model_name}, #parameters: {get_parameter_sizes(model) * 4} B, '
                f'{get_parameter_sizes(model) * 4 / 1024} KB, {get_parameter_sizes(model) * 4 / 1024 / 1024} MB.')

    optimizer = create_optimizer(model=model, optimizer_name=args.optimizer, learning_rate=args.learning_rate, weight_decay=args.weight_decay)

    model = convert_to_gpu(model, device=args.device)

    # start from an empty checkpoint folder for this run
    save_model_folder = f"./saved_models/{args.model_name}/{args.dataset_name}/{args.save_model_name}/"
    shutil.rmtree(save_model_folder, ignore_errors=True)
    os.makedirs(save_model_folder, exist_ok=True)

    early_stopping = EarlyStopping(patience=args.patience, save_model_folder=save_model_folder,
                                    save_model_name=args.save_model_name, logger=logger, model_name=args.model_name)

    loss_func = nn.CrossEntropyLoss()

    for epoch in range(args.num_epochs):
        model.train()
        # training uses the training sampler; evaluation below is given full_neighbor_sampler
        model[0].set_neighbor_sampler(train_neighbor_sampler)

        # store train losses and metrics
        train_total_loss, train_y_trues, train_y_predicts = 0.0, [], []
        train_idx_data_loader_tqdm = tqdm(train_idx_data_loader, ncols=120)
        for batch_idx, train_data_indices in enumerate(train_idx_data_loader_tqdm):
            train_data_indices = train_data_indices.numpy()
            batch_src_node_ids, batch_dst_node_ids, batch_node_interact_times, batch_edge_ids, batch_labels = \
                train_data.src_node_ids[train_data_indices], train_data.dst_node_ids[train_data_indices], \
                train_data.node_interact_times[train_data_indices], train_data.edge_ids[train_data_indices], train_data.labels[train_data_indices]

            # we need to compute for positive and negative edges respectively, because the new sampling strategy (for evaluation) allows the negative source nodes to be
            # different from the source nodes, this is different from previous works that just replace destination nodes with negative destination nodes
            # get temporal embedding of source and destination nodes
            # two Tensors, with shape (batch_size, node_feat_dim)
            batch_src_node_embeddings, batch_dst_node_embeddings = \
                model[0].compute_src_dst_node_temporal_embeddings(src_node_ids=batch_src_node_ids,
                                                                  dst_node_ids=batch_dst_node_ids,
                                                                  node_interact_times=batch_node_interact_times)

            # class scores per edge; the predicted class is the argmax over dim 1
            predicts = model[1](x_1=batch_src_node_embeddings, x_2 = batch_dst_node_embeddings, rel_embs = model[0].edge_raw_features)
            pred_labels = torch.max(predicts, dim=1)[1]
            labels = torch.from_numpy(batch_labels).long().to(predicts.device)

            loss = loss_func(predicts, labels)

            train_y_trues.append(labels)
            train_y_predicts.append(pred_labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_idx_data_loader_tqdm.set_description(f'Epoch: {epoch + 1}, train for the {batch_idx + 1}-th batch, train loss: {loss.item()}')

        train_y_trues = torch.cat(train_y_trues, dim=0)
        train_y_predicts = torch.cat(train_y_predicts, dim=0)

        # validate (and feed early stopping) every test_interval_epochs epochs
        if (epoch + 1) % args.test_interval_epochs == 0:
            train_metrics = get_edge_classification_metrics(predicts=train_y_predicts, labels=train_y_trues)

            val_total_loss, val_metrics = evaluate_model_edge_classification(model_name=args.model_name,
                                                                            model=model,
                                                                            neighbor_sampler=full_neighbor_sampler,
                                                                            evaluate_idx_data_loader=val_idx_data_loader,
                                                                            evaluate_data=val_data,
                                                                            loss_func=loss_func,
                                                                            num_neighbors=args.num_neighbors,
                                                                            time_gap=args.time_gap)

            logger.info(f'Epoch: {epoch + 1}, learning rate: {optimizer.param_groups[0]["lr"]}')
            for metric_name in train_metrics.keys():
                logger.info(f'train {metric_name}, {train_metrics[metric_name]:.4f}')
            logger.info(f'validate loss: {val_total_loss:.4f}')
            for metric_name in val_metrics.keys():
                logger.info(f'validate {metric_name}, {val_metrics[metric_name]:.4f}')

            # select the best model based on all the validate metrics
            # NOTE(review): the third tuple element presumably flags "higher is better" — confirm in EarlyStopping.step
            val_metric_indicator = []
            for metric_name in val_metrics.keys():
                val_metric_indicator.append((metric_name, val_metrics[metric_name], True))
            early_stop = early_stopping.step(val_metric_indicator, model)

            if early_stop:
                break

        # perform testing once after test_interval_epochs
        if (epoch + 1) % args.test_interval_epochs == 0:
            if args.model_name in ['JODIE', 'DyRep', 'TGN']:
                # backup memory bank after validating so it can be used for testing nodes (since test edges are strictly later in time than validation edges)
                val_backup_memory_bank = model[0].memory_bank.backup_memory_bank()

            test_total_loss, test_metrics = evaluate_model_edge_classification(model_name=args.model_name,
                                                                                model=model,
                                                                                neighbor_sampler=full_neighbor_sampler,
                                                                                evaluate_idx_data_loader=test_idx_data_loader,
                                                                                evaluate_data=test_data,
                                                                                loss_func=loss_func,
                                                                                num_neighbors=args.num_neighbors,
                                                                                time_gap=args.time_gap)

            if args.model_name in ['JODIE', 'DyRep', 'TGN']:
                # reload validation memory bank for saving models
                # note that since model treats memory as parameters, we need to reload the memory to val_backup_memory_bank for saving models
                model[0].memory_bank.reload_memory_bank(val_backup_memory_bank)

            logger.info(f'test loss: {test_total_loss:.4f}')
            for metric_name in test_metrics.keys():
                logger.info(f'test {metric_name}, {test_metrics[metric_name]:.4f}')

    # load the best model (checkpoint selected by early stopping)
    early_stopping.load_checkpoint(model)

    # evaluate the best model
    logger.info(f'get final performance on dataset {args.dataset_name}...')

    # the saved best model of memory-based models cannot perform validation since the stored memory has been updated by validation data
    val_total_loss, val_metrics = evaluate_model_edge_classification(model_name=args.model_name,
                                                                      model=model,
                                                                      neighbor_sampler=full_neighbor_sampler,
                                                                      evaluate_idx_data_loader=val_idx_data_loader,
                                                                      evaluate_data=val_data,
                                                                      loss_func=loss_func,
                                                                      num_neighbors=args.num_neighbors,
                                                                      time_gap=args.time_gap)

    test_total_loss, test_metrics = evaluate_model_edge_classification(model_name=args.model_name,
                                                                        model=model,
                                                                        neighbor_sampler=full_neighbor_sampler,
                                                                        evaluate_idx_data_loader=test_idx_data_loader,
                                                                        evaluate_data=test_data,
                                                                        loss_func=loss_func,
                                                                        num_neighbors=args.num_neighbors,
                                                                        time_gap=args.time_gap)

    # store the evaluation metrics at the current run
    val_metric_dict, test_metric_dict = {}, {}

    logger.info(f'validate loss: {val_total_loss:.4f}')
    for metric_name in val_metrics.keys():
        val_metric = val_metrics[metric_name]
        logger.info(f'validate {metric_name}, {val_metric:.4f}')
        val_metric_dict[metric_name] = val_metric

    logger.info(f'test loss: {test_total_loss:.4f}')
    for metric_name in test_metrics.keys():
        test_metric = test_metrics[metric_name]
        logger.info(f'test {metric_name}, {test_metric:.4f}')
        test_metric_dict[metric_name] = test_metric

    single_run_time = time.time() - run_start_time
    logger.info(f'Run {run + 1} cost {single_run_time:.2f} seconds.')

    val_metric_all_runs.append(val_metric_dict)
    test_metric_all_runs.append(test_metric_dict)

    # avoid the overlap of logs
    # (handlers of the last run stay attached so the summary after the loop lands in its log)
    if run < args.num_runs - 1:
        logger.removeHandler(fh)
        logger.removeHandler(ch)

    # save model result (metrics are stored as strings with 4 decimals)
    result_json = {
        "validate metrics": {metric_name: f'{val_metric_dict[metric_name]:.4f}' for metric_name in val_metric_dict},
        "test metrics": {metric_name: f'{test_metric_dict[metric_name]:.4f}' for metric_name in test_metric_dict}
    }
    result_json = json.dumps(result_json, indent=4)

    save_result_folder = f"./saved_results/{args.model_name}/{args.dataset_name}"
    os.makedirs(save_result_folder, exist_ok=True)
    save_result_path = os.path.join(save_result_folder, f"{args.save_model_name}.json")

    with open(save_result_path, 'w') as file:
        file.write(result_json)

# store the average metrics at the log of the last run
logger.info(f'metrics over {args.num_runs} runs:')

# Log the per-run values and mean ± std of every metric, validation first, then test.
for split_name, metric_runs in (('validate', val_metric_all_runs), ('test', test_metric_all_runs)):
    # The sample standard deviation (ddof=1) is undefined for a single run and
    # was logged as "nan"; fall back to ddof=0 (i.e. 0.0) in that case.
    std_ddof = 1 if len(metric_runs) > 1 else 0
    for metric_name in metric_runs[0].keys():
        values = [single_run[metric_name] for single_run in metric_runs]
        logger.info(f'{split_name} {metric_name}, {values}')
        logger.info(f'average {split_name} {metric_name}, {np.mean(values):.4f} '
                    f'± {np.std(values, ddof=std_ddof):.4f}')

# Save the final weights inside the results folder. Plain string concatenation
# dropped the path separator and wrote the file next to the folder, under a name
# that fused the dataset name with the model name.
torch.save(model.state_dict(), os.path.join(save_result_folder, args.save_model_name))


INFO:root:********** Run 1 starts. **********
INFO:root:configuration is Namespace(dataset_name='Googlemap_CT', batch_size=256, model_name='DyGFormer', gpu=0, num_neighbors=20, sample_neighbor_strategy='recent', time_scaling_factor=1e-06, num_walk_heads=8, num_heads=2, num_layers=2, walk_length=1, time_gap=2000, time_feat_dim=100, position_feat_dim=172, patch_size=2, channel_embedding_dim=50, max_input_sequence_length=64, learning_rate=0.0001, dropout=0.1, num_epochs=8, optimizer='Adam', weight_decay=0.0, patience=5, val_ratio=0.15, test_ratio=0.15, num_runs=1, test_interval_epochs=1, use_feature='Bert', load_best_configs=False, device='cuda:0', seed=0, save_model_name='edge_classification_DyGFormer_seed0Bert')
INFO:root:node feature size (111169, 768)
INFO:root:edge feature size (1196500, 768)
INFO:root:node feature example [-0.86001182 -0.35063297 -0.55581796  0.75087303  0.25488845]
INFO:root:edge feature example [ 1.84959125 -0.21416666 -0.49901664  0.02135122 -0.91911344]
INFO:ro

In [None]:
"""
Add validation metric that's like the relative ranking of reference edges

Model scores all predicted within a small window, but maybe? This is ok because maybe all the model needs is to show this bias for some samples over others
For 5000 random validation edges, find up to 50 possible reference edges per side. Compare orderings from model against ground truth
"""
# Overview of this part of the cell: load the cached recommendation data,
# rebuild the model from the saved checkpoint, sample n_val_edges entries,
# expand them with reference edges, and compute one cosine similarity per
# (target edge, reference edge) row into `predicts`.

# os.path.join(save_result_folder, f"{args.save_model_name}.json")

from scipy.stats import kendalltau

# Load in same test set used for recommendation
with open('../DyLink_Datasets/' + args.dataset_name + '/processed_data.pkl', 'rb') as file:
    processed_data = pickle.load(file)
# Only elements 5 and 9 of the pickled list are used.
# NOTE(review): element order is defined by the notebook that wrote processed_data.pkl — confirm.
_, _, _, _, rec_val_data, _, _, _, train_data_edges, _ = processed_data

data = rec_val_data.copy()
n_val_edges = 2000  # number of entries sampled from the validation data
max_n_per_ranking = 50  # forwarded to calc_ref_edges below
model_postfix = '' #''

# Fetch model: same architecture as in training, but built with the full neighbor sampler
dynamic_backbone = DyGFormer(node_raw_features=node_raw_features, edge_raw_features=edge_raw_features, neighbor_sampler=full_neighbor_sampler,
                                  time_feat_dim=args.time_feat_dim, channel_embedding_dim=args.channel_embedding_dim, patch_size=args.patch_size,
                                  num_layers=args.num_layers, num_heads=args.num_heads, dropout=args.dropout,
                                  max_input_sequence_length=args.max_input_sequence_length, device=args.device)
edge_classifier = MLPClassifier_edge(input_dim=node_raw_features.shape[1], dropout=args.dropout, cat_num=cat_num)
model = nn.Sequential(dynamic_backbone, edge_classifier)
args.seed = 0
args.save_model_name = f'edge_classification_{args.model_name}_seed0{args.use_feature}'
# NOTE(review): presumably the checkpoint written by EarlyStopping during training (seed 0) — confirm the file name
save_model_location = f"../Code/saved_models/{args.model_name}/{args.dataset_name}/{args.save_model_name}{model_postfix}/{args.save_model_name}{model_postfix}.pkl"
model.load_state_dict(torch.load(save_model_location))
model = convert_to_gpu(model, device=args.device)
loss_func = nn.CrossEntropyLoss()

# setup data: use same data as train link recommendation
edge_texts = pd.read_csv('../DyLink_Datasets/' + args.dataset_name + '/relation_text.csv')

# keep a random subset of n_val_edges entries (drawn without replacement from the global numpy RNG)
mask = np.zeros(len(data.ref_src_node_ids), dtype=bool)
random_indices = np.random.choice(len(data.ref_src_node_ids), size=n_val_edges, replace=False)
mask[random_indices] = True

data.apply_mask(mask)
# NOTE(review): presumably expands each kept edge into rows pairing it with up to
# max_n_per_ranking reference edges taken from train_data_edges — confirm in calc_ref_edges
data = data.calc_ref_edges(train_data_edges, max_n_per_ranking, edge_texts)
data.correct_typing()

ranking_data = data
# shuffle=False keeps `predicts` aligned row-by-row with ranking_data
ranking_data_loader = get_idx_data_loader(indices_list=list(range(len(data.src_node_ids))), batch_size=args.batch_size, shuffle=False)

# Run forward passes of model
predicts = []
model.eval()
with torch.no_grad():
    # store evaluate losses, trues and predicts
    evaluate_idx_data_loader_tqdm = tqdm(ranking_data_loader, ncols=120)
    for batch_idx, evaluate_data_indices in enumerate(evaluate_idx_data_loader_tqdm):
        evaluate_data_indices = evaluate_data_indices.numpy()
        # target edges of this batch (batch_edge_ids and batch_labels are not used inside the loop)
        batch_src_node_ids, batch_dst_node_ids, batch_node_interact_times, batch_edge_ids, batch_labels = \
            ranking_data.src_node_ids[evaluate_data_indices],  ranking_data.dst_node_ids[evaluate_data_indices], \
            ranking_data.node_interact_times[evaluate_data_indices], ranking_data.edge_ids[evaluate_data_indices], ranking_data.labels[evaluate_data_indices]

        batch_src_node_embeddings, batch_dst_node_embeddings = \
            model[0].compute_src_dst_node_temporal_embeddings(src_node_ids=batch_src_node_ids,
                                                              dst_node_ids=batch_dst_node_ids,
                                                                  node_interact_times=batch_node_interact_times)

        # reference edges paired with the target edges of this batch
        batch_ref_src_node_ids, batch_ref_dst_node_ids, batch_ref_node_interact_times, batch_ref_edge_ids = \
            ranking_data.ref_src_node_ids[evaluate_data_indices], ranking_data.ref_dst_node_ids[evaluate_data_indices], \
            ranking_data.ref_node_interact_times[evaluate_data_indices], ranking_data.ref_edge_ids[evaluate_data_indices]

        batch_ref_src_node_embeddings, batch_ref_dst_node_embeddings = \
            model[0].compute_src_dst_node_temporal_embeddings(src_node_ids=batch_ref_src_node_ids,
                                                                dst_node_ids=batch_ref_dst_node_ids,
                                                                node_interact_times=batch_ref_node_interact_times)

        # edge representation = [src embedding | dst embedding]; score = cosine similarity
        # between the target-edge and reference-edge representations (one value per row)
        target_embedding = np.concatenate((batch_src_node_embeddings.cpu(), batch_dst_node_embeddings.cpu()), axis=1)
        ref_embedding = np.concatenate((batch_ref_src_node_embeddings.cpu(), batch_ref_dst_node_embeddings.cpu()), axis=1)
        dot_product = np.sum(target_embedding * ref_embedding, axis=1)
        norm1 = np.linalg.norm(target_embedding, axis=1)
        norm2 = np.linalg.norm(ref_embedding, axis=1)
        similarity = dot_product / (norm1 * norm2)
        predicts.append(similarity)

# flatten the per-batch arrays into one score per ranking_data row
predicts = np.concatenate(predicts)

# Compute scores
# For every target edge and each side (reference found on the source side or on
# the destination side), measure how well the model's similarity scores order
# the reference edges compared with the ground-truth scores, using Kendall's tau.
target_edge_ids = dict.fromkeys(ranking_data.edge_ids).keys()  # unique ids, first-seen order
src_mask = (ranking_data.ref_on_src_side == True)
dst_mask = (ranking_data.ref_on_src_side == False)

model_labels = np.array(predicts, dtype=np.float32)
actual_labels = np.array(ranking_data.labels, dtype=np.float32)

side_masks = {'src': src_mask, 'dst': dst_mask}
tau_sum = {'src': 0.0, 'dst': 0.0}
tau_count = {'src': 0, 'dst': 0}
print("\n")
for target_edge_id in target_edge_ids:
    edge_mask = (ranking_data.edge_ids == target_edge_id)
    for side_name, side_mask in side_masks.items():
        combined_mask = np.logical_and(edge_mask, side_mask)
        # A ranking needs at least two reference edges.
        if np.count_nonzero(combined_mask) < 2:
            continue
        # Kendall's tau is rank-based, so the raw scores are passed directly.
        # The earlier `size - np.argsort(x)` is the sorting permutation, not a
        # rank vector, and tau between two such permutations generally differs
        # from tau between the scores; argsort also broke ties arbitrarily.
        tau = kendalltau(model_labels[combined_mask], actual_labels[combined_mask]).statistic
        # tau is NaN when one of the inputs is constant; skip it instead of
        # letting it turn the whole average into NaN.
        if np.isnan(tau):
            continue
        tau_sum[side_name] += tau
        tau_count[side_name] += 1

# Average over the groups that were actually scored. Dividing by the number of
# all target edges counted every skipped group as a tau of 0.
src_score = tau_sum['src'] / tau_count['src'] if tau_count['src'] else float('nan')
dst_score = tau_sum['dst'] / tau_count['dst'] if tau_count['dst'] else float('nan')

print("Source score: " + str(src_score))
print("Dest score: " + str(dst_score))



  model.load_state_dict(torch.load(save_model_location))


0


100%|█████████████████████████████████████████████████████████████████████████████████| 428/428 [01:39<00:00,  4.30it/s]




Source score: 0.3616283992661349
Dest score: 0.16041912158915356
