In [1]:
# Base name of the pickle file that the training cell writes its results to.
file_name = "rank_sample_seq"
# Kept as a string: it is passed verbatim as the "--margin" command-line value.
margin = "0"
# Learning rate; forwarded as the "--learning_rate" command-line value via str(lr).
lr = "5e-4"
# Number of training epochs ("--epochs"); also the upper bound of the epoch
# window handed to the evaluation cells (epoch-10 .. epoch).
epoch = 50

In [2]:
import argparse
import os
import pickle
import sys

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.metrics import accuracy_score
from tqdm import tqdm

In [3]:
# NOTE(review): every path in this cell is absolute and specific to one Windows
# machine; they have to be edited before the notebook can run anywhere else.
# Make the project's ranknet_seq modules importable from this notebook.
sys.path.append("C:\\Users\\hayas\\proj-rank-general\\git\\code\\ranknet_seq\\")
import utils
import main_model
import coach
import optim
import preprocessing
import evaluation

# Per-sample data keyed by sample id; the entry "00_0" holds 'features' and
# 'score' (presumably every entry shares this layout — TODO confirm).
# NOTE(review): pickle.load can execute arbitrary code, so only load trusted files.
with open('c:\\Users\\hayas\\proj-rank-general\\git\\data\\features_dict_seq.pkl', 'rb') as p:
    features_dict = pickle.load(p)
# Cross-validation splits keyed by test_id; the training cell reads the
# "train_in_test" and "test" entries of each split.
with open('c:\\Users\\hayas\\proj-rank-general\\git\\data\\lopocv_dict.pkl', 'rb') as p:
    lopocv_dict = pickle.load(p)

In [4]:
# Inspect one entry to check which fields a sample carries.
features_dict["00_0"].keys()

dict_keys(['features', 'score'])

In [4]:
def main(args, train_set_in_test, test_set):
    """Train one model on a single train/test split and return the trainer's result.

    Args:
        args: Parsed hyper-parameters. This function reads ``batch_size``,
            ``margin``, ``device``, ``learning_rate``, ``max_grad_value``,
            ``weight_decay`` and ``optimizer`` and forwards the whole
            namespace to ``MainModel`` and ``Coach``.
        train_set_in_test: Examples handed to the data module as ``train_dataset``.
        test_set: Examples handed to the data module as ``test_dataset``.

    Returns:
        Whatever ``coach.Coach.train()`` returns (its structure is defined in
        the project's ``coach`` module, not here).

    Note:
        Reads the notebook-level ``features_dict`` instead of taking it as a
        parameter, so the data-loading cell must have been run first.
    """
    data_module = preprocessing.ConversationRelDataModule(
        train_dataset=train_set_in_test,
        test_dataset=test_set,
        batch_size=args.batch_size,
        collator=preprocessing.collator,
        features_dict=features_dict,
        margin=args.margin,
    )

    # Each loader is requested right after its own setup stage.
    data_module.setup(stage="fit")
    train_loader = data_module.train_dataloader()
    data_module.setup(stage="test")
    test_loader = data_module.test_dataloader()

    network = main_model.MainModel(args).to(args.device)

    optimizer = optim.Optim(args.learning_rate, args.max_grad_value, args.weight_decay)
    optimizer.set_parameters(network.parameters(), args.optimizer)

    trainer = coach.Coach(train_loader, test_loader, network, optimizer, args)
    return trainer.train()

In [5]:
def _build_arg_parser():
    """Create the argument parser that declares every training hyper-parameter."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--from_begin", type=str, default="True", help="Training from begin.")
    parser.add_argument("--device", type=str, default="cuda:0", help="Computing device.")
    parser.add_argument("--batch_size", default=32, type=int, help="Batch size.")
    parser.add_argument("--drop_rate", type=float, default=0.1, help="Dropout rate.")
    parser.add_argument("--optimizer", type=str, default="adam", choices=["sgd", "rmsprop", "adam"], help="Name of optimizer.")
    parser.add_argument("--learning_rate", type=float, default=1e-5, help="Learning rate.")
    parser.add_argument("--max_grad_value", default=-1, type=float,
                        help="""If the norm of the gradient vector exceeds this, normalize it to have the norm equal to max_grad_norm""")
    parser.add_argument("--weight_decay", type=float, default=1e-8, help="Weight decay.")
    parser.add_argument("--epochs", default=30, type=int, help="Number of training epochs.")
    parser.add_argument("--rnn", type=str, default="lstm", choices=["lstm", "bi_lstm"], help="rnn model.")
    parser.add_argument("--margin", default=1, type=int, help="Margin.")
    parser.add_argument("--intermediate_lstm_uni", default=256, type=int)
    parser.add_argument("--intermediate_mlp_uni", default=256, type=int)
    parser.add_argument("--output_mlp_uni", default=256, type=int)
    parser.add_argument("--intermediate_lstm_mm", default=256, type=int)
    parser.add_argument("--intermediate_mlp_mm", default=256, type=int)
    return parser


# Train one model per test_id in lopocv_dict and collect each run's result.
log = utils.get_logger()
return_dict = {}

# The parser and the overridden options are the same for every split, so they
# are built once instead of on every loop iteration.
parser = _build_arg_parser()
cli_overrides = ["--learning_rate", str(lr), "--drop_rate", str(0.25), "--margin", margin, "--epochs", str(epoch)]

# Create the output folder before training starts, so a missing folder cannot
# cause the collected results to be lost after all splits have been trained.
output_path = "C:\\Users\\hayas\\proj-rank-general\\git\\output\\ret\\20240603\\{}.pickle".format(file_name)
output_dir = os.path.dirname(output_path)
if output_dir:  # empty when the path has no directory part for this OS
    os.makedirs(output_dir, exist_ok=True)

for test_id in lopocv_dict.keys():
    log.info("---------test_id: {}---------".format(test_id))

    # Parse per split so every run receives its own fresh Namespace.
    args = parser.parse_args(args=cli_overrides)
    ret = main(args, lopocv_dict[test_id]["train_in_test"], lopocv_dict[test_id]["test"])
    return_dict[test_id] = ret

# Persist the per-split results for the evaluation cells and later reuse.
with open(output_path, mode="wb") as f:
    pickle.dump(return_dict, f)

06/08/2024 02:07:55 ---------test_id: 00---------
06/08/2024 02:07:55 finished loading 290 examples
06/08/2024 02:07:55 finished loading 10 examples
06/08/2024 02:07:55 input_uni: 8, intermediate_uni: 32
06/08/2024 02:07:55 lstm:lstm, lr:0.0005, drop: 0.25, margin: 0: epoch: 50
06/08/2024 02:07:55 [Epoch 1] [Loss: 6.935895] [Acc: 0.462069] [Time: 0.213651]
06/08/2024 02:07:55 [Test set] [Loss 0.6935] [Acc: 0.2000]
06/08/2024 02:07:55 best loss model.
06/08/2024 02:07:55 [Epoch 2] [Loss: 6.924712] [Acc: 0.579310] [Time: 0.116917]
06/08/2024 02:07:55 [Test set] [Loss 0.6947] [Acc: 0.1000]
06/08/2024 02:07:55 [Epoch 3] [Loss: 6.921619] [Acc: 0.575862] [Time: 0.133848]
06/08/2024 02:07:55 [Test set] [Loss 0.6952] [Acc: 0.1000]
06/08/2024 02:07:56 [Epoch 4] [Loss: 6.918529] [Acc: 0.558621] [Time: 0.116783]
06/08/2024 02:07:56 [Test set] [Loss 0.6954] [Acc: 0.1000]
06/08/2024 02:07:56 [Epoch 5] [Loss: 6.937402] [Acc: 0.572414] [Time: 0.133453]
06/08/2024 02:07:56 [Test set] [Loss 0.6962] [Ac

In [6]:
# Accuracy for the epoch window [epoch-10, epoch], computed from the per-split results.
# NOTE(review): the printed output reports epochs 40, 45 and 50, so cal_acc
# presumably steps through the window by 5 — confirm in evaluation.py.
acc = evaluation.cal_acc(epoch-10, epoch, return_dict)

Acc: 40: 0.5133333333333333
Acc: 45: 0.5033333333333333
Acc: 50: 0.5133333333333333


In [7]:
# Ranking metrics for the same epoch window; the printed output reports tau,
# P@1 and P@-1 per evaluated epoch.
# NOTE(review): the exact metric definitions live in evaluation.cal_rank — confirm there.
rank_list = evaluation.cal_rank(epoch-10, epoch, return_dict, features_dict)

------40epoch------
tau: 0.029819691488701804, len: 30
P@1: 0.26666666666666666, len: 30
P@-1: 0.2, len 30
------45epoch------
tau: 0.009459074466105399, len: 30
P@1: 0.26666666666666666, len: 30
P@-1: 0.23333333333333334, len 30
------50epoch------
tau: 0.029459074466105406, len: 30
P@1: 0.3, len: 30
P@-1: 0.26666666666666666, len 30
