# Run best model

Take the configuration of the best model in `dglke_results` and train a model with the same hyperparameters on the full dataset.


In [3]:
import numpy as np
import itertools
import datetime
import json
import os

## 1. Get parameters

In [8]:
# Name of the run folder under ./dglke_results whose saved config is reused.
best_model_config = "RotatE_heritageconnector_10"

# Read that run's config.json into the dict `p`.
config_path = f"./dglke_results/{best_model_config}/config.json"
with open(config_path) as config_file:
    p = json.load(config_file)

In [12]:
# Display the loaded configuration dict for inspection.
p

{'dataset': 'heritageconnector',
 'model': 'RotatE',
 'emb_size': 400,
 'max_train_step': 262564,
 'batch_size': 8000,
 'neg_sample_size': 10,
 'lr': 0.01,
 'gamma': 5.0,
 'double_ent': True,
 'double_rel': False,
 'neg_adversarial_sampling': True,
 'adversarial_temperature': 1.0,
 'regularization_coef': 2e-06,
 'regularization_norm': 3,
 'emap_file': 'entities.tsv',
 'rmap_file': 'relations.tsv'}

## 2. Run DGL-KE with the best model's parameters

In [18]:
# Fixed settings for the run (everything that is not read from the model config).

# Input data location and output folder for the saved model and logs.
DATA_PATH = "../data/interim/"
TRAIN_FILENAME = "triples_filtered_by_predicate.csv"
SAVE_AND_LOGS_PATH = "./dglke_best_model"

# Dataset name and triple-file format passed to dglke_train.
DATASET = "heritageconnector"
FORMAT = "raw_udd_hrt"

# Logging and evaluation settings.
LOG_INTERVAL = 10_000
BATCH_SIZE_EVAL = 16
NEG_SAMPLE_SIZE_EVAL = 1_000

# Training length: N_EPOCHS and N_TRIPLES are combined with the batch size
# to derive the number of training steps.
N_EPOCHS = 1_000
N_TRIPLES = 2_793_238  # 19.07 (presumably the date this count was taken; TODO confirm)


In [19]:
# delete old results and logs folders
# NOTE: this permanently removes everything under SAVE_AND_LOGS_PATH, which is
# also the --save_path of the training run, so any previously saved model and
# logs in that folder are lost when this cell is executed.
! rm -rf {SAVE_AND_LOGS_PATH}

In [20]:
# run experiment

!mkdir dglke_best_model

"""
Explanation for (some) parameters:
- max_step: we convert from n_epochs to n_steps by doing n_epochs*(n_triples/batch_size)
- de: double entity dimension, as RotatE entities have a complex representation
"""

print(f"---TRAINING {best_model_config}---")

filename = f"{SAVE_AND_LOGS_PATH}/logs.txt"
neg_adv_flag = '-adv' if p['neg_adversarial_sampling'] else ''

!DGLBACKEND=pytorch dglke_train --model_name {p['model']} -de --data_path {DATA_PATH} --save_path {SAVE_AND_LOGS_PATH} --dataset {DATASET} --format {FORMAT} \
--data_files {TRAIN_FILENAME} --delimiter '	' --max_step {int(N_TRIPLES/p['batch_size']*N_EPOCHS)} \
--log_interval {LOG_INTERVAL} --batch_size {p['batch_size']} --neg_sample_size {p['neg_sample_size']} \
--lr {p['lr']} {neg_adv_flag} --hidden_dim {p['emb_size']} -rc {p['regularization_coef']} -g {p['gamma']} \
--gpu 0 --mix_cpu_gpu --async_update |& tee {filename}


---TRAINING RotatE_heritageconnector_10---
!DGLBACKEND=pytorch dglke_train --model_name RotatE --data_path ../data/interim/ --save_path ./dglke_best_model --dataset heritageconnector --format raw_udd_hrt --data_files triples_filtered_by_predicate.csv --delimiter '	' --max_step 349154 --log_interval 10000 --batch_size 8000 --neg_sample_size 10 --lr 0.01 -adv --hidden_dim 400 -rc 2e-06 -g 5.0 --gpu 0 --mix_cpu_gpu --async_update |& tee ./dglke_best_model/logs.txt
