In [1]:
import shutil
import os

# Source folder inside the Kaggle input area (replace the dataset name with your own).
# The exact path is listed in the "Data" section of the right-hand sidebar.
source_dir = '/kaggle/input/pairre-nt2/baseline_icews14'

# Destination folder: the place the code is run from.
destination_dir = '/kaggle/working/my_code'

# Copy the whole tree only when the destination is absent; an existing
# destination is left untouched.
if os.path.exists(destination_dir):
    print("Thư mục code đã tồn tại ở working directory.")
else:
    shutil.copytree(source_dir, destination_dir)
    print("Đã copy code sang /kaggle/working thành công!")

# Switch the current working directory to the copy so that later shell
# commands can be run from there.
os.chdir(destination_dir)
print("Thư mục làm việc hiện tại: " + os.getcwd())

Đã copy code sang /kaggle/working thành công!
Thư mục làm việc hiện tại: /kaggle/working/my_code


In [5]:
import torch

# Report the installed PyTorch build and whether CUDA is usable from this kernel.
print("PyTorch version: {}".format(torch.__version__))
print("CUDA available: {}".format(torch.cuda.is_available()))

PyTorch version: 2.6.0+cu124
CUDA available: True


In [2]:
%%writefile requirements.txt
# Packages installed by the `pip install -r requirements.txt` cell.
# NOTE(review): no version specifiers are given, so installs are not pinned.
tqdm
numpy
scikit-learn
scipy

Writing requirements.txt


In [4]:
%%capture
!pip install -r requirements.txt

In [5]:
# Run the dataset download script located in the copied code directory.
# NOTE(review): the script's output mentions relative paths (src_data/, raw/),
# so it presumably depends on the current working directory — confirm.
!bash /kaggle/working/my_code/download_dataset.sh

Downloading ICEWS14 Dataset
Downloading from https://dl.fbaipublicfiles.com/tkbc/data.tar.gz ...
Extracting data.tar.gz ...
Checking archive contents...
Available datasets:
total 24
drwxrwxr-x 6 1185300502 1185300502 4096 Jan  2  2020 .
drwxr-xr-x 3 root       root       4096 Jan  3 14:47 ..
drwxrwxr-x 2 1185300502 1185300502 4096 Jan  2  2020 ICEWS05-15
drwxrwxr-x 2 1185300502 1185300502 4096 Jan  2  2020 ICEWS14
drwxrwxr-x 2 1185300502 1185300502 4096 Apr  8  2020 wikidata
drwxrwxr-x 2 1185300502 1185300502 4096 Jan  2  2020 yago15k
Found: src_data/ICEWS14

Contents of src_data/ICEWS14/:
total 6.0M
-rw-rw-r-- 1 1185300502 1185300502   99 Jan  2  2020 LICENSE
-rw-rw-r-- 1 1185300502 1185300502 604K Jan  2  2020 test
-rw-rw-r-- 1 1185300502 1185300502 4.8M Jan  2  2020 train
-rw-rw-r-- 1 1185300502 1185300502 601K Jan  2  2020 valid

Found files without extension - copying to raw/ with .txt extension...
✓ Copied train, valid, test → raw/*.txt

Preprocessing data to create .pkl files...

In [9]:
%%writefile /kaggle/working/my_code/train_baseline.sh
#!/bin/bash

# Baseline PairRE Training Script for ICEWS14
# NO temporal modeling - timestamps ignored!
#
# Steps:
#   1. Prepare the dataset with download_dataset.sh unless ./processed exists.
#   2. Run run.py with the hyperparameters defined below.
# Exit status: non-zero when the data download or the training run fails.

# download_dataset.sh, run.py and ./processed are referenced by relative path,
# so always work from the directory this script lives in, regardless of where
# it was invoked from.
cd "$(dirname "${BASH_SOURCE[0]}")" || exit 1

echo "=================================================="
echo "BASELINE PairRE - Static KG Approach"
echo "Dataset: ICEWS14 (ignoring timestamps)"
echo "=================================================="

# Step 1: Download and prepare data (if not exists)
if [ ! -d "processed" ]; then
    echo ""
    echo "Downloading ICEWS14 dataset from Facebook AI Research..."

    # Test the command directly instead of inspecting $? afterwards.
    if ! bash download_dataset.sh; then
        echo "❌ Data download failed!"
        exit 1
    fi
else
    echo "✓ Data already prepared in ./processed"
fi

# Configuration
DATA_PATH="processed"
MODEL="BaselinePairRE"
SAVE_DIR="checkpoints/ICEWS14_BaselinePairRE"

# Hyperparameters (matching temporal model for fair comparison)
DIMENSION=500
GAMMA=12.0
LR=0.0001
BATCH_SIZE=256
NEG_SIZE=128
ADV_TEMP=1.0
REG=0.000001
MAX_STEPS=100000
WARMUP=50000
VALID_STEPS=5000
SAVE_STEPS=5000

echo ""
echo "Configuration:"
echo "  Model: $MODEL"
echo "  Dimension: $DIMENSION"
echo "  Gamma: $GAMMA"
echo "  Learning Rate: $LR"
echo "  Batch Size: $BATCH_SIZE"
echo "  Negative Samples: $NEG_SIZE"
echo "  Max Steps: $MAX_STEPS"
echo ""
echo "NOTE: This baseline IGNORES timestamps during training!"
echo "      Evaluation uses temporal filtering for fair comparison."
echo ""

# Run training (expansions are quoted so values are passed as single arguments)
python -u run.py \
  --do_train \
  --cuda \
  --do_valid \
  --do_test \
  --evaluate_train \
  --model "$MODEL" \
  --data_path "$DATA_PATH" \
  -n "$NEG_SIZE" \
  -b "$BATCH_SIZE" \
  -d "$DIMENSION" \
  -g "$GAMMA" \
  -a "$ADV_TEMP" \
  -adv \
  -dr \
  -r "$REG" \
  -lr "$LR" \
  --max_steps "$MAX_STEPS" \
  --warm_up_steps "$WARMUP" \
  --cpu_num 2 \
  --test_batch_size 32 \
  --valid_steps "$VALID_STEPS" \
  --log_steps 100 \
  --save_checkpoint_steps "$SAVE_STEPS" \
  --save_path "$SAVE_DIR"

# Propagate a training failure instead of reporting success unconditionally.
TRAIN_STATUS=$?
if [ "$TRAIN_STATUS" -ne 0 ]; then
    echo ""
    echo "❌ Training failed (exit code $TRAIN_STATUS)!"
    exit "$TRAIN_STATUS"
fi

echo ""
echo "=================================================="
echo "Training Complete!"
echo "Check results in: $SAVE_DIR"
echo "=================================================="


Overwriting /kaggle/working/my_code/train_baseline.sh


In [None]:
# Launch the baseline training script written by the %%writefile cell above.
!bash /kaggle/working/my_code/train_baseline.sh

BASELINE PairRE - Static KG Approach
Dataset: ICEWS14 (ignoring timestamps)
✓ Data already prepared in ./processed

Configuration:
  Model: BaselinePairRE
  Dimension: 500
  Gamma: 12.0
  Learning Rate: 0.0001
  Batch Size: 256
  Negative Samples: 128
  Max Steps: 100000

NOTE: This baseline IGNORES timestamps during training!
      Evaluation uses temporal filtering for fair comparison.

2026-01-03 14:49:56,804 INFO     BASELINE PairRE (No Temporal Modeling)
2026-01-03 14:49:56,804 INFO     Model: BaselinePairRE
2026-01-03 14:49:56,804 INFO     Data Path: processed
2026-01-03 14:49:56,804 INFO     #entity: 7128
2026-01-03 14:49:56,804 INFO     #relation: 230
2026-01-03 14:49:56,804 INFO     NOTE: Timestamps are IGNORED during training!
2026-01-03 14:49:56,804 INFO           But temporal filtering is used in evaluation.
2026-01-03 14:49:56,836 INFO     Model Parameter Configuration:
2026-01-03 14:49:56,836 INFO     Parameter gamma: torch.Size([1]), require_grad = False
2026-01-03 14:49

In [13]:
import shutil
import os

# Base name of the result archive; the ".zip" extension is appended by make_archive.
output_filename = "/kaggle/working/ket_qua_training"

# Directory whose contents are packed into the archive.
dir_to_zip = "/kaggle/working/my_code"

# Best-effort archiving: any failure is reported as a message rather than raised.
try:
    shutil.make_archive(base_name=output_filename, format='zip', root_dir=dir_to_zip)
    print("Đã nén xong! File nằm tại: " + output_filename + ".zip")
except Exception as err:
    print("Lỗi khi nén: " + str(err))

Đã nén xong! File nằm tại: /kaggle/working/ket_qua_training.zip
