In [1]:
# coding=utf-8
import torch
from torch_geometric import transforms as T
# Seed PyTorch's global RNG so repeated runs start from the same random state.
SEED = 3407
torch.manual_seed(SEED)

# Use the first CUDA GPU when one is visible to PyTorch; otherwise stay on CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

  from .autonotebook import tqdm as notebook_tqdm


### Step 1 定义模型和图数据

In [2]:
import os

# Point Hugging Face downloads at the mirror BEFORE importing project code:
# huggingface_hub resolves HF_ENDPOINT once, at import time, so setting it after
# an import that pulls the hub client in would be silently ignored.
# NOTE(review): whether src.model / src.tools import huggingface_hub is not
# visible from this notebook; setting the variable first is safe either way.
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"

from src.model import HetGCN, HAN
from src.tools import hetero_data, Study, split_hetero_graph

# Raw patient ids whose mapped node indices are handed to split_hetero_graph.
# NOTE(review): presumably these are the patients held out for testing — confirm
# against split_hetero_graph and the (test_graph, train_graph) return order.
TEST_PATIENT_IDS = range(1, 791)

# Applied inside hetero_data: normalise features, add reverse ("rev_*") edges,
# then move the graph to the selected device.
transform = T.Compose([T.NormalizeFeatures(), T.ToUndirected(), T.ToDevice(device)])
data, id_mapping = hetero_data(
    "./data/", transform=transform, fill_mode="stats", return_id_mapping=True
)
test_graph, train_graph = split_hetero_graph(
    data,
    "patient",
    [id_mapping["patient"][patient_id] for patient_id in TEST_PATIENT_IDS],
)

# RandomLinkSplit zips `edge_types` with `rev_edge_types`, so entry i of one must
# be the reverse of entry i of the other. Pair them explicitly by name instead of
# relying on two independently filtered lists having the same length and order;
# a forward relation without a "rev_" counterpart (e.g. one that connects a node
# type to itself) is paired with None, which RandomLinkSplit accepts.
forward_edge_types = [
    edge_type for edge_type in data.edge_types if not edge_type[1].startswith("rev_")
]
known_edge_types = set(data.edge_types)
reverse_edge_types = []
for head, rel, tail in forward_edge_types:
    candidate = (tail, f"rev_{rel}", head)
    reverse_edge_types.append(candidate if candidate in known_edge_types else None)

split = T.RandomLinkSplit(
    num_test=0.1,
    num_val=0.1,
    edge_types=forward_edge_types,
    rev_edge_types=reverse_edge_types,
)

In [3]:
# Build the heterogeneous GNN from the graph's (node types, edge types) metadata.
# Alternative architecture with the same constructor arguments:
#   HAN(data.metadata(), 128)
graph_metadata = data.metadata()
model = HetGCN(graph_metadata, 128)
model = model.to(device)

# Study bundles the model with the training graph and the link-split transform.
study = Study(model, train_graph, split)

### Step 2 训练

In [4]:
# Training configuration; NUM_EPOCH and CKPT_DIR are reused by later cells.
NUM_EPOCH = 100
CKPT_DIR = f"./ckpt/{model.name}/"

# Binary cross-entropy on raw logits, optimised with Adam plus L2 weight decay.
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)

# Runs the training loop; save_dir is where the run's checkpoints are directed.
study.train(NUM_EPOCH, optimizer, criterion, save_dir=CKPT_DIR)

Epoch: 1/100, Loss: 0.7229, Val: 0.9569, Test: 0.9603
Epoch: 2/100, Loss: 0.7079, Val: 0.9702, Test: 0.9702
Epoch: 3/100, Loss: 0.6895, Val: 0.9680, Test: 0.9696
Epoch: 4/100, Loss: 0.6820, Val: 0.9655, Test: 0.9633
Epoch: 5/100, Loss: 0.6285, Val: 0.9679, Test: 0.9591
Epoch: 6/100, Loss: 0.6214, Val: 0.9660, Test: 0.9578
Epoch: 7/100, Loss: 0.5691, Val: 0.9647, Test: 0.9570
Epoch: 8/100, Loss: 0.5349, Val: 0.9646, Test: 0.9562
Epoch: 9/100, Loss: 0.5291, Val: 0.9645, Test: 0.9558
Epoch: 10/100, Loss: 0.5288, Val: 0.9637, Test: 0.9550
Epoch: 11/100, Loss: 0.5288, Val: 0.9731, Test: 0.9655
Epoch: 12/100, Loss: 0.5288, Val: 0.9731, Test: 0.9655
Epoch: 13/100, Loss: 0.5288, Val: 0.9731, Test: 0.9655
Epoch: 14/100, Loss: 0.5288, Val: 0.9731, Test: 0.9655
Epoch: 15/100, Loss: 0.5288, Val: 0.9731, Test: 0.9655
Epoch: 16/100, Loss: 0.5288, Val: 0.9731, Test: 0.9655
Epoch: 17/100, Loss: 0.5288, Val: 0.9731, Test: 0.9655
Epoch: 18/100, Loss: 0.5288, Val: 0.9731, Test: 0.9655
Epoch: 19/100, Loss

### Step 3 测试

In [5]:
# Evaluate the model as left by the last training epoch on the held-out graph.
# NOTE(review): the second argument is presumably the topK cut-off described in
# Step 4 — confirm against Study.test.
TOP_K = 7
precision, recall, f1_score = study.test(test_graph, TOP_K)

# Report all three metrics with the same 4-decimal formatting.
print(f"precision: {precision:.4f}\nrecall: {recall:.4f}\nf1_score: {f1_score:.4f}")

precision: 0.0013
recall: 0.0016
f1_score: 0.0013877579476684332


### Step 4 寻找最佳模型和最佳的topK
这里的 `topK` 是指病人与药品之间预测概率最高的前 `topK` 条边。

In [6]:
# Search over saved checkpoints and topK cut-offs for the best-scoring pair.
# NOTE(review): the argument roles below are inferred from the logged trial
# parameters ('model_id', 'topk') — confirm against Study.find_best.
# NOTE(review): the search is scored on test_graph, so the reported best value
# is tuned on the test data; consider a separate validation graph.
search_budget = NUM_EPOCH * 2  # presumably the number of search trials
model_id_candidates = range(1, NUM_EPOCH + 1)
topk_candidates = range(1, 20)
study.find_best(
    CKPT_DIR,
    test_graph,
    search_budget,
    model_id_candidates,
    topk_candidates,
)

[I 2024-04-25 11:57:46,904] A new study created in memory with name: no-name-5a7111c9-62b9-4563-a92e-7c78622d0847
[I 2024-04-25 11:57:46,915] Trial 0 finished with value: 0.0019323504529893398 and parameters: {'model_id': 51, 'topk': 14}. Best is trial 0 with value: 0.0019323504529893398.
[I 2024-04-25 11:57:46,925] Trial 1 finished with value: 0.0019755035173147917 and parameters: {'model_id': 98, 'topk': 14}. Best is trial 1 with value: 0.0019755035173147917.
[I 2024-04-25 11:57:46,943] Trial 2 finished with value: 0.0008438818622380495 and parameters: {'model_id': 16, 'topk': 2}. Best is trial 1 with value: 0.0019755035173147917.
[I 2024-04-25 11:57:46,953] Trial 3 finished with value: 0.0017322313506156206 and parameters: {'model_id': 86, 'topk': 10}. Best is trial 1 with value: 0.0019755035173147917.
[I 2024-04-25 11:57:46,966] Trial 4 finished with value: 0.002543793059885502 and parameters: {'model_id': 8, 'topk': 18}. Best is trial 4 with value: 0.002543793059885502.
[I 2024-04

Best F1 score: 0.0028953582514077425
Optimal parameters: {'model_id': 3, 'topk': 19}
