In [1]:
import torch
import numpy as np
import pandas as pd
from sklearn import metrics
from itertools import cycle
from torch.optim import Adam
import matplotlib.pyplot as plt
from torch_geometric.data import Data
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from torch_geometric.transforms import RandomLinkSplit, RandomNodeSplit

from src.dmgi import DMGI
from src.data_preparation import DataPreprocessor
from src.graph_data_loader import graph_loader,  heterogeneous_graph_loader

In [2]:
het_train_graph, het_test_graph = heterogeneous_graph_loader(split_type='whole', swap_rate=0.1)

In [None]:
het_transform = RandomNodeSplit(num_train_per_class=400)
het_data = het_transform(het_train_graph)

In [5]:
model = DMGI(het_data['sample'].num_nodes, het_data['sample'].x.size(-1),
             out_channels=128, num_relations=len(het_data.edge_types))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
het_data, model = het_data.to(device), model.to(device)

optimizer = Adam(model.parameters(), lr=0.005, weight_decay=0.001)


In [6]:
def train():
    model.train()
    optimizer.zero_grad()
    x = het_data['sample'].x
    edge_indices = het_data.edge_index_dict.values()
    pos_hs, neg_hs, summaries = model(x, edge_indices)
    loss = model.loss(pos_hs, neg_hs, summaries)
    loss.backward()
    optimizer.step()
    return float(loss)


@torch.no_grad()
def test():
    train_emb = model.Z[het_data['sample'].train_mask].cpu()
    val_emb = model.Z[het_data['sample'].val_mask].cpu()
    test_emb = model.Z[het_data['sample'].test_mask].cpu()

    train_y = het_data['sample'].y[het_data['sample'].train_mask].cpu()
    val_y = het_data['sample'].y[het_data['sample'].val_mask].cpu()
    test_y = het_data['sample'].y[het_data['sample'].test_mask].cpu()

    #clf = LogisticRegression(class_weight='balanced').fit(train_emb, train_y)
    clf = make_pipeline(StandardScaler(), LogisticRegression(class_weight='balanced'))
    clf.fit(train_emb, train_y)
    print(clf.classes_)
    train_score = metrics.roc_auc_score(train_y, clf.predict(train_emb))
    val_score = metrics.roc_auc_score(val_y, clf.predict(val_emb))
    test_score = metrics.roc_auc_score(test_y, clf.predict(test_emb))
    return train_score, val_score, test_score

In [7]:
for epoch in range(1, 21):
    loss = train()
    if epoch % 1 == 0:
        train_acc, val_acc, test_acc = test()
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, '
              f'Train: {train_acc:.4f}, Val: {val_acc:.4f}, Test: {test_acc:.4f}')

[0 1]
Epoch: 001, Loss: 17967978.0000, Train: 0.5840, Val: 0.4714, Test: 0.4871
[0 1]
Epoch: 002, Loss: 17895790.0000, Train: 0.5863, Val: 0.4770, Test: 0.4931
[0 1]
Epoch: 003, Loss: 12689805.0000, Train: 0.5869, Val: 0.4896, Test: 0.5018
[0 1]
Epoch: 004, Loss: 6611029.5000, Train: 0.5918, Val: 0.4730, Test: 0.5170
[0 1]
Epoch: 005, Loss: 12906369.0000, Train: 0.5883, Val: 0.4730, Test: 0.5181
[0 1]
Epoch: 006, Loss: 5122281.0000, Train: 0.5895, Val: 0.4856, Test: 0.5002
[0 1]
Epoch: 007, Loss: 10422079.0000, Train: 0.5926, Val: 0.4733, Test: 0.5063
[0 1]
Epoch: 008, Loss: 4420702.0000, Train: 0.5895, Val: 0.4643, Test: 0.5005
[0 1]
Epoch: 009, Loss: 1632308.8750, Train: 0.5906, Val: 0.4669, Test: 0.5041
[0 1]
Epoch: 010, Loss: -806072.9375, Train: 0.5897, Val: 0.4461, Test: 0.5151
[0 1]
Epoch: 011, Loss: 2053672.5000, Train: 0.5875, Val: 0.4461, Test: 0.5106
[0 1]
Epoch: 012, Loss: 1098337.8750, Train: 0.5876, Val: 0.4542, Test: 0.5177
[0 1]
Epoch: 013, Loss: -3900273.5000, Train: 0