<a href="https://colab.research.google.com/github/Marsobad/ADL/blob/master/Train_ppi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install dgl-cu101



In [0]:
import argparse
from os import path
import matplotlib.pyplot as plt

import numpy as np
import time
import torch
import torch.nn.functional as F
import dgl
from dgl import batch
from dgl.data.ppi import LegacyPPIDataset
from dgl.nn.pytorch import GraphConv
from dgl.nn.pytorch import edge_softmax, GATConv
from sklearn.metrics import f1_score
from torch import nn, optim
from torch.utils.data import DataLoader

In [0]:
#MODEL_STATE_FILE = path.join(path.dirname(path.abspath(__file__)), "model_state.pth")
#in_channels, out_channels, heads=1, concat=True, negative_slope=0.2, dropout=0, bias=True
MODEL_STATE_FILE = "model_state.pth"
class GAT(nn.Module):
    def __init__(self, g, num_layers, in_dim, num_hidden, num_classes, heads, feat_drop, attn_drop, negative_slope, residual, activation):
        super(GAT, self).__init__()

        self.g = g
        self.num_layers = num_layers
        self.gat_layers = nn.ModuleList()
        self.activation = activation
        # input projection (no residual)
        self.gat_layers.append(GATConv(in_dim, num_hidden, heads, feat_drop, attn_drop, negative_slope, False, self.activation))
        # hidden layers
        for l in range(1, num_layers):
            # due to multi-head, the in_dim = num_hidden * num_heads
            self.gat_layers.append(GATConv(num_hidden * heads, num_hidden, heads, feat_drop, attn_drop, negative_slope, residual, self.activation))
        # output projection
        self.gat_layers.append(GATConv(num_hidden * heads, num_classes, heads, feat_drop, attn_drop, negative_slope, residual, None))

    def forward(self, inputs):
        h = inputs
        for l in range(len(self.gat_layers)- 1):
            h = self.gat_layers[l](self.g, h).flatten(1)
        # output projection
        logits = self.gat_layers[-1](self.g, h).mean(1)
        return logits

class BasicGraphModel(nn.Module):

    def __init__(self, g, n_layers, input_size, hidden_size, output_size, nonlinearity):
        super().__init__()

        self.g = g
        self.layers = nn.ModuleList()
        self.layers.append(GraphConv(input_size, hidden_size, activation=nonlinearity))
        for i in range(n_layers - 1):
            self.layers.append(GraphConv(hidden_size, hidden_size, activation=nonlinearity))
        self.layers.append(GraphConv(hidden_size, output_size))

    def forward(self, inputs):
        outputs = inputs
        for i, layer in enumerate(self.layers):
            outputs = layer(self.g, outputs)
        return outputs



In [0]:
def main(args):
    # create the dataset
    train_dataset, test_dataset = LegacyPPIDataset(mode="train"), LegacyPPIDataset(mode="test")
    train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, collate_fn=collate_fn)
    test_dataloader = DataLoader(test_dataset, batch_size=args.batch_size, collate_fn=collate_fn)
    n_features, n_classes = train_dataset.features.shape[1], train_dataset.labels.shape[1]

    #g, features, labels, mask = load_cora_data()
    # create the model, loss function and optimizer
    device = torch.device("cpu" if args.gpu < 0 else "cuda:" + str(args.gpu))
    #( g, num_layers, in_dim, num_hidden, num_classes, heads, activation, feat_drop, attn_drop, negative_slope, residual)
    model = GAT(g=train_dataset.graph, num_layers=3, in_dim=n_features, num_hidden=256, num_classes=n_classes, heads=10, feat_drop=0., attn_drop=0., negative_slope=0.2, residual=False, activation= F.elu).to(device)
    #model = BasicGraphModel(g=train_dataset.graph, n_layers=2, input_size=n_features,hidden_size=256, output_size=n_classes, nonlinearity=F.elu).to(device)
    loss_fcn = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters())
    #optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)

    # train and test
    if args.mode == "train":
        train(model, loss_fcn, device, optimizer, train_dataloader, test_dataset)
        torch.save(model.state_dict(), MODEL_STATE_FILE)
    model.load_state_dict(torch.load(MODEL_STATE_FILE))
    return test(model, loss_fcn, device, test_dataloader)

In [0]:
def train(model, loss_fcn, device, optimizer, train_dataloader, test_dataset):
  fones = []
  for epoch in range(args.epochs):
      model.train()
      losses = []
      for batch, data in enumerate(train_dataloader):
          subgraph, features, labels = data
          features = features.to(device)
          labels = labels.to(device)
          model.g = subgraph
          for layer in model.gat_layers:
              layer.g = subgraph
          logits = model(features.float())
          loss = loss_fcn(logits, labels.float())
          optimizer.zero_grad()
          loss.backward()
          optimizer.step()
          losses.append(loss.item())
      loss_data = np.array(losses).mean()
      print("Epoch {:05d} | Loss: {:.4f}".format(epoch + 1, loss_data))

      if epoch % 5 == 0:
          scores = []
          fones_mean = []
          for batch, test_data in enumerate(test_dataset):
              subgraph, features, labels = test_data
              features = torch.tensor(features).to(device)
              labels = torch.tensor(labels).to(device)
              score, _ = evaluate(features.float(), model, subgraph, labels.float(), loss_fcn)
              scores.append(score)
              fones_mean.append(score)
          print("F1-Score: {:.4f} ".format(np.array(scores).mean()))
          fones.append(np.array(fones_mean).mean())
  print(score)
  #plot the training error wrt. the number of epochs: 
  plt.plot(range(1,251, 5), fones)
  plt.xlabel("num_epochs")
  plt.ylabel("F1 score")
  plt.title("Visualization of F1 score")

In [0]:
def test(model, loss_fcn, device, test_dataloader):
    test_scores = []
    for batch, test_data in enumerate(test_dataloader):
        subgraph, features, labels = test_data
        features = features.to(device)
        labels = labels.to(device)
        test_scores.append(evaluate(features, model, subgraph, labels.float(), loss_fcn)[0])
    mean_scores = np.array(test_scores).mean()
    print("F1-Score: {:.4f}".format(np.array(test_scores).mean()))
    return mean_scores


def evaluate(features, model, subgraph, labels, loss_fcn):
    with torch.no_grad():
        model.eval()
        model.g = subgraph
        for layer in model.gat_layers:
            layer.g = subgraph
        output = model(features.float())
        loss_data = loss_fcn(output, labels.float())
        predict = np.where(output.data.cpu().numpy() >= 0.5, 1, 0)
        score = f1_score(labels.data.cpu().numpy(), predict, average="micro")
        return score, loss_data.item()


def collate_fn(sample):
    graphs, features, labels = map(list, zip(*sample))
    graph = batch(graphs)
    features = torch.from_numpy(np.concatenate(features))
    labels = torch.from_numpy(np.concatenate(labels))
    return graph, features, labels

In [0]:
parser = argparse.ArgumentParser()
parser.add_argument("--mode",  choices=["train", "test"], default="train")
parser.add_argument("--gpu", type=int, default=-1, help="GPU to use. Set -1 to use CPU.")
parser.add_argument("--epochs", type=int, default=250)
parser.add_argument("--batch-size", type=int, default=2)
args = parser.parse_args(["--mode","train", "--gpu","0","--epochs","250", "--batch-size", "2"])
main(args)

Loading G...
Loading G...
Epoch 00001 | Loss: 0.5987
F1-Score: 0.3433 
Epoch 00002 | Loss: 0.5611
Epoch 00003 | Loss: 0.5420
Epoch 00004 | Loss: 0.5322
Epoch 00005 | Loss: 0.5260
Epoch 00006 | Loss: 0.5210
F1-Score: 0.3644 
Epoch 00007 | Loss: 0.5194
Epoch 00008 | Loss: 0.5154
Epoch 00009 | Loss: 0.5102
Epoch 00010 | Loss: 0.5101
Epoch 00011 | Loss: 0.5139
F1-Score: 0.3487 
Epoch 00012 | Loss: 0.5031
Epoch 00013 | Loss: 0.4964
Epoch 00014 | Loss: 0.4898
Epoch 00015 | Loss: 0.4912
Epoch 00016 | Loss: 0.4940
F1-Score: 0.3212 
Epoch 00017 | Loss: 0.4833
Epoch 00018 | Loss: 0.4734
Epoch 00019 | Loss: 0.4652
Epoch 00020 | Loss: 0.4562
Epoch 00021 | Loss: 0.4498
F1-Score: 0.4996 
Epoch 00022 | Loss: 0.4466
Epoch 00023 | Loss: 0.4430
Epoch 00024 | Loss: 0.4468
Epoch 00025 | Loss: 0.4501
Epoch 00026 | Loss: 0.4375
F1-Score: 0.5735 
Epoch 00027 | Loss: 0.4245
Epoch 00028 | Loss: 0.4077
Epoch 00029 | Loss: 0.3953
Epoch 00030 | Loss: 0.3848
Epoch 00031 | Loss: 0.3756
F1-Score: 0.6617 
Epoch 00032