In [1]:
# %load_ext autoreload
# %autoreload 2

import sys
# Put ../src on the module search path so the project-local modules imported
# further down (code_parser, dataset) can be found -- presumably they live
# there; confirm against the repository layout.
sys.path.append("../src")

import os
# Expose only GPU 0 to CUDA. This is set here, before torch is imported in the
# next cell, so that it is already in place when CUDA is first initialised.
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

In [2]:
import pickle
import tqdm
import numpy as np
import pandas as pd
import networkx as nx
from gensim.models import KeyedVectors as word2vec

import torch
from torch_geometric.utils import from_networkx
from torch_geometric.data import DataLoader

import torch.nn.functional as F
from torch_geometric.data import DataLoader
import torch_geometric.transforms as T
from torch_geometric.nn import GATConv
from torch_geometric.nn import GraphConv, TopKPooling
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp


from code_parser import *
from dataset import *

In [3]:
def _load_split(pairs_file):
    """Build a FastCloneDataset for one split; only the pairs file differs between splits.

    NOTE(review): return_pair_data=True presumably makes every item carry both
    graphs of a pair (the loaders below follow x_s / x_t) -- confirm in dataset.py.
    """
    return FastCloneDataset(
        root="../data/",
        functions_path="../data/networkx_data_idx/",
        pairs_path=pairs_file,
        return_pair_data=True,
    )


# Same construction order as before: train, then validation, then test.
train_dataset = _load_split("../data/train.npz")
val_dataset = _load_split("../data/valid.npz")
test_dataset = _load_split("../data/test.npz")

In [4]:
# Loader settings shared by the cells below:
#   batch_size -- number of dataset items per mini-batch
#   workers    -- number of DataLoader worker processes (train / val loaders)
batch_size, workers = 16, 8

In [5]:
def _pair_loader(dataset, **loader_options):
    """Wrap `dataset` in a DataLoader that tracks batch assignment for x_s and x_t.

    follow_batch makes the collated batch expose x_s_batch / x_t_batch, which
    the model reads to pool each graph of a pair separately.
    """
    return DataLoader(dataset, batch_size=batch_size, follow_batch=['x_s', 'x_t'], **loader_options)


# The test loader runs in the main process and keeps dataset order; the
# validation and training loaders use worker processes and shuffle.
test_loader = _pair_loader(test_dataset)
val_loader = _pair_loader(val_dataset, num_workers=workers, shuffle=True)
train_loader = _pair_loader(train_dataset, num_workers=workers, shuffle=True)

In [6]:
class Net(torch.nn.Module):
    """Siamese graph classifier for a pair of graphs.

    Both graphs of a pair are passed through the same three
    GraphConv -> ReLU -> TopKPooling stages. After every stage the remaining
    nodes are summarised per graph by concatenating a global max-pool and a
    global mean-pool readout. The per-stage readout differences
    (source - target) are summed and classified by a three-layer MLP that ends
    in a log-softmax.

    Args:
        in_channels: size of each input node feature vector.
        hidden_channels: output width of every graph convolution.
        num_classes: number of output classes.
        pool_ratio: `ratio` passed to each TopKPooling layer.
        dropout: dropout probability applied after the first linear layer.

    The defaults reproduce the previously hard-coded architecture
    (256 -> 128 -> 128 -> 128 convolutions, 256 -> 128 -> 64 -> 6 MLP), and the
    sub-module names are unchanged, so ``Net()`` and state dicts saved from the
    earlier definition keep working.
    """

    def __init__(self, in_channels=256, hidden_channels=128, num_classes=6,
                 pool_ratio=0.8, dropout=0.5):
        super().__init__()

        # Layers are created in the same order as before so that default
        # construction consumes the RNG identically.
        self.conv1 = GraphConv(in_channels, hidden_channels)
        self.pool1 = TopKPooling(hidden_channels, ratio=pool_ratio)
        self.conv2 = GraphConv(hidden_channels, hidden_channels)
        self.pool2 = TopKPooling(hidden_channels, ratio=pool_ratio)
        self.conv3 = GraphConv(hidden_channels, hidden_channels)
        self.pool3 = TopKPooling(hidden_channels, ratio=pool_ratio)

        # Each readout concatenates max-pool and mean-pool, hence 2 * hidden.
        self.lin1 = torch.nn.Linear(2 * hidden_channels, hidden_channels)
        self.lin2 = torch.nn.Linear(hidden_channels, hidden_channels // 2)
        self.lin3 = torch.nn.Linear(hidden_channels // 2, num_classes)

        self.dropout = dropout

    def _encode(self, x, edge_index, batch):
        """Run one graph batch through the shared stages; return the three readouts."""
        readouts = []
        for conv, pool in ((self.conv1, self.pool1),
                           (self.conv2, self.pool2),
                           (self.conv3, self.pool3)):
            x = F.relu(conv(x, edge_index))
            # TopKPooling returns (x, edge_index, edge_attr, batch, perm, score);
            # only the pruned nodes, edges and batch vector are needed here.
            x, edge_index, _, batch, _, _ = pool(x, edge_index, None, batch)
            readouts.append(torch.cat([gmp(x, batch), gap(x, batch)], dim=1))
        return tuple(readouts)

    def forward(self, data):
        """Return per-class log-probabilities for every graph pair in `data`.

        `data` must expose x_s / edge_index_s / x_s_batch for the source graphs
        and x_t / edge_index_t / x_t_batch for the target graphs.
        """
        s1, s2, s3 = self._encode(data.x_s, data.edge_index_s, data.x_s_batch)
        t1, t2, t3 = self._encode(data.x_t, data.edge_index_t, data.x_t_batch)

        # Sum of the stage-wise source-minus-target readout differences.
        x = (s1 - t1) + (s2 - t2) + (s3 - t3)

        x = F.relu(self.lin1(x))
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = F.relu(self.lin2(x))
        x = F.log_softmax(self.lin3(x), dim=-1)

        return x

In [7]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Default-configured network, moved to the selected device.
model = Net()
model = model.to(device)

# Adam over all model parameters with a learning rate of 1e-4.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)

In [8]:
def train(epoch):
    """Run one optimisation pass over `train_loader` and return the mean batch loss.

    `epoch` is accepted for the caller's convenience but is not used here.
    Relies on the notebook-level `model`, `optimizer`, `device` and
    `train_loader`.
    """
    model.train()

    batch_losses = []
    for batch in train_loader:
        batch = batch.to(device)

        optimizer.zero_grad()
        log_probs = model(batch)
        batch_loss = F.nll_loss(log_probs, batch.y)
        batch_loss.backward()
        optimizer.step()

        # Read the scalar once; it feeds both the progress line and the average.
        loss_value = batch_loss.item()
        # "\r" keeps the progress on a single, continuously overwritten line.
        print(f"loss = {loss_value}", end="\r")
        batch_losses.append(loss_value)

    return np.mean(batch_losses)

@torch.no_grad()
def test(loader):
    """Return the accuracy of the notebook-level `model` over everything in `loader`.

    Accuracy is the number of correct predictions divided by
    `len(loader.dataset)`. Gradients are disabled and the model is switched to
    eval mode for the duration of the pass.
    """
    model.eval()

    hits = 0
    for batch in loader:
        batch = batch.to(device)
        # The index of the largest log-probability is the predicted class.
        _, predicted = model(batch).max(dim=1)
        hits += (predicted == batch.y).sum().item()

    return hits / len(loader.dataset)


In [None]:
# Train for 200 epochs. After every epoch: measure train / validation / test
# accuracy, checkpoint the weights whenever validation accuracy improves, and
# append one summary line to the run log.
log_template = 'Epoch: {:03d}, Loss: {:.5f}, Train Acc: {:.5f}, Val Acc: {:.5f}, Test Acc: {:.5f}'

best_val_acc = 0
for epoch in range(1, 201):
    loss = train(epoch)
    train_acc, val_acc = test(train_loader), test(val_loader)

    # Strict improvement only: ties keep the earlier checkpoint.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "../data/new_main_pair.pt")

    test_acc = test(test_loader)

    log = log_template.format(epoch, loss, train_acc, val_acc, test_acc)

    # Append mode, so earlier lines in the log file are preserved.
    with open("../data/new_main_pair.log", "a") as log_file:
        print(log, file=log_file)

loss = 1.1098980903625488

In [None]:
# Report test accuracy for the checkpoint that the training loop selected on
# validation accuracy. Without reloading it, this cell would score whatever
# weights are currently in memory (the last epoch, or the point at which
# training was interrupted), which is not the model that was saved.
best_checkpoint = "../data/new_main_pair.pt"
if os.path.exists(best_checkpoint):
    model.load_state_dict(torch.load(best_checkpoint, map_location=device))
else:
    # No checkpoint has been written yet; fall back to the in-memory weights.
    print(f"{best_checkpoint} not found; evaluating the in-memory model instead")

test_acc = test(test_loader)
test_acc