In [1]:
# %load_ext autoreload
# %autoreload 2

import sys
sys.path.append("../src")

import os
os.environ['CUDA_VISIBLE_DEVICES'] = "1"

In [2]:
import pickle
import tqdm
import numpy as np
import pandas as pd
import networkx as nx
from gensim.models import KeyedVectors as word2vec

import torch
from torch_geometric.utils import from_networkx
from torch_geometric.data import DataLoader

import torch.nn.functional as F
from torch_geometric.data import DataLoader
import torch_geometric.transforms as T
from torch_geometric.nn import GATConv
from torch_geometric.nn import GraphConv, TopKPooling
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp


from code_parser import *
from dataset import CloneDataset

In [3]:
transform = T.NormalizeFeatures()
dataset = CloneDataset(root="../data/", functions_path="../data/functions/", pairs_path="../data/bcb_pair_ids.pkl", transform=transform)

In [4]:
dataset = dataset.shuffle()
n = (len(dataset) + 9) // 10
test_dataset = dataset[:n]
val_dataset = dataset[n:2 * n]
train_dataset = dataset[2 * n:]
test_loader = DataLoader(test_dataset, batch_size=128)
val_loader = DataLoader(val_dataset, batch_size=128, num_workers=64)
train_loader = DataLoader(train_dataset, batch_size=128, num_workers=64)

In [5]:
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        self.conv1 = GraphConv(dataset.num_features, 128)
        self.pool1 = TopKPooling(128, ratio=0.8)
        self.conv2 = GraphConv(128, 128)
        self.pool2 = TopKPooling(128, ratio=0.8)
        self.conv3 = GraphConv(128, 128)
        self.pool3 = TopKPooling(128, ratio=0.8)

        self.lin1 = torch.nn.Linear(256, 128)
        self.lin2 = torch.nn.Linear(128, 64)
        self.lin3 = torch.nn.Linear(64, 6)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch

        x = F.relu(self.conv1(x, edge_index))
        x, edge_index, _, batch, _, _ = self.pool1(x, edge_index, None, batch)
        x1 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = F.relu(self.conv2(x, edge_index))
        x, edge_index, _, batch, _, _ = self.pool2(x, edge_index, None, batch)
        x2 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = F.relu(self.conv3(x, edge_index))
        x, edge_index, _, batch, _, _ = self.pool3(x, edge_index, None, batch)
        x3 = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)

        x = x1 + x2 + x3

        x = F.relu(self.lin1(x))
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(self.lin2(x))
        x = F.log_softmax(self.lin3(x), dim=-1)

        return x

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

In [7]:
def train(epoch):
    model.train()

    loss_all = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, data.y)
        loss.backward()
        print(f"loss = {loss.item()}", end="\r")
        loss_all += data.num_graphs * loss.item()
        optimizer.step()
    return loss_all / len(train_dataset)


@torch.no_grad()
def test(loader):
    model.eval()

    correct = 0
    for data in loader:
        data = data.to(device)
        pred = model(data).max(dim=1)[1]
        correct += pred.eq(data.y).sum().item()
    return correct / len(loader.dataset)


In [None]:
best_val_acc = 0
for epoch in range(13, 201):
    loss = train(epoch)
    train_acc = test(train_loader)
    val_acc = test(val_loader)
    
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "../data/play.pt")
        
    
    print('Epoch: {:03d}, Loss: {:.5f}, Train Acc: {:.5f}, Val Acc: {:.5f}, Best: {:.5f}'.
          format(epoch, loss, train_acc, val_acc, best_val_acc))

Epoch: 013, Loss: 0.57298, Train Acc: 0.80562, Val Acc: 0.80599, Best: 0.80599
Epoch: 014, Loss: 0.56581, Train Acc: 0.80845, Val Acc: 0.80845, Best: 0.80845
Epoch: 015, Loss: 0.55768, Train Acc: 0.81091, Val Acc: 0.81163, Best: 0.81163
Epoch: 016, Loss: 0.55246, Train Acc: 0.81339, Val Acc: 0.81224, Best: 0.81224
Epoch: 017, Loss: 0.54628, Train Acc: 0.81638, Val Acc: 0.81583, Best: 0.81583
Epoch: 018, Loss: 0.53971, Train Acc: 0.81883, Val Acc: 0.81819, Best: 0.81819
Epoch: 019, Loss: 0.53611, Train Acc: 0.82092, Val Acc: 0.81840, Best: 0.81840
Epoch: 020, Loss: 0.53071, Train Acc: 0.82342, Val Acc: 0.82055, Best: 0.82055
Epoch: 021, Loss: 0.52401, Train Acc: 0.82277, Val Acc: 0.82075, Best: 0.82075
Epoch: 022, Loss: 0.52307, Train Acc: 0.82626, Val Acc: 0.82424, Best: 0.82424
Epoch: 023, Loss: 0.51710, Train Acc: 0.82832, Val Acc: 0.82506, Best: 0.82506
Epoch: 024, Loss: 0.51357, Train Acc: 0.83047, Val Acc: 0.82475, Best: 0.82506
Epoch: 025, Loss: 0.50977, Train Acc: 0.83207, Val A

Epoch: 001, Loss: 1.28519, Train Acc: 0.64447, Val Acc: 0.64459, Best: 0.64459  
Epoch: 002, Loss: 0.88842, Train Acc: 0.68974, Val Acc: 0.69206, Best: 0.69206  
Epoch: 003, Loss: 0.79654, Train Acc: 0.71359, Val Acc: 0.71883, Best: 0.71883  
Epoch: 004, Loss: 0.74853, Train Acc: 0.72913, Val Acc: 0.73164, Best: 0.73164  
Epoch: 005, Loss: 0.71533, Train Acc: 0.74874, Val Acc: 0.75154, Best: 0.75154  
Epoch: 006, Loss: 0.68941, Train Acc: 0.75900, Val Acc: 0.76138, Best: 0.76138  

In [10]:
epoch

12