In [1]:
# %load_ext autoreload
# %autoreload 2

# Add ../src to the module search path; the local `code_parser` and `dataset`
# modules imported further down are presumably found there — verify.
import sys
sys.path.append("../src")

# Expose only the CUDA device with index 1 to this process. This runs before
# `torch` is imported in the next cell; presumably so that it takes effect
# before CUDA is initialised — keep this ordering.
import os
os.environ['CUDA_VISIBLE_DEVICES'] = "1"

In [2]:
import pickle
import tqdm
import numpy as np
import pandas as pd
import networkx as nx
from gensim.models import KeyedVectors as word2vec

import torch
from torch_geometric.utils import from_networkx
from torch_geometric.data import DataLoader

import torch.nn.functional as F
from torch_geometric.data import DataLoader
import torch_geometric.transforms as T
from torch_geometric.nn import GATConv
from torch_geometric.nn import GraphConv, TopKPooling
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp


from code_parser import *
from dataset import *

In [3]:
# Build the train / validation / test datasets. All three share the same
# `root` and `functions_path` and differ only in the pairs file they read.
train_dataset, val_dataset, test_dataset = [
    FastCloneDataset(
        root="../data/",
        functions_path="../data/networkx_data_idx/",
        pairs_path=pairs_file,
        return_pair_data=False,
    )
    for pairs_file in ("../data/train.npz", "../data/valid.npz", "../data/test.npz")
]

In [4]:
# Settings consumed when the DataLoaders are constructed.
workers = 8       # passed as `num_workers`
batch_size = 16   # passed as `batch_size`

In [5]:
# The training and validation loaders shuffle and use `workers` worker
# processes; the test loader keeps DataLoader's defaults for both settings.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=workers,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=workers,
)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

In [6]:
class Net(torch.nn.Module):
    """Graph classifier: three GraphConv + TopKPooling stages and an MLP head.

    After each stage the remaining nodes are summarised per graph by
    concatenating a global max-pool and a global mean-pool. The three
    summaries are summed and passed through three linear layers; the output
    is a log-softmax over the classes.

    Args:
        in_channels: Size of the input node features. ``None`` (the default)
            falls back to ``train_dataset.num_features`` from the notebook
            namespace, which is what the constructor always used before.
        hidden_channels: Output width of every GraphConv layer (default 128).
        num_classes: Number of output classes (default 6).
        ratio: Value passed as ``ratio`` to each TopKPooling (default 0.8).
        dropout: Dropout probability applied after the first linear layer
            (default 0.5).
    """

    def __init__(self, in_channels=None, hidden_channels=128, num_classes=6,
                 ratio=0.8, dropout=0.5):
        super().__init__()

        if in_channels is None:
            # Backward-compatible default: read the feature size from the
            # notebook-level training dataset.
            in_channels = train_dataset.num_features
        self.dropout_p = dropout

        # Attribute names and creation order are unchanged so that existing
        # state_dict checkpoints keep the same keys.
        self.conv1 = GraphConv(in_channels, hidden_channels)
        self.pool1 = TopKPooling(hidden_channels, ratio=ratio)
        self.conv2 = GraphConv(hidden_channels, hidden_channels)
        self.pool2 = TopKPooling(hidden_channels, ratio=ratio)
        self.conv3 = GraphConv(hidden_channels, hidden_channels)
        self.pool3 = TopKPooling(hidden_channels, ratio=ratio)

        # The readout concatenates max- and mean-pooled features, hence the
        # factor of two on the first linear layer's input size.
        self.lin1 = torch.nn.Linear(2 * hidden_channels, hidden_channels)
        self.lin2 = torch.nn.Linear(hidden_channels, hidden_channels // 2)
        self.lin3 = torch.nn.Linear(hidden_channels // 2, num_classes)

    def forward(self, data):
        """Return per-graph log-probabilities for a batch.

        Args:
            data: Batch object exposing ``x``, ``edge_index`` and ``batch``.

        Returns:
            Tensor produced by ``log_softmax`` over the last dimension of the
            final linear layer's output.
        """
        x, edge_index, batch = data.x, data.edge_index, data.batch

        stages = (
            (self.conv1, self.pool1),
            (self.conv2, self.pool2),
            (self.conv3, self.pool3),
        )
        readout = None
        for conv, pool in stages:
            x = F.relu(conv(x, edge_index))
            x, edge_index, _, batch, _, _ = pool(x, edge_index, None, batch)
            summary = torch.cat([gmp(x, batch), gap(x, batch)], dim=1)
            # Accumulate left to right, i.e. (x1 + x2) + x3 as before.
            readout = summary if readout is None else readout + summary

        x = F.relu(self.lin1(readout))
        x = F.dropout(x, p=self.dropout_p, training=self.training)
        x = F.relu(self.lin2(x))
        return F.log_softmax(self.lin3(x), dim=-1)

In [7]:
# Run on the GPU when one is available, otherwise on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')

# Build the network, move it to the chosen device, then hand its parameters
# to the optimizer.
model = Net()
model = model.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)

In [8]:
# Step-wise learning-rate decay: multiply the rate by 0.1 every 30 calls to
# scheduler.step().
# NOTE(review): the training loop calls step() once per epoch for 198 epochs,
# so the rate is cut six times (1e-4 -> 1e-10) — confirm this decay is intended.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

In [9]:
def train(epoch):
    """Run one optimisation pass over ``train_loader`` and return the mean loss.

    Relies on the notebook-level ``model``, ``optimizer``, ``train_loader``
    and ``device``.

    Args:
        epoch: Accepted for call-site compatibility; not used by the function.

    Returns:
        The per-batch NLL losses, each weighted by ``data.num_graphs``,
        summed and divided by the number of items in the training dataset.
    """
    model.train()

    loss_all = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        loss = F.nll_loss(model(data), data.y)
        loss.backward()
        # Read the scalar once; each .item() call copies the value to the host.
        batch_loss = loss.item()
        # Carriage return keeps the progress on a single, overwritten line.
        print(f"loss = {batch_loss}", end="\r")
        loss_all += data.num_graphs * batch_loss
        optimizer.step()
    # Normalise by the dataset actually iterated, matching test().
    return loss_all / len(train_loader.dataset)


@torch.no_grad()
def test(loader):
    """Return the fraction of items in ``loader.dataset`` that ``model`` labels correctly.

    Puts the notebook-level ``model`` into eval mode and runs without
    gradient tracking.

    Args:
        loader: Iterable of batches exposing ``.to(device)`` and ``.y``; must
            also provide ``.dataset`` for the denominator.

    Returns:
        Number of correct predictions divided by ``len(loader.dataset)``.
    """
    model.eval()

    hits = 0
    for batch in loader:
        batch = batch.to(device)
        scores = model(batch)
        # Predicted class = index of the largest score along dim 1.
        predicted = scores.argmax(dim=1)
        hits += (predicted == batch.y).sum().item()
    return hits / len(loader.dataset)


In [None]:
# Main loop: train for one epoch, evaluate on all three splits, checkpoint on
# validation improvement, and append one summary line to the log file.
# NOTE(review): the epoch counter starts at 3 although best_val_acc is reset
# here and the model/optimizer/scheduler are created fresh in this notebook —
# looks like a leftover from a resumed run; confirm the intended start index.
best_val_acc = 0
for epoch in range(3, 201):
    loss = train(epoch)
    train_acc = test(train_loader)
    val_acc = test(val_loader)
    
    # Overwrite the checkpoint only when validation accuracy strictly improves.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "../data/new_main_damn_join.pt")
        
    # Test accuracy is logged only; it does not influence checkpoint selection.
    test_acc = test(test_loader)
    
    # Advance the learning-rate schedule once per epoch.
    scheduler.step()
    
    log = 'Epoch: {:03d}, Loss: {:.5f}, Train Acc: {:.5f}, Val Acc: {:.5f}, Test Acc: {:.5f}'.format(epoch, loss, train_acc, val_acc, test_acc)
    
    # Opened in append mode, so lines from earlier runs are kept.
    with open("../data/new_main_damn_join.log", "a") as f:
        f.write(log + "\n")

loss = 3.9498400688171387

In [None]:
# Accuracy of the in-memory model on the test split.
# NOTE(review): this scores the current weights, not the best-validation
# checkpoint written to ../data/new_main_damn_join.pt; no cell shown here
# loads that file back — confirm which model is meant to be reported.
test_acc = test(test_loader)
test_acc

```
Epoch: 001, Loss: 1.28519, Train Acc: 0.64447, Val Acc: 0.64459, Best: 0.64459  
Epoch: 002, Loss: 0.88842, Train Acc: 0.68974, Val Acc: 0.69206, Best: 0.69206  
Epoch: 003, Loss: 0.79654, Train Acc: 0.71359, Val Acc: 0.71883, Best: 0.71883  
Epoch: 004, Loss: 0.74853, Train Acc: 0.72913, Val Acc: 0.73164, Best: 0.73164  
Epoch: 005, Loss: 0.71533, Train Acc: 0.74874, Val Acc: 0.75154, Best: 0.75154  
Epoch: 006, Loss: 0.68941, Train Acc: 0.75900, Val Acc: 0.76138, Best: 0.76138  
Epoch: 013, Loss: 0.57298, Train Acc: 0.80562, Val Acc: 0.80599, Best: 0.80599
Epoch: 014, Loss: 0.56581, Train Acc: 0.80845, Val Acc: 0.80845, Best: 0.80845
Epoch: 015, Loss: 0.55768, Train Acc: 0.81091, Val Acc: 0.81163, Best: 0.81163
Epoch: 016, Loss: 0.55246, Train Acc: 0.81339, Val Acc: 0.81224, Best: 0.81224
Epoch: 017, Loss: 0.54628, Train Acc: 0.81638, Val Acc: 0.81583, Best: 0.81583
Epoch: 018, Loss: 0.53971, Train Acc: 0.81883, Val Acc: 0.81819, Best: 0.81819
Epoch: 019, Loss: 0.53611, Train Acc: 0.82092, Val Acc: 0.81840, Best: 0.81840
Epoch: 020, Loss: 0.53071, Train Acc: 0.82342, Val Acc: 0.82055, Best: 0.82055
Epoch: 021, Loss: 0.52401, Train Acc: 0.82277, Val Acc: 0.82075, Best: 0.82075
Epoch: 022, Loss: 0.52307, Train Acc: 0.82626, Val Acc: 0.82424, Best: 0.82424
Epoch: 023, Loss: 0.51710, Train Acc: 0.82832, Val Acc: 0.82506, Best: 0.82506
Epoch: 024, Loss: 0.51357, Train Acc: 0.83047, Val Acc: 0.82475, Best: 0.82506
Epoch: 025, Loss: 0.50977, Train Acc: 0.83207, Val Acc: 0.82578, Best: 0.82578
Epoch: 026, Loss: 0.50584, Train Acc: 0.83216, Val Acc: 0.82404, Best: 0.82578
Epoch: 027, Loss: 0.50285, Train Acc: 0.83504, Val Acc: 0.82804, Best: 0.82804
Epoch: 028, Loss: 0.50057, Train Acc: 0.83674, Val Acc: 0.83029, Best: 0.83029
Epoch: 029, Loss: 0.49668, Train Acc: 0.83774, Val Acc: 0.83132, Best: 0.83132
Epoch: 030, Loss: 0.49458, Train Acc: 0.83928, Val Acc: 0.83337, Best: 0.83337
Epoch: 031, Loss: 0.49189, Train Acc: 0.83907, Val Acc: 0.83091, Best: 0.83337
Epoch: 032, Loss: 0.48920, Train Acc: 0.84043, Val Acc: 0.83203, Best: 0.83337
Epoch: 033, Loss: 0.48458, Train Acc: 0.84352, Val Acc: 0.83378, Best: 0.83378
Epoch: 034, Loss: 0.48185, Train Acc: 0.84265, Val Acc: 0.83429, Best: 0.83429
Epoch: 035, Loss: 0.47953, Train Acc: 0.84492, Val Acc: 0.83767, Best: 0.83767
Epoch: 036, Loss: 0.47965, Train Acc: 0.84538, Val Acc: 0.83778, Best: 0.83778
Epoch: 037, Loss: 0.47545, Train Acc: 0.84602, Val Acc: 0.83778, Best: 0.83778
Epoch: 038, Loss: 0.47352, Train Acc: 0.84679, Val Acc: 0.83839, Best: 0.83839
Epoch: 039, Loss: 0.47038, Train Acc: 0.84655, Val Acc: 0.83911, Best: 0.83911
Epoch: 040, Loss: 0.46773, Train Acc: 0.84784, Val Acc: 0.84024, Best: 0.84024
Epoch: 041, Loss: 0.46530, Train Acc: 0.85026, Val Acc: 0.83901, Best: 0.84024
Epoch: 042, Loss: 0.46513, Train Acc: 0.85021, Val Acc: 0.84055, Best: 0.84055
Epoch: 043, Loss: 0.46085, Train Acc: 0.85071, Val Acc: 0.84116, Best: 0.84116
Epoch: 044, Loss: 0.46145, Train Acc: 0.85135, Val Acc: 0.84331, Best: 0.84331
Epoch: 045, Loss: 0.46038, Train Acc: 0.85183, Val Acc: 0.84198, Best: 0.84331
Epoch: 046, Loss: 0.45750, Train Acc: 0.85276, Val Acc: 0.84413, Best: 0.84413
Epoch: 047, Loss: 0.45426, Train Acc: 0.85426, Val Acc: 0.84342, Best: 0.84413
Epoch: 048, Loss: 0.45329, Train Acc: 0.85273, Val Acc: 0.84137, Best: 0.84413
Epoch: 049, Loss: 0.45279, Train Acc: 0.85512, Val Acc: 0.84383, Best: 0.84413
Epoch: 050, Loss: 0.44985, Train Acc: 0.85565, Val Acc: 0.84526, Best: 0.84526
Epoch: 051, Loss: 0.44780, Train Acc: 0.85596, Val Acc: 0.84311, Best: 0.84526
Epoch: 052, Loss: 0.44632, Train Acc: 0.85720, Val Acc: 0.84557, Best: 0.84557
Epoch: 053, Loss: 0.44510, Train Acc: 0.85747, Val Acc: 0.84506, Best: 0.84557
Epoch: 054, Loss: 0.44218, Train Acc: 0.85802, Val Acc: 0.84454, Best: 0.84557
Epoch: 055, Loss: 0.44075, Train Acc: 0.85735, Val Acc: 0.84331, Best: 0.84557
Epoch: 056, Loss: 0.43926, Train Acc: 0.85808, Val Acc: 0.84598, Best: 0.84598
Epoch: 057, Loss: 0.43851, Train Acc: 0.85966, Val Acc: 0.84649, Best: 0.84649
Epoch: 058, Loss: 0.43756, Train Acc: 0.85757, Val Acc: 0.84444, Best: 0.84649
Epoch: 059, Loss: 0.43728, Train Acc: 0.86103, Val Acc: 0.84885, Best: 0.84885
Epoch: 060, Loss: 0.43349, Train Acc: 0.86269, Val Acc: 0.84906, Best: 0.84906
Epoch: 061, Loss: 0.43370, Train Acc: 0.85784, Val Acc: 0.84372, Best: 0.84906
Epoch: 062, Loss: 0.43201, Train Acc: 0.86180, Val Acc: 0.84731, Best: 0.84906
Epoch: 063, Loss: 0.43011, Train Acc: 0.86319, Val Acc: 0.84629, Best: 0.84906
Epoch: 064, Loss: 0.42922, Train Acc: 0.86361, Val Acc: 0.84731, Best: 0.84906
Epoch: 065, Loss: 0.42587, Train Acc: 0.86392, Val Acc: 0.84865, Best: 0.84906
Epoch: 066, Loss: 0.42622, Train Acc: 0.86442, Val Acc: 0.84895, Best: 0.84906
Epoch: 067, Loss: 0.42425, Train Acc: 0.86416, Val Acc: 0.84916, Best: 0.84916
Epoch: 068, Loss: 0.42335, Train Acc: 0.86351, Val Acc: 0.84947, Best: 0.84947
Epoch: 069, Loss: 0.42090, Train Acc: 0.86583, Val Acc: 0.84988, Best: 0.84988
Epoch: 070, Loss: 0.41949, Train Acc: 0.86616, Val Acc: 0.84906, Best: 0.84988
Epoch: 071, Loss: 0.41683, Train Acc: 0.86631, Val Acc: 0.85100, Best: 0.85100
Epoch: 072, Loss: 0.41939, Train Acc: 0.86678, Val Acc: 0.84916, Best: 0.85100
Epoch: 073, Loss: 0.41743, Train Acc: 0.86863, Val Acc: 0.85193, Best: 0.85193
Epoch: 074, Loss: 0.41432, Train Acc: 0.86681, Val Acc: 0.84988, Best: 0.85193
Epoch: 075, Loss: 0.41406, Train Acc: 0.86801, Val Acc: 0.85059, Best: 0.85193
Epoch: 076, Loss: 0.41312, Train Acc: 0.86971, Val Acc: 0.85213, Best: 0.85213
Epoch: 077, Loss: 0.41265, Train Acc: 0.86994, Val Acc: 0.85265, Best: 0.85265
Epoch: 078, Loss: 0.41216, Train Acc: 0.87017, Val Acc: 0.85213, Best: 0.85265
Epoch: 079, Loss: 0.40852, Train Acc: 0.87030, Val Acc: 0.85121, Best: 0.85265
Epoch: 080, Loss: 0.40850, Train Acc: 0.87011, Val Acc: 0.85039, Best: 0.85265
Epoch: 081, Loss: 0.40619, Train Acc: 0.87195, Val Acc: 0.85295, Best: 0.85295
Epoch: 082, Loss: 0.40591, Train Acc: 0.87266, Val Acc: 0.85213, Best: 0.85295
Epoch: 083, Loss: 0.40437, Train Acc: 0.87224, Val Acc: 0.85183, Best: 0.85295
Epoch: 084, Loss: 0.40268, Train Acc: 0.87111, Val Acc: 0.85224, Best: 0.85295
Epoch: 085, Loss: 0.40222, Train Acc: 0.87235, Val Acc: 0.85326, Best: 0.85326
Epoch: 086, Loss: 0.40034, Train Acc: 0.87340, Val Acc: 0.85408, Best: 0.85408
Epoch: 087, Loss: 0.39954, Train Acc: 0.87299, Val Acc: 0.85121, Best: 0.85408
Epoch: 088, Loss: 0.39930, Train Acc: 0.87500, Val Acc: 0.85295, Best: 0.85408
Epoch: 089, Loss: 0.39886, Train Acc: 0.87586, Val Acc: 0.85449, Best: 0.85449
Epoch: 090, Loss: 0.39763, Train Acc: 0.87311, Val Acc: 0.84926, Best: 0.85449
Epoch: 091, Loss: 0.39668, Train Acc: 0.87412, Val Acc: 0.85295, Best: 0.85449
Epoch: 092, Loss: 0.39508, Train Acc: 0.87224, Val Acc: 0.85162, Best: 0.85449
Epoch: 093, Loss: 0.39274, Train Acc: 0.87679, Val Acc: 0.85357, Best: 0.85449
Epoch: 094, Loss: 0.39218, Train Acc: 0.87667, Val Acc: 0.85511, Best: 0.85511
Epoch: 095, Loss: 0.39277, Train Acc: 0.87744, Val Acc: 0.85357, Best: 0.85511
Epoch: 096, Loss: 0.39104, Train Acc: 0.87857, Val Acc: 0.85459, Best: 0.85511
Epoch: 097, Loss: 0.38919, Train Acc: 0.87556, Val Acc: 0.85316, Best: 0.85511
Epoch: 098, Loss: 0.38934, Train Acc: 0.87694, Val Acc: 0.85449, Best: 0.85511
Epoch: 099, Loss: 0.38701, Train Acc: 0.87923, Val Acc: 0.85429, Best: 0.85511
Epoch: 100, Loss: 0.38732, Train Acc: 0.88032, Val Acc: 0.85695, Best: 0.85695
Epoch: 101, Loss: 0.38675, Train Acc: 0.87926, Val Acc: 0.85644, Best: 0.85695
Epoch: 102, Loss: 0.38623, Train Acc: 0.87956, Val Acc: 0.85541, Best: 0.85695
Epoch: 103, Loss: 0.38318, Train Acc: 0.88184, Val Acc: 0.85695, Best: 0.85695
Epoch: 104, Loss: 0.38352, Train Acc: 0.88047, Val Acc: 0.85634, Best: 0.85695
Epoch: 105, Loss: 0.38231, Train Acc: 0.87990, Val Acc: 0.85675, Best: 0.85695
Epoch: 106, Loss: 0.38149, Train Acc: 0.88230, Val Acc: 0.85593, Best: 0.85695
Epoch: 107, Loss: 0.38000, Train Acc: 0.88200, Val Acc: 0.85664, Best: 0.85695
Epoch: 108, Loss: 0.37936, Train Acc: 0.88022, Val Acc: 0.85511, Best: 0.85695
Epoch: 109, Loss: 0.37777, Train Acc: 0.88285, Val Acc: 0.85757, Best: 0.85757
Epoch: 110, Loss: 0.37777, Train Acc: 0.88361, Val Acc: 0.85582, Best: 0.85757
Epoch: 111, Loss: 0.37693, Train Acc: 0.87709, Val Acc: 0.85152, Best: 0.85757
Epoch: 112, Loss: 0.37549, Train Acc: 0.88334, Val Acc: 0.85623, Best: 0.85757
Epoch: 113, Loss: 0.37409, Train Acc: 0.88430, Val Acc: 0.85634, Best: 0.85757
Epoch: 114, Loss: 0.37556, Train Acc: 0.88397, Val Acc: 0.85623, Best: 0.85757
Epoch: 115, Loss: 0.37251, Train Acc: 0.87722, Val Acc: 0.85193, Best: 0.85757
Epoch: 116, Loss: 0.37162, Train Acc: 0.88420, Val Acc: 0.85685, Best: 0.85757
Epoch: 117, Loss: 0.37022, Train Acc: 0.88202, Val Acc: 0.85552, Best: 0.85757
Epoch: 118, Loss: 0.36980, Train Acc: 0.88585, Val Acc: 0.85911, Best: 0.85911
Epoch: 119, Loss: 0.36869, Train Acc: 0.88409, Val Acc: 0.85849, Best: 0.85911
Epoch: 120, Loss: 0.36725, Train Acc: 0.88284, Val Acc: 0.85695, Best: 0.85911
Epoch: 121, Loss: 0.36863, Train Acc: 0.88561, Val Acc: 0.85767, Best: 0.85911
Epoch: 122, Loss: 0.36665, Train Acc: 0.88763, Val Acc: 0.86064, Best: 0.86064
Epoch: 123, Loss: 0.36491, Train Acc: 0.88562, Val Acc: 0.85716, Best: 0.86064
Epoch: 124, Loss: 0.36405, Train Acc: 0.88685, Val Acc: 0.86034, Best: 0.86064
Epoch: 125, Loss: 0.36397, Train Acc: 0.88136, Val Acc: 0.85500, Best: 0.86064
Epoch: 126, Loss: 0.36385, Train Acc: 0.88895, Val Acc: 0.86146, Best: 0.86146
Epoch: 127, Loss: 0.36049, Train Acc: 0.88935, Val Acc: 0.86105, Best: 0.86146
Epoch: 128, Loss: 0.36135, Train Acc: 0.88962, Val Acc: 0.86095, Best: 0.86146
Epoch: 129, Loss: 0.36089, Train Acc: 0.88854, Val Acc: 0.86023, Best: 0.86146
Epoch: 130, Loss: 0.35989, Train Acc: 0.88771, Val Acc: 0.85972, Best: 0.86146
Epoch: 131, Loss: 0.35874, Train Acc: 0.88836, Val Acc: 0.86013, Best: 0.86146
Epoch: 132, Loss: 0.35990, Train Acc: 0.88299, Val Acc: 0.85572, Best: 0.86146
Epoch: 133, Loss: 0.35755, Train Acc: 0.89135, Val Acc: 0.86290, Best: 0.86290
Epoch: 134, Loss: 0.35595, Train Acc: 0.89195, Val Acc: 0.86239, Best: 0.86290
Epoch: 135, Loss: 0.35703, Train Acc: 0.89153, Val Acc: 0.86054, Best: 0.86290
Epoch: 136, Loss: 0.35585, Train Acc: 0.88763, Val Acc: 0.85941, Best: 0.86290
Epoch: 137, Loss: 0.35587, Train Acc: 0.89184, Val Acc: 0.86177, Best: 0.86290
Epoch: 144, Loss: 0.34991, Train Acc: 0.89030, Val Acc: 0.86013, Best: 0.86290
Epoch: 145, Loss: 0.34883, Train Acc: 0.88435, Val Acc: 0.85490, Best: 0.86290
Epoch: 146, Loss: 0.34821, Train Acc: 0.89549, Val Acc: 0.86536, Best: 0.86536
Epoch: 147, Loss: 0.34704, Train Acc: 0.89037, Val Acc: 0.85931, Best: 0.86536
Epoch: 148, Loss: 0.34774, Train Acc: 0.89535, Val Acc: 0.86290, Best: 0.86536
Epoch: 149, Loss: 0.34638, Train Acc: 0.89543, Val Acc: 0.86423, Best: 0.86536
Epoch: 150, Loss: 0.34581, Train Acc: 0.89566, Val Acc: 0.86403, Best: 0.86536
Epoch: 151, Loss: 0.34289, Train Acc: 0.89407, Val Acc: 0.86146, Best: 0.86536
Epoch: 152, Loss: 0.34359, Train Acc: 0.89572, Val Acc: 0.86044, Best: 0.86536
Epoch: 160, Loss: 0.33755, Train Acc: 0.89809, Val Acc: 0.86341, Best: 0.86536
Epoch: 161, Loss: 0.33734, Train Acc: 0.89716, Val Acc: 0.86034, Best: 0.86536
Epoch: 162, Loss: 0.33589, Train Acc: 0.90007, Val Acc: 0.86300, Best: 0.86536
Epoch: 163, Loss: 0.33698, Train Acc: 0.89832, Val Acc: 0.86331, Best: 0.86536
Epoch: 164, Loss: 0.33380, Train Acc: 0.89891, Val Acc: 0.86075, Best: 0.86536
Epoch: 165, Loss: 0.33534, Train Acc: 0.89804, Val Acc: 0.86228, Best: 0.86536
Epoch: 166, Loss: 0.33451, Train Acc: 0.89862, Val Acc: 0.86464, Best: 0.86536
Epoch: 167, Loss: 0.33296, Train Acc: 0.89598, Val Acc: 0.85880, Best: 0.86536
Epoch: 168, Loss: 0.33340, Train Acc: 0.89566, Val Acc: 0.86044, Best: 0.86536
Epoch: 169, Loss: 0.33206, Train Acc: 0.90094, Val Acc: 0.86557, Best: 0.86557
Epoch: 170, Loss: 0.33216, Train Acc: 0.90032, Val Acc: 0.86249, Best: 0.86557
Epoch: 171, Loss: 0.33110, Train Acc: 0.89854, Val Acc: 0.85972, Best: 0.86557
Epoch: 172, Loss: 0.32980, Train Acc: 0.89493, Val Acc: 0.85941, Best: 0.86557
Epoch: 173, Loss: 0.32763, Train Acc: 0.90178, Val Acc: 0.86311, Best: 0.86557

## Test accuracy
test = 0.8718211648892535
```

In [None]:
# Evaluate the current model on the test split again; as the last expression
# in the cell, the returned accuracy is shown as the cell output.
test(test_loader)