In [1]:
import sys
sys.path.append("../src")

import os
os.environ['CUDA_VISIBLE_DEVICES'] = "0,1"

In [2]:
import os
from argparse import ArgumentParser

import dgl
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn.pytorch import GraphConv
from torch.utils.data import DataLoader

from code_parser import *
from dgl_dataset import CloneDataset

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Using backend: pytorch


In [3]:
import os
from argparse import ArgumentParser

import dgl
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn.pytorch import GraphConv
from torch.utils.data import DataLoader

from code_parser import *
from dgl_dataset import CloneDataset

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def collate(samples):
    # The input `samples` is a list of pairs
    #  (graph, label).
    graph1, graph2, labels = map(list, zip(*samples))
    batched_graph1 = dgl.batch(graph1)
    batched_graph2 = dgl.batch(graph2)
    return batched_graph1, batched_graph2, torch.tensor(labels)


class NetBasic(nn.Module):

    def __init__(self, hparams):
        super(NetBasic, self).__init__()

        self.hparams = hparams

        self.build_model()

    def build_model(self):
        self.conv1 = GraphConv(self.hparams.num_features, self.hparams.hidden_dim)
        self.conv2 = GraphConv(self.hparams.hidden_dim, self.hparams.hidden_dim)
#         self.conv3 = GraphConv(self.hparams.hidden_dim, self.hparams.hidden_dim)
        self.classify = nn.Linear(self.hparams.hidden_dim * 2, self.hparams.num_classes)

    def forward_(self, g1, g2):
        h1 = g1.ndata['data'].view(-1, self.hparams.num_features).float().to(device)
        h1 = F.relu(self.conv1(g1, h1))
        h1 = F.relu(self.conv2(g1, h1))
#         h1 = F.relu(self.conv3(g1, h1))
        g1.ndata['h'] = h1

        h2 = g2.ndata['data'].view(-1, self.hparams.num_features).float().to(device)
        h2 = F.relu(self.conv1(g2, h2))
        h2 = F.relu(self.conv2(g2, h2))
#         h2 = F.relu(self.conv3(g2, h2))
        g2.ndata['h'] = h2

        hg1 = dgl.mean_nodes(g1, 'h')
        hg2 = dgl.mean_nodes(g2, 'h')

        return F.log_softmax(self.classify(torch.cat([hg1, hg2], dim=-1)), dim=-1)

    def forward(self, g1, g2):
        return self.forward_(g1, g2)

    def training_step(self, data):
        g1, g2, label = data
        output = self.forward(g1, g2)
        loss = F.nll_loss(output, label.to(device))
        del output
        return {'loss': loss}

    def validation_step(self, data):
        g1, g2, label = data
        output = self.forward(g1, g2)
        loss = F.nll_loss(output, label.to(device))
        pred = output.max(dim=1)[1]
        acc = pred.eq(label.cuda()).type(torch.float32).mean()
        ret = {'val_loss': loss.item(), 'val_acc': acc.item()}
        del loss
        del pred
        del output
        del acc
        return ret

    def validation_epoch_end(self, outputs):
        avg_loss = np.mean([x['val_loss'] for x in outputs])
        avg_acc = np.mean([x['val_acc'] for x in outputs])

        return {'val_loss': avg_loss, 'val_acc': avg_acc}

#         def test_step(self, data, batch_idx):
#             g1, g2, label = data
#             output = self.forward(g1, g2)
#             loss = F.cross_entropy(output, label)
#             pred = torch.softmax(output, 1).max(dim=1)[1]
#             acc = pred.eq(data.y).type(torch.float32).mean()
#             return {'test_loss': loss, 'test_acc': acc}

#         def test_epoch_end(self, outputs):
#             avg_loss = torch.stack([x['test_loss'] for x in outputs]).mean()
#             avg_acc = torch.stack([x['test_acc'] for x in outputs]).mean()

#             tensorboard_logs = {'avg_test_loss': avg_loss, 'avg_test_acc': avg_acc}
#             return {'test_loss': avg_loss, 'test_acc': avg_acc, 'log': tensorboard_logs}

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)
#         scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)
        return optimizer

    def prepare_data(self):
        dataset = CloneDataset(
            functions_path=os.path.join(self.hparams.root, "dgl_functions"),
            pairs_path=os.path.join(self.hparams.root, "bcb_pair_ids.pkl"),
        )

        n = len(dataset)
        self.train_dataset, self.val_dataset, self.test_dataset = dgl.data.utils.split_dataset(dataset, frac_list =[0.6, 0.15, 0.25], shuffle=True)

    def train_dataloader(self):
        # REQUIRED.
        return DataLoader(self.train_dataset,
                          batch_size=self.hparams.batch_size,
                          num_workers=self.hparams.workers,
                          shuffle=True,
                          collate_fn=collate)

    def val_dataloader(self):
        # OPTIONAL
        return DataLoader(self.val_dataset,
                          shuffle=True,
                          batch_size=self.hparams.batch_size,
                          num_workers=self.hparams.workers,
                          collate_fn=collate)

    def test_dataloader(self):
        # OPTIONAL
        return DataLoader(self.test_dataset,
                              batch_size=self.hparams.batch_size,
                              num_workers=self.hparams.workers,
                              collate_fn=collate)

    @staticmethod
    def add_model_specific_args():
        parser = ArgumentParser(add_help=False)

        parser.add_argument('--learning_rate', default=0.0001, type=float)
        parser.add_argument('--batch_size', default=32, type=int)
        parser.add_argument('--workers', default='8', type=int)
        parser.add_argument('--num_classes', default='6', type=int)
        parser.add_argument('--num_features', default='384', type=int)
        parser.add_argument('--hidden_dim', default='284', type=int)

        parser.add_argument('--root', type=str, required=True)

        # training specific (for this model)
        parser.add_argument('--gpus', type=int, default=1, help='how many gpus')

        return parser

In [4]:
params = dict(
    learning_rate=0.0001,
    batch_size=64,
    workers=16,
    num_classes=6,
    num_features=384,
    hidden_dim=284,
    gpu=1,
    root="../data/",
    max_nb_epochs=2
)
from argparse import Namespace

hparams = Namespace(**params)
model = NetBasic(hparams).cuda()

In [5]:
model.prepare_data()
train_loader = model.train_dataloader()
val_loader = model.val_dataloader()

optimizer = model.configure_optimizers()

In [None]:
for epoch in range(1, 201):
    
    model.train()
    losses = []
    for data in train_loader:
        optimizer.zero_grad()
        logs = model.training_step(data)
        loss = logs['loss']
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
        print(f"loss = {np.mean(losses)}", end="\r")
        del loss
    
    
    torch.cuda.empty_cache()
    
    model.eval()
    outputs = []
    for data in val_loader:
        with torch.no_grad():
            log = model.validation_step(data)
            outputs.append(log)
    logs = model.validation_epoch_end(outputs)
    del outputs
    
    print(f"Epoch: {epoch}; loss: {np.mean(losses)}; val_loss: {logs['val_loss']}; val_acc: {logs['val_acc']}")

    torch.save(model.state_dict(), "../data/dgl_play.pt")
    
    torch.cuda.empty_cache()

Epoch: 1; loss: 1.5718147082406966; val_loss: 1.5021113094804588; val_acc: 0.39115289595449854
Epoch: 2; loss: 1.4472227393603716; val_loss: 1.4085114486352845; val_acc: 0.44939022644638493
Epoch: 3; loss: 1.376513532080937; val_loss: 1.363259007837054; val_acc: 0.4504603826843495
Epoch: 4; loss: 1.3500609616764256; val_loss: 1.345457940122446; val_acc: 0.46298982990360676
Epoch: 5; loss: 1.3365954809501523; val_loss: 1.3373464132500528; val_acc: 0.45173523324545817
Epoch: 6; loss: 1.3283379014072523; val_loss: 1.3372333023745941; val_acc: 0.4819904906780959
Epoch: 7; loss: 1.323644720465759; val_loss: 1.32399758345175; val_acc: 0.47689108830352017
Epoch: 8; loss: 1.3181055261789123; val_loss: 1.3268334084723195; val_acc: 0.4696549644376513
Epoch: 9; loss: 1.3154714620178514; val_loss: 1.3189888312827032; val_acc: 0.4795305676855895
Epoch: 10; loss: 1.3129876966033478; val_loss: 1.3170895571271406; val_acc: 0.4990411687105504
Epoch: 11; loss: 1.3113568480548963; val_loss: 1.31520312269

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch: 30; loss: 1.268608954174271; val_loss: 1.2709775689387426; val_acc: 0.5347298034934498
Epoch: 31; loss: 1.265187202190441; val_loss: 1.2692643167150073; val_acc: 0.5369168007217641
Epoch: 32; loss: 1.2621484750606975; val_loss: 1.269699484500302; val_acc: 0.5109170305676856
Epoch: 33; loss: 1.2596573156085822; val_loss: 1.2650365316711658; val_acc: 0.5104178637916865
Epoch: 34; loss: 1.2561735666514746; val_loss: 1.25993197609764; val_acc: 0.5386118134036335
Epoch: 35; loss: 1.2537280668326414; val_loss: 1.2618516311374814; val_acc: 0.5335698689956332
Epoch: 36; loss: 1.251211466033602; val_loss: 1.2580530690313947; val_acc: 0.5326146288209607
Epoch: 37; loss: 1.2489989671550814; val_loss: 1.2571510498180138; val_acc: 0.5302444840883064
loss = 1.2465003420199667

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch: 43; loss: 1.2364188019695177; val_loss: 1.250118097363601; val_acc: 0.5243334865205673
Epoch: 44; loss: 1.2345497346966645; val_loss: 1.2398302693033842; val_acc: 0.5292353770618355
Epoch: 45; loss: 1.233475890511372; val_loss: 1.2384285333375222; val_acc: 0.5447634164422881
Epoch: 46; loss: 1.2311906606121792; val_loss: 1.2439895277460589; val_acc: 0.5460598138222007
Epoch: 47; loss: 1.22978384514324; val_loss: 1.241427165972614; val_acc: 0.5258812629760092
Epoch: 48; loss: 1.2282388257198646; val_loss: 1.2357021898161376; val_acc: 0.5560431509038767
Epoch: 49; loss: 1.2269356030584033; val_loss: 1.2351091027780392; val_acc: 0.5473490289725591
Epoch: 50; loss: 1.2248926827816364; val_loss: 1.2315349206653745; val_acc: 0.5559964664117738
Epoch: 51; loss: 1.223686889025683; val_loss: 1.2376016349771657; val_acc: 0.5267718626942697
Epoch: 52; loss: 1.2221573748875185; val_loss: 1.2263885614132777; val_acc: 0.5506097736837562
Epoch: 53; loss: 1.2203353123586687; val_loss: 1.2318428

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch: 61; loss: 1.209156148420657; val_loss: 1.214129301918646; val_acc: 0.5498376808832827
Epoch: 62; loss: 1.207327799523463; val_loss: 1.2115755843803873; val_acc: 0.5577273902414147
Epoch: 63; loss: 1.205938515897657; val_loss: 1.212170484544929; val_acc: 0.562589778259852
Epoch: 64; loss: 1.2040549038537864; val_loss: 1.2115608063327172; val_acc: 0.5439015455121036
Epoch: 65; loss: 1.2039036597059072; val_loss: 1.2155132270275766; val_acc: 0.5276337336244541
Epoch: 66; loss: 1.2017301977006465; val_loss: 1.2095299282448781; val_acc: 0.53286600789649
Epoch: 67; loss: 1.1997190470252532; val_loss: 1.2040460994670485; val_acc: 0.5599790277959998
Epoch: 68; loss: 1.1987790369596638; val_loss: 1.2045811620341638; val_acc: 0.5481713973799127
Epoch: 69; loss: 1.1975358133759004; val_loss: 1.2023113973796629; val_acc: 0.5633941909631788
loss = 1.1965983022343032

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch: 76; loss: 1.1890501338276056; val_loss: 1.2002802584889674; val_acc: 0.5378756320112137
Epoch: 77; loss: 1.1880046372856599; val_loss: 1.1955050214409308; val_acc: 0.548447914258882
Epoch: 78; loss: 1.1869984207257547; val_loss: 1.1928295297914198; val_acc: 0.5572964549064636
Epoch: 79; loss: 1.1858535543165571; val_loss: 1.199061290145441; val_acc: 0.5245453632034068
Epoch: 80; loss: 1.1846199941113997; val_loss: 1.1903158460121488; val_acc: 0.5569983910785492
loss = 1.1835739525289484

In [None]:
torch.cuda.empty_cache()

In [1]:
model.eval()
outputs = []
for data in test_loader:
    with torch.no_grad():
        log = model.validation_step(data)
        outputs.append(log)
logs = model.validation_epoch_end(outputs)

NameError: name 'model' is not defined