In [1]:
import numpy as np
from collections import Counter
from tqdm import tqdm
from matplotlib import pyplot as plt
from sklearn.metrics import classification_report 
from create_datasets import *

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader


In [2]:
final_edges = np.load('../datasets/final_edges.dump', allow_pickle=True)
data = generate_fingerprints(final_edges)

  0%|          | 0/87153 [00:00<?, ?it/s]

  0%|          | 0/87153 [00:00<?, ?it/s]

  0%|          | 0/87153 [00:00<?, ?it/s]

  i[2]


0it [00:00, ?it/s]

In [3]:
class CRNN(nn.Module):

    def __init__(self, in_channels, out_channels, n_len_seg, n_classes, device, verbose=False):
        super(CRNN, self).__init__()
        
        self.n_len_seg = n_len_seg
        self.n_classes = n_classes
        self.in_channels = in_channels
        self.out_channels = out_channels

        self.device = device
        self.verbose = verbose

        # (batch, channels, length)
        self.cnn = nn.Conv1d(in_channels=self.in_channels, 
                            out_channels=self.out_channels, 
                            kernel_size=16, 
                            stride=2)
        # (batch, seq, feature)
        self.rnn = nn.LSTM(input_size=(self.out_channels), 
                            hidden_size=self.out_channels, 
                            num_layers=1, 
                            batch_first=True, 
                            bidirectional=False)
        self.dense1 = nn.Linear(out_channels, 128)
        self.dropout1 = nn.Dropout(0.2)
        self.dense2 = nn.Linear(128, n_classes)
        self.dropout2 = nn.Dropout(0.2)
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, x):

        self.n_channel, self.n_length = x.shape[-2], x.shape[-1]
        self.n_seg = self.n_length // self.n_len_seg
        out = x
        out = out.permute(0,2,1)
        out = out.view(-1, self.n_len_seg, self.n_channel)
        out = out.permute(0,2,1)
        out = self.cnn(out)
        out= self.dropout1(out)
        out = out.mean(-1)
        out = out.view(-1, self.n_seg, self.out_channels)
        _, (out, _) = self.rnn(out)
        out = torch.squeeze(out, dim=0)
        out = self.dense1(out)
        out= self.dropout2(out)
        out = self.dense2(out)
        out = self.softmax(out)
        return out

In [4]:
class FullNet(nn.Module):
    def __init__(self, finger_print_model, graph_embedding_model, combined_model):
        super().__init__()
        self.FP_model = finger_print_model
        self.GE_model = graph_embedding_model
        self.CB_model = combined_model
    
    def forward(self, fp, ge):
        fp_out = self.FP_model(fp)
        ge_out = self.GE_model(ge)
        inp = torch.cat((fp_out, ge_out), 1)
        inp = inp.unsqueeze(1)
        out = self.CB_model(inp)
        return out

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
# device = 'cpu'

cuda


In [6]:
model = FullNet(CRNN(1,256,1024,256, device), 
                CRNN(1,64,256,256, device), 
                CRNN(1,128,512,2,device))

In [7]:
model.to(device)

FullNet(
  (FP_model): CRNN(
    (cnn): Conv1d(1, 256, kernel_size=(16,), stride=(2,))
    (rnn): LSTM(256, 256, batch_first=True)
    (dense1): Linear(in_features=256, out_features=128, bias=True)
    (dropout1): Dropout(p=0.2, inplace=False)
    (dense2): Linear(in_features=128, out_features=256, bias=True)
    (dropout2): Dropout(p=0.2, inplace=False)
    (softmax): Softmax(dim=1)
  )
  (GE_model): CRNN(
    (cnn): Conv1d(1, 64, kernel_size=(16,), stride=(2,))
    (rnn): LSTM(64, 64, batch_first=True)
    (dense1): Linear(in_features=64, out_features=128, bias=True)
    (dropout1): Dropout(p=0.2, inplace=False)
    (dense2): Linear(in_features=128, out_features=256, bias=True)
    (dropout2): Dropout(p=0.2, inplace=False)
    (softmax): Softmax(dim=1)
  )
  (CB_model): CRNN(
    (cnn): Conv1d(1, 128, kernel_size=(16,), stride=(2,))
    (rnn): LSTM(128, 128, batch_first=True)
    (dense1): Linear(in_features=128, out_features=128, bias=True)
    (dropout1): Dropout(p=0.2, inplace=Fal

In [8]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr = 1e-4)

In [9]:
from prettytable import PrettyTable

def count_parameters(model):
    table = PrettyTable(["Modules", "Parameters"])
    total_params = 0
    for name, parameter in model.named_parameters():
        if not parameter.requires_grad: continue
        params = parameter.numel()
        table.add_row([name, params])
        total_params+=params
    print(table)
    print(f"Total Trainable Params: {total_params}")
    return total_params
    
count_parameters(model)

+---------------------------+------------+
|          Modules          | Parameters |
+---------------------------+------------+
|    FP_model.cnn.weight    |    4096    |
|     FP_model.cnn.bias     |    256     |
| FP_model.rnn.weight_ih_l0 |   262144   |
| FP_model.rnn.weight_hh_l0 |   262144   |
|  FP_model.rnn.bias_ih_l0  |    1024    |
|  FP_model.rnn.bias_hh_l0  |    1024    |
|   FP_model.dense1.weight  |   32768    |
|    FP_model.dense1.bias   |    128     |
|   FP_model.dense2.weight  |   32768    |
|    FP_model.dense2.bias   |    256     |
|    GE_model.cnn.weight    |    1024    |
|     GE_model.cnn.bias     |     64     |
| GE_model.rnn.weight_ih_l0 |   16384    |
| GE_model.rnn.weight_hh_l0 |   16384    |
|  GE_model.rnn.bias_ih_l0  |    256     |
|  GE_model.rnn.bias_hh_l0  |    256     |
|   GE_model.dense1.weight  |    8192    |
|    GE_model.dense1.bias   |    128     |
|   GE_model.dense2.weight  |   32768    |
|    GE_model.dense2.bias   |    256     |
|    CB_mod

823362

In [10]:
dataset = LinkDataset(data)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train , test = torch.utils.data.random_split(dataset, [train_size, test_size])
BATCH_SIZE = 512
trainloader = DataLoader(train, num_workers = 16, batch_size= BATCH_SIZE, shuffle=True)
testloader = DataLoader(test, num_workers = 16, batch_size= BATCH_SIZE)

In [11]:
from sklearn.metrics import accuracy_score

In [12]:
def eval(model,testloader):
    model.eval()
    test_loss = 0.0
    preds = []
    trues = []
    for fp, ge, label in testloader:
        output = model(fp.float().to(device),ge.float().to(device))
        loss = criterion(output, label.float().to(device))
        test_loss+=loss.item()
        for i in range(len(output)):
            pred = output[i].argmax().item()
            true = label[i].argmax().item()
            preds.append(pred)
            trues.append(true)
    model.train()
    print("Accuracy", accuracy_score(preds, trues))
    return test_loss / len(testloader)

In [None]:
train_losses = []
test_losses = []
num_epochs= 50
for epoch in tqdm(range(1, num_epochs)):
    train_loss = 0.0
    model.train()
    batch_id = 0
    for fp, ge, label in trainloader:
        batch_id +=1
        
        optimizer.zero_grad()
        output = model(fp.float().to(device),ge.float().to(device))
        loss = criterion(output, label.float().to(device))
        loss.backward()
        optimizer.step()
        train_loss += loss.item() 
    
        print(f'Epoch:{epoch} batch {batch_id}/{len(trainloader)} loss:{loss.item()}', end='\r')
    
    test_loss = eval(model, testloader)
    print()
    print("Train loss: ",train_loss/len(trainloader))
    print("Test  loss: ",test_loss)
    
    train_losses.append(train_loss/len(trainloader))
    test_losses.append(test_loss)

  0%|          | 0/49 [00:00<?, ?it/s]

Epoch:1 batch 137/137 loss:0.7015280723571777

In [None]:
import matplotlib.pyplot as plt
plt.plot(train_losses)
plt.plot(test_losses)

In [None]:
eval(model, testloader)