In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from statistics import mean
from create_datasets import *
from sklearn.metrics import accuracy_score

In [2]:
# Load the edge data stored next to the notebook's parent directory.
# allow_pickle=True lets NumPy unpickle Python objects contained in the file;
# unpickling can execute arbitrary code, so only point this at trusted files.
final_edges = np.load(
    file='../datasets/final_edges.dump',
    allow_pickle=True,
)

In [None]:
# Build the model inputs from the loaded edges. `generate_fingerprints` is not
# defined in this notebook (presumably it arrives via the `create_datasets`
# star import — confirm).
# NOTE(review): the saved output shows the progress bar at 0% and the cell has
# no execution count, so this run may not have completed — re-run to verify.
data = generate_fingerprints(final_edges)

  0%|          | 0/87153 [00:00<?, ?it/s]

In [4]:
class FNN_net(nn.Module):
    """Fully connected network: three wide hidden layers plus a small decoder head.

    Layout: ``inp_len -> 4056 -> 2048 -> 512`` (ReLU after each layer, dropout
    with p=0.2 after the first two), then the decoder
    ``512 -> 256 -> ReLU -> 32 -> Dropout(0.2) -> out_len``.
    The output is not normalised (no softmax is applied).

    Args:
        inp_len: Number of input features expected on the last axis.
        out_len: Number of features produced by the decoder.
        in_c: Unused; kept so existing call sites stay valid.
    """

    def __init__(self, inp_len=1024, out_len = 256, in_c=1):
        super().__init__()
        # NOTE(review): 4056 may have been meant to be 4096 — confirm before
        # changing, since it alters the parameter shapes.
        self.fc1 = nn.Linear(inp_len, 4056)
        self.fc2 = nn.Linear(4056, 2048)
        self.fc3 = nn.Linear(2048, 512)
        self.dp = nn.Dropout(0.2)

        self.decoder = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 32),
            nn.Dropout(0.2),
            nn.Linear(32, out_len),
        )

    def forward(self, x):
        """Run the network on ``x``.

        Size-1 axes other than the leading (batch) axis are removed first, so
        an input shaped ``(batch, 1, inp_len)`` is treated like
        ``(batch, inp_len)``. A 1-D input is passed through unchanged.

        Returns:
            Tensor whose last axis has ``out_len`` entries; the batch axis is
            preserved even when the batch holds a single sample.
        """
        # A bare torch.squeeze(x) would also drop a batch axis of size 1 (for
        # example the last, partially filled batch), turning the output 1-D and
        # breaking any later concatenation along dim 1. Keep axis 0 as it is and
        # only remove the other singleton axes.
        if x.dim() > 1:
            kept = [size for size in x.shape[1:] if size != 1]
            x = x.reshape(x.shape[0], *kept)

        x = F.relu(self.fc1(x))
        x = self.dp(x)
        x = F.relu(self.fc2(x))
        x = self.dp(x)
        x = F.relu(self.fc3(x))

        return self.decoder(x)

In [5]:
class FullNet(nn.Module):
    """Two-branch model: each input goes through its own sub-model, the two
    results are joined along dim 1 and handed to a third sub-model.

    Args:
        finger_print_model: Sub-model applied to the first forward argument.
        graph_embedding_model: Sub-model applied to the second forward argument.
        combined_model: Sub-model applied to the concatenated branch outputs.
    """

    def __init__(self, finger_print_model, graph_embedding_model, combined_model):
        super().__init__()
        # Assigned in this order so the sub-modules register as FP, GE, CB.
        self.FP_model = finger_print_model
        self.GE_model = graph_embedding_model
        self.CB_model = combined_model

    def forward(self, fp, ge):
        """Return ``CB_model(cat(FP_model(fp), GE_model(ge)))`` joined on dim 1."""
        branch_outputs = [self.FP_model(fp), self.GE_model(ge)]
        merged = torch.cat(branch_outputs, dim=1)
        return self.CB_model(merged)

In [6]:
# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


In [7]:
# Wrap the generated data in a dataset, hold out 20% of it for evaluation and
# build one loader per split.
BATCH_SIZE = 256
NUM_WORKERS = 16
SPLIT_SEED = 0  # fixed so the train/test partition is the same on every run

dataset = LinkDataset(data)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size  # remainder, so the two sizes always sum to len(dataset)
train , test = torch.utils.data.random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# random_split permutes the indices only once; without shuffle=True the loader
# would replay the exact same batch order in every epoch.
trainloader = DataLoader(train, num_workers=NUM_WORKERS, batch_size=BATCH_SIZE, shuffle=True)
# Evaluation does not depend on sample order, so the test loader stays unshuffled.
testloader = DataLoader(test, num_workers=NUM_WORKERS, batch_size=BATCH_SIZE, shuffle=False)

In [8]:
# Two feature branches that each emit 256 values, and a head that consumes the
# 512 concatenated values and emits 2 outputs.
model = FullNet(
    finger_print_model=FNN_net(inp_len=1024, out_len=256),
    graph_embedding_model=FNN_net(inp_len=256, out_len=256),
    combined_model=FNN_net(inp_len=512, out_len=2),
)
model.to(device)

criterion = nn.CrossEntropyLoss()
# Adam with a small step size; SGD (lr=0.0001, momentum=0.9) is the alternative
# that was left disabled here.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

In [9]:
# Bare expression: the notebook displays the module tree of the assembled model.
model

FullNet(
  (FP_model): FNN_net(
    (fc1): Linear(in_features=1024, out_features=4056, bias=True)
    (fc2): Linear(in_features=4056, out_features=2048, bias=True)
    (fc3): Linear(in_features=2048, out_features=512, bias=True)
    (dp): Dropout(p=0.2, inplace=False)
    (decoder): Sequential(
      (0): Linear(in_features=512, out_features=256, bias=True)
      (1): ReLU()
      (2): Linear(in_features=256, out_features=32, bias=True)
      (3): Dropout(p=0.2, inplace=False)
      (4): Linear(in_features=32, out_features=256, bias=True)
    )
  )
  (GE_model): FNN_net(
    (fc1): Linear(in_features=256, out_features=4056, bias=True)
    (fc2): Linear(in_features=4056, out_features=2048, bias=True)
    (fc3): Linear(in_features=2048, out_features=512, bias=True)
    (dp): Dropout(p=0.2, inplace=False)
    (decoder): Sequential(
      (0): Linear(in_features=512, out_features=256, bias=True)
      (1): ReLU()
      (2): Linear(in_features=256, out_features=32, bias=True)
      (3): Dro

In [10]:
# Shell escape: print the GPU status (memory use, utilisation, processes) at
# this point in the session. Requires the `nvidia-smi` tool on the host.
!nvidia-smi

Tue May 17 15:17:21 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.95.01    Driver Version: 440.95.01    CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce GTX 108...  Off  | 00000000:02:00.0 Off |                  N/A |
| 23%   36C    P2    57W / 250W |    965MiB / 11178MiB |      8%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage    

In [11]:
from prettytable import PrettyTable

def count_parameters(model):
    """Print a per-parameter size table for ``model`` and return the total.

    Only parameters with ``requires_grad`` set are listed and counted.

    Args:
        model: Object exposing ``named_parameters()``.

    Returns:
        Total number of elements across the trainable parameters.
    """
    table = PrettyTable(["Modules", "Parameters"])
    trainable = [
        (param_name, tensor.numel())
        for param_name, tensor in model.named_parameters()
        if tensor.requires_grad
    ]
    for param_name, size in trainable:
        table.add_row([param_name, size])
    total_params = sum(size for _, size in trainable)
    print(table)
    print(f"Total Trainable Params: {total_params}")
    return total_params
    
# Print the parameter table for the assembled model; the returned total is
# shown as the cell's value.
count_parameters(model)

+---------------------------+------------+
|          Modules          | Parameters |
+---------------------------+------------+
|    FP_model.fc1.weight    |  4153344   |
|     FP_model.fc1.bias     |    4056    |
|    FP_model.fc2.weight    |  8306688   |
|     FP_model.fc2.bias     |    2048    |
|    FP_model.fc3.weight    |  1048576   |
|     FP_model.fc3.bias     |    512     |
| FP_model.decoder.0.weight |   131072   |
|  FP_model.decoder.0.bias  |    256     |
| FP_model.decoder.2.weight |    8192    |
|  FP_model.decoder.2.bias  |     32     |
| FP_model.decoder.4.weight |    8192    |
|  FP_model.decoder.4.bias  |    256     |
|    GE_model.fc1.weight    |  1038336   |
|     GE_model.fc1.bias     |    4056    |
|    GE_model.fc2.weight    |  8306688   |
|     GE_model.fc2.bias     |    2048    |
|    GE_model.fc3.weight    |  1048576   |
|     GE_model.fc3.bias     |    512     |
| GE_model.decoder.0.weight |   131072   |
|  GE_model.decoder.0.bias  |    256     |
| GE_model.

35789610

In [12]:
def eval(model,testloader):
    """Evaluate ``model`` on every batch produced by ``testloader``.

    The model is switched to evaluation mode and run without gradient
    tracking; the mode it had on entry is restored before returning, also when
    an error occurs. Uses the module-level ``criterion`` and ``device``.
    Note that this function shadows the ``eval`` builtin.

    Args:
        model: Module called as ``model(fp, ge)``.
        testloader: Iterable yielding ``(fp, ge, label)`` batches. For each
            sample the predicted class is the argmax of its output row and the
            true class is the argmax of its label row.

    Returns:
        Tuple ``(accuracy, mean_loss)``: the fraction of samples classified
        correctly and the criterion value averaged over the batches.

    Raises:
        ValueError: If ``testloader`` yields no batches.
    """
    was_training = getattr(model, 'training', True)
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    try:
        # No gradients are needed for evaluation; skipping autograd saves
        # memory and time.
        with torch.no_grad():
            for fp, ge, label in testloader:
                output = model(fp.float().to(device), ge.float().to(device))
                loss = criterion(output, label.float().to(device))
                test_loss += loss.item()
                num_batches += 1

                # One argmax per sample over all of its entries, computed for
                # the whole batch at once instead of element by element.
                pred = output.reshape(len(output), -1).argmax(dim=1).cpu()
                true = label.reshape(len(label), -1).argmax(dim=1).cpu()
                correct += int((pred == true).sum().item())
                total += len(pred)
    finally:
        # Hand the model back in the mode the caller had it in.
        model.train(was_training)

    if num_batches == 0 or total == 0:
        raise ValueError("testloader yielded no samples to evaluate")

    accuracy = correct / total
    print("Accuracy", accuracy)
    return accuracy, test_loss / num_batches

In [13]:
import warnings
# NOTE(review): with no category or module given, this hides every warning for
# the rest of the session, including deprecation and numerical warnings raised
# during training. Consider narrowing it to the specific warning being silenced.
warnings.filterwarnings('ignore')

In [None]:
# Training loop: one optimisation pass over `trainloader` per epoch, followed by
# an evaluation on `testloader`. Per-epoch mean losses and accuracies are kept
# in the lists below.
# `tqdm` is not imported explicitly in this notebook (presumably it comes from
# the `create_datasets` star import — confirm).
train_losses = []
test_losses = []
num_epochs = 50
best_acc = 0.0
acc_list = []
num_batches = len(trainloader)

# Epochs are numbered 1..num_epochs inclusive so that exactly `num_epochs`
# passes are made (range(1, num_epochs) would stop one epoch short).
for epoch in tqdm(range(1, num_epochs + 1)):
    train_loss = 0.0
    model.train()
    for batch_id, (fp, ge, label) in enumerate(trainloader, start=1):
        optimizer.zero_grad()
        output = model(fp.float().to(device), ge.float().to(device))

        loss = criterion(output, label.float().to(device))
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        train_loss += batch_loss

        # '\r' keeps the per-batch progress on a single, overwritten line.
        print(f'Epoch:{epoch} batch {batch_id}/{num_batches} loss:{batch_loss}', end='\r')

    acc, test_loss = eval(model, testloader)
    acc_list.append(acc)

    if acc > best_acc:
        best_acc = acc
        print("Improved Accuracy is", acc )
        # No checkpoint is written here; save model.state_dict() at this point
        # if the best weights need to be kept.

    epoch_train_loss = train_loss / num_batches
    print()
    print("Train loss: ", epoch_train_loss)
    print("Test  loss: ", test_loss)

    train_losses.append(epoch_train_loss)
    test_losses.append(test_loss)