In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from statistics import mean
from create_datasets import *
from sklearn.metrics import accuracy_score

In [2]:
final_edges = np.load('../datasets/final_edges.dump', allow_pickle=True)

In [3]:
data = generate_fingerprints(final_edges)

  0%|          | 0/87153 [00:00<?, ?it/s]

  0%|          | 0/87153 [00:00<?, ?it/s]

  0%|          | 0/87153 [00:00<?, ?it/s]

  i[2] ]))


0it [00:00, ?it/s]

In [173]:
class FNN_net(nn.Module):
    def __init__(self, inp_len=1024, out_len = 256, in_c=1):
        super().__init__()
        self.fc1 = nn.Linear(inp_len, 4056)
        self.fc2 = nn.Linear(4056, 2048)
        self.fc3 = nn.Linear(2048, 512)
        self.dp = nn.Dropout(0.2)
        
        self.decoder = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 32),
            nn.Dropout(0.2),
            nn.Linear(32, out_len),
#             nn.Softmax(0)
        )

    def forward(self, x):
        x= torch.squeeze(x)
        x = F.relu(self.fc1(x))
        x=  self.dp(x)
        x = F.relu(self.fc2(x))
        x=  self.dp(x)
        x = F.relu(self.fc3(x))
        
        x = self.decoder(x)
#         print(x.shape)
        return x

In [174]:
# class CNN_net(nn.Module):
#     def __init__(self, inp_len=1024, out_len = 256, in_c=1):
#         super().__init__()
#         self.conv_block1 = nn.Sequential(
#             nn.Conv1d(in_c, 64, kernel_size=3, stride=1, padding=1),
#             nn.BatchNorm1d(64),
#             nn.ReLU(),
#             nn.Dropout(0.1)
#         )
        
#         self.conv_block2 = nn.Sequential(
#             nn.Conv1d(64, 128, kernel_size=5, stride=1, padding=2),
#             nn.BatchNorm1d(128),
#             nn.ReLU(),
#             nn.Dropout(0.2)
#         )
#         self.conv_block3 = nn.Sequential(
#             nn.Conv1d(128, 256, kernel_size=7, stride=1, padding=3),
#             nn.BatchNorm1d(256),
#             nn.ReLU(),
#             nn.Dropout(0.3)
#         )
# #         self.conv_block4 = nn.Sequential(
# #             nn.Conv1d(256, 512, kernel_size=9, stride=1, padding=4),
# #             nn.BatchNorm1d(512),
# #             nn.ReLU(),
# #             nn.Dropout(0.4)
# #         )
        
#         self.decoder = nn.Sequential(
#             nn.Linear(256*inp_len, 128),
#             nn.Sigmoid(),
#             nn.Linear(128, 32),
#             nn.Dropout(0.2),
#             nn.Linear(32, out_len)
            
#         )

        
#     def forward(self, x):
#         x = self.conv_block1(x)
#         x = self.conv_block2(x)
#         x = self.conv_block3(x)
# #         x = self.conv_block4(x)
#         x = x.view(x.size(0), -1) # flat
#         x = self.decoder(x)
        
#         return x

In [175]:
class FullNet(nn.Module):
    def __init__(self, finger_print_model, graph_embedding_model, combined_model):
        super().__init__()
        self.FP_model = finger_print_model
        self.GE_model = graph_embedding_model
        self.CB_model = combined_model
    
    def forward(self, fp, ge):
        fp_out = self.FP_model(fp)
        ge_out = self.GE_model(ge)
#         print(ge_out.shape)
        inp = torch.cat((fp_out, ge_out), 1)
#         inp = inp.unsqueeze(1)
#         print(inp.shape)
        out = self.CB_model(inp)
        return out

In [191]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [204]:
# device='cpu'

In [205]:
dataset = LinkDataset(data)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train , test = torch.utils.data.random_split(dataset, [train_size, test_size])
BATCH_SIZE = 256
trainloader = DataLoader(train, num_workers = 16, batch_size= BATCH_SIZE)
testloader = DataLoader(test, num_workers = 16, batch_size= BATCH_SIZE)

In [206]:
model = FullNet(FNN_net(inp_len=1024, out_len=256), 
                FNN_net(inp_len=256, out_len =256), 
                FNN_net(inp_len=512, out_len = 2))
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr = 1e-5)
# optimizer = torch.optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)

In [207]:
model

FullNet(
  (FP_model): FNN_net(
    (fc1): Linear(in_features=1024, out_features=4056, bias=True)
    (fc2): Linear(in_features=4056, out_features=2048, bias=True)
    (fc3): Linear(in_features=2048, out_features=512, bias=True)
    (dp): Dropout(p=0.2, inplace=False)
    (decoder): Sequential(
      (0): Linear(in_features=512, out_features=256, bias=True)
      (1): ReLU()
      (2): Linear(in_features=256, out_features=32, bias=True)
      (3): Dropout(p=0.2, inplace=False)
      (4): Linear(in_features=32, out_features=256, bias=True)
    )
  )
  (GE_model): FNN_net(
    (fc1): Linear(in_features=256, out_features=4056, bias=True)
    (fc2): Linear(in_features=4056, out_features=2048, bias=True)
    (fc3): Linear(in_features=2048, out_features=512, bias=True)
    (dp): Dropout(p=0.2, inplace=False)
    (decoder): Sequential(
      (0): Linear(in_features=512, out_features=256, bias=True)
      (1): ReLU()
      (2): Linear(in_features=256, out_features=32, bias=True)
      (3): Dro

In [208]:
# out = model(torch.rand(32, 1, 1024).float().to(device), torch.rand(32, 1, 256).float().to(device))

In [209]:
# out.shape

In [210]:
!nvidia-smi

Wed May 11 12:54:25 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.47.03    Driver Version: 510.47.03    CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:02:00.0 Off |                  N/A |
| 29%   51C    P2    59W / 250W |    999MiB / 11264MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce ...  On   | 00000000:03:00.0 Off |                  N/A |
| 23%   25C    P8     9W / 250W |      4MiB / 11264MiB |      0%      Default |
|       

In [211]:
from prettytable import PrettyTable

def count_parameters(model):
    table = PrettyTable(["Modules", "Parameters"])
    total_params = 0
    for name, parameter in model.named_parameters():
        if not parameter.requires_grad: continue
        params = parameter.numel()
        table.add_row([name, params])
        total_params+=params
    print(table)
    print(f"Total Trainable Params: {total_params}")
    return total_params
    
count_parameters(model)

+---------------------------+------------+
|          Modules          | Parameters |
+---------------------------+------------+
|    FP_model.fc1.weight    |  4153344   |
|     FP_model.fc1.bias     |    4056    |
|    FP_model.fc2.weight    |  8306688   |
|     FP_model.fc2.bias     |    2048    |
|    FP_model.fc3.weight    |  1048576   |
|     FP_model.fc3.bias     |    512     |
| FP_model.decoder.0.weight |   131072   |
|  FP_model.decoder.0.bias  |    256     |
| FP_model.decoder.2.weight |    8192    |
|  FP_model.decoder.2.bias  |     32     |
| FP_model.decoder.4.weight |    8192    |
|  FP_model.decoder.4.bias  |    256     |
|    GE_model.fc1.weight    |  1038336   |
|     GE_model.fc1.bias     |    4056    |
|    GE_model.fc2.weight    |  8306688   |
|     GE_model.fc2.bias     |    2048    |
|    GE_model.fc3.weight    |  1048576   |
|     GE_model.fc3.bias     |    512     |
| GE_model.decoder.0.weight |   131072   |
|  GE_model.decoder.0.bias  |    256     |
| GE_model.

35789610

In [212]:
def eval(model,testloader):
    model.eval()
    test_loss = 0.0
    preds = []
    trues = []
    for fp, ge, label in testloader:
        output = model(fp.float().to(device),ge.float().to(device))
        loss = criterion(output, label.float().to(device))
        test_loss+=loss.item()
        for i in range(len(output)):
            pred = output[i].argmax().item()
            true = label[i].argmax().item()
            preds.append(pred)
            trues.append(true)
    model.train()
    print("Accuracy", accuracy_score(preds, trues))
    return accuracy_score(preds, trues), test_loss / len(testloader)

In [213]:
import warnings
warnings.filterwarnings('ignore')

In [214]:
train_losses = []
test_losses = []
num_epochs= 50
best_acc = 0.0
acc_list = []
for epoch in tqdm(range(1, num_epochs)):
    train_loss = 0.0
    model.train()
    batch_id = 0
    for fp, ge, label in trainloader:
        batch_id +=1
        
        optimizer.zero_grad()
        output = model(fp.float().to(device),ge.float().to(device))
        
        loss = criterion(output, label.float().to(device))
        loss.backward()
        optimizer.step()
        train_loss += loss.item() 
    
        print(f'Epoch:{epoch} batch {batch_id}/{len(trainloader)} loss:{loss.item()}', end='\r')
    
    acc, test_loss = eval(model, testloader)
    acc_list.append(acc)

    if acc > best_acc:
        best_acc = acc
        print("Improved Accuracy is", acc )
        torch.save(model, 'SAVED_MODELS/FNN-bestmodel_1.pt')
        
        with open('SAVED_MODELS/FNN-bestmodel_1.txt', 'w') as f:
            print(model.eval() , "Accuracy" , acc, file=f)

    else:
        model = torch.load('SAVED_MODELS/FNN-bestmodel_1.pt')
    
    print()
    print("Train loss: ",train_loss/len(trainloader))
    print("Test  loss: ",test_loss)
    
    train_losses.append(train_loss/len(trainloader))
    test_losses.append(test_loss)

  0%|          | 0/49 [00:00<?, ?it/s]

Accuracy 0.66014571740003450.6789695024490356
Improved Accuracy is 0.6601457174000345

Train loss:  0.6760997082287575
Test  loss:  0.6689197792523149
Accuracy 0.66014571740003450.6765668988227844

Train loss:  0.6625865745893765
Test  loss:  0.6584765859272169
Accuracy 0.66014571740003450.6788615584373474

Train loss:  0.668295851338914
Test  loss:  0.6689197792523149
Accuracy 0.66014571740003450.6803316473960876

Train loss:  0.6682926342164204
Test  loss:  0.6689197792523149
Accuracy 0.66014571740003450.6795712113380432

Train loss:  0.6683242908327571
Test  loss:  0.6689197792523149
Accuracy 0.66014571740003450.6793715953826904

Train loss:  0.668303329866011
Test  loss:  0.6689197792523149
Accuracy 0.66014571740003450.6795013546943665

Train loss:  0.6683069533044166
Test  loss:  0.6689197792523149
Accuracy 0.66014571740003450.6802973747253418

Train loss:  0.6682676439320211
Test  loss:  0.6689197792523149
Accuracy 0.66014571740003450.6803066730499268

Train loss:  0.668277721920

KeyboardInterrupt: 

In [None]:
# F.normalize(output,1)
# output
label

In [None]:
# del model
# del trainloader
# del testloader

In [None]:
import matplotlib.pyplot as plt
plt.plot(train_losses)
plt.plot(test_losses)
plt.plot(acc_list)

In [None]:
model = torch.load('SAVED_MODELS/FNN-bestmodel_1.pt')
eval(model, testloader)

In [None]:
from sklearn.metrics import classification_report, f1_score, confusion_matrix, ConfusionMatrixDisplay
def get_performance(model, testloader):
    model.eval()
    test_loss = 0.0
    preds = []
    trues = []
    for fp, ge, label in testloader:
        output = model(fp.float().to(device),ge.float().to(device))
        loss = criterion(output, label.float().to(device))
        test_loss+=loss.item()
        for i in range(len(output)):
            pred = output[i].argmax().item()
            true = label[i].argmax().item()
            preds.append(pred)
            trues.append(true)
    model.train()
    print("Accuracy", accuracy_score(preds, trues))
    print("f1 score", f1_score(preds, trues))
    print(classification_report(trues, preds, labels=[0,1]))
    print()
    cm = confusion_matrix(trues, preds, labels=[0,1])
    disp = ConfusionMatrixDisplay(cm, np.array([0,1]))
    disp.plot()
    plt.show()

In [None]:
get_performance(model, testloader)

In [None]:
# torch.save(model, 'CNN-model-acc-0.888.pt')
# with open('CNN-model-acc-0.888.txt', 'w') as f:
#     print(model.eval(), file=f)

In [None]:
print(model.parameters())

In [None]:
import gc
gc.collect()
torch.cuda.empty_cache()