In [66]:
import torch
import torch.nn.functional as F
%pip install torch_geometric
from torch_geometric.nn import SAGEConv
import os
import json
import os
import pickle
import json
import random
import numpy as np
from torch_geometric.data import Data
import networkx as nx
import warnings
import math
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from torch.utils.data.dataloader import default_collate
from torch.utils.data import random_split

Note: you may need to restart the kernel to use updated packages.


In [251]:
def get_files_in_folder(input_folder):
    file_list = []
    for file_name in os.listdir(input_folder):
        file_path = os.path.join(input_folder, file_name)
        if os.path.isfile(file_path):
            file_list.append(file_path)
    return file_list

# Example usage:
folder_path = 'done_all'
verilog_files = get_files_in_folder(folder_path)
print(len(verilog_files))
print(verilog_files)

396
['done_all\\adder10_synth.txt', 'done_all\\adder11_synth.txt', 'done_all\\adder12_synth.txt', 'done_all\\adder13_synth.txt', 'done_all\\adder14_synth.txt', 'done_all\\adder15_synth.txt', 'done_all\\adder16_synth.txt', 'done_all\\adder17_synth.txt', 'done_all\\adder18_synth.txt', 'done_all\\adder19_synth.txt', 'done_all\\adder1_synth.txt', 'done_all\\adder20_synth.txt', 'done_all\\adder21_synth.txt', 'done_all\\adder22_synth.txt', 'done_all\\adder23_synth.txt', 'done_all\\adder24_synth.txt', 'done_all\\adder25_synth.txt', 'done_all\\adder26_synth.txt', 'done_all\\adder27_synth.txt', 'done_all\\adder28_synth.txt', 'done_all\\adder2_synth.txt', 'done_all\\adder3_synth.txt', 'done_all\\adder4_synth.txt', 'done_all\\adder5_synth.txt', 'done_all\\adder6_synth.txt', 'done_all\\adder7_synth.txt', 'done_all\\adder8_synth.txt', 'done_all\\adder9_synth.txt', 'done_all\\and10_gate_synth.txt', 'done_all\\and11_gate_synth.txt', 'done_all\\and12_gate_synth.txt', 'done_all\\and13_synth.txt', 'done

In [252]:
def extracting_attributes(verilog_file):
    try:
        if os.path.isfile(verilog_file):
            with open(verilog_file, "r") as file:
                loaded_data = json.load(file)
                nodes = loaded_data[0]
                edges = loaded_data[1]
                label = loaded_data[2]
                
                x = torch.tensor(nodes, dtype=torch.float)
                edge_index = torch.tensor(edges, dtype=torch.long)
                y = torch.tensor(label, dtype=torch.float)
                num_nodes = x.size(0)
                
                # Create batch assignment vector (assuming one graph per file)
                batch = torch.zeros(num_nodes, dtype=torch.long)
                data = Data(x=x, edge_index=edge_index, y = y, batch = batch)
                return data
    
    except Exception as e:
        print(e)
        return e


In [253]:
class VerilogDataset(Dataset):  # Using Dataset from torch_geometric
    def __init__(self, verilog_files):
        print(f"Loaded {len(verilog_files)} Verilog files.")
        self.verilog_files = verilog_files

    def __len__(self):
        return len(self.verilog_files)

    def __getitem__(self, idx):
        verilog_file = self.verilog_files[idx]
        data = extracting_attributes(verilog_file)
        return data

dataset = VerilogDataset(verilog_files)
print(len(dataset))

Loaded 396 Verilog files.
396


In [254]:
print(dataset[0])
print(verilog_files[0])
print(dataset.verilog_files[0])

Data(x=[54, 7], edge_index=[2, 72], y=[1, 16], batch=[54])
done_all\adder10_synth.txt
done_all\adder10_synth.txt


In [255]:
def are_all_data_objects_unique(dataset):
    data_objects = []
    for data in dataset:
        if data in data_objects:
            return False
        data_objects.append(data)
    return True

# Example usage:
is_unique = are_all_data_objects_unique(dataset)
if is_unique:
    print("All data objects are unique.")
else:
    print("Duplicate data objects found.")


All data objects are unique.


In [256]:
y_labels = []
for data in dataset:
    # print(data)
    # print(data.y.tolist())
    y_labels.append(np.argmax(data.y.tolist()))

In [257]:
def custom_collate(batch):
    if isinstance(batch[0], Data):
        return batch
    else:
        return default_collate(batch)
    


In [258]:
X_train, X_test, y_train, y_test = train_test_split(dataset, y_labels, test_size=0.2, stratify = y_labels, random_state=41)
train_loader = DataLoader(X_train, batch_size=16, shuffle=True, collate_fn=custom_collate)
test_loader = DataLoader(X_test, batch_size=16, shuffle = False, collate_fn=custom_collate)

In [259]:
# len(train_loader.dataset)
print(train_loader.dataset[0])

Data(x=[82, 7], edge_index=[2, 109], y=[1, 16], batch=[82])


In [271]:
from torch_geometric.nn import global_mean_pool
class GraphSAGE(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels):
        super(GraphSAGE, self).__init__()
        self.conv1 = SAGEConv(7, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, out_channels)
        

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x_temp = self.conv2(x, edge_index)
        x = global_mean_pool(x_temp, batch=None)
        return x, x_temp


In [272]:
# def generate_pairs(dataset):
#     pairs = []
#     pair_labels = []
#     for i in range(len(dataset)):
#         for j in range(i + 1, len(dataset)):
#             pairs.append((dataset[i], dataset[j]))
#             pair_labels.append(1 if torch.all(dataset[i].y == dataset[j].y).tolist() else 0)
#     return pairs, pair_labels

# # Example usage
# pairs, pair_labels = generate_pairs(X_train)


In [273]:
# pairs[0]

In [299]:
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool

class SiameseGNN(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels):
        super(SiameseGNN, self).__init__()
        torch.manual_seed(12345)
        self.conv1 = GCNConv(16, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        # x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        # print(x)
        x = global_mean_pool(x, data.batch)  # [batch_size, hidden_channels]
        return x

    # def compute_similarity(self, data1, data2):
    #     emb1 = self.forward(data1)
    #     emb2 = self.forward(data2)
    #     return F.cosine_similarity(emb1, emb2), emb1, emb2


In [278]:
class Hybrid_Model(torch.nn.Module):
    def __init__(self):
        super(Hybrid_Model, self).__init__()
        hidden_channels = 16
        num_classes = 16
        self.sage = GraphSAGE(hidden_channels, 16)
        self.siamese = SiameseGNN(hidden_channels, num_classes)
    
    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        # print(f"Input x shape: {x.shape}")  # Debugging shape of input features
        x, x_temp = self.sage(data)
        # print(f"SAGE output x shape: {x.shape}, x_temp shape: {x_temp.shape}")  # Debugging shape after GraphSAGE
        # data.x = x_temp
        # print(f"Data x shape after SAGE: {data.x.shape}")
        x = self.siamese(x_temp, edge_index)
        # print(f"Siamese output shape: {x.shape}")  # Debugging shape after SiameseGNN
        return x
    
    def compute_similarity(self, data1, data2):
        emb1 = self.forward(data1)
        emb2 = self.forward(data2)
        return F.cosine_similarity(emb1, emb2), emb1, emb2
    

In [265]:
# def contrastive_loss(similarity, label, margin=0.5):
#     loss = (label * (1 - similarity)**2 + (1 - label) * F.relu(similarity - margin)**2).mean()
#     return loss


In [288]:
# Initialize the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
hidden_channels = 16
num_classes = 16
sage_model = GraphSAGE(hidden_channels, num_classes)
# Create a simple training loop

optimizer = torch.optim.Adam(sage_model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()



# Training the model
sage_model.train()
for epoch in range(100):
    total_loss = 0
    correct = 0
    for data in X_train:
        optimizer.zero_grad()
        
        data = data.to(device)
        out, _ = sage_model(data)
        loss =loss_fn(out, data.y)
        loss.backward()
        # update the weights
        
        optimizer.step()
        total_loss += loss.item()
        
        pred = out.argmax(dim=1)  # Use the class with highest probability.
        y_label = (data.y.tolist())
        y_label = y_label[0].index(1.0)
        pred_label = (pred.tolist())[0]
        # print(pred_label)
        # print(y_label)
        if y_label == pred_label:
            correct += 1            
    train_acc = correct/len(X_train)
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {total_loss/len(X_train)}, Train Acc: {train_acc:.4f}')


Epoch 0, Loss: 2.4707392272126825, Train Acc: 0.2025
Epoch 10, Loss: 0.8680489645783475, Train Acc: 0.7342
Epoch 20, Loss: 0.6265674432971735, Train Acc: 0.8133
Epoch 30, Loss: 0.5063398475447612, Train Acc: 0.8481
Epoch 40, Loss: 0.4390327516641175, Train Acc: 0.8608
Epoch 50, Loss: 0.3932381877526125, Train Acc: 0.8703
Epoch 60, Loss: 0.35720610774686196, Train Acc: 0.8861
Epoch 70, Loss: 0.3258894543317542, Train Acc: 0.8924
Epoch 80, Loss: 0.29915498563918175, Train Acc: 0.9019
Epoch 90, Loss: 0.27756248018375196, Train Acc: 0.9114


In [290]:
train_acc

0.9177215189873418

In [291]:
sage_model.eval()
correct = 0
with torch.no_grad():
    for data in X_test:
        out,_ = sage_model(data)  
        pred = out.argmax(dim=1)  # Use the class with highest probability.
        y_label = (data.y.tolist())
        y_label = y_label[0].index(1.0)
        pred_label = (pred.tolist())[0]
        # print(pred_label)
        # print(y_label)
        if y_label == pred_label:
            correct += 1            
        # correct += int((pred == data.y).sum())  # Check against ground-truth labels.
    acc = correct / len(X_test)  # Derive ratio of correct predictions.

print(f'Test Accuracy: {acc:.4f}')


Test Accuracy: 0.8500


In [292]:
# torch.save(sage_model.state_dict(), 'grahpSAGE91_85_100_siamese.pth')

In [295]:
sage_model = GraphSAGE(hidden_channels, num_classes)
sage_model.load_state_dict(torch.load('grahpSAGE90_87_100_siamese.pth'))



<All keys matched successfully>

In [303]:
# Initialize the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
hidden_channels = 16
num_classes = 16
siamese_model = SiameseGNN(hidden_channels, num_classes)
# Create a simple training loop

optimizer = torch.optim.Adam(siamese_model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()

sage_model.eval()



# Training the model
siamese_model.train()
for epoch in range(100):
    total_loss = 0
    correct = 0
    for data in X_train:
        optimizer.zero_grad()
        out,emb = sage_model(data)  
        # print(emb.shape)
        # break
        data = data.to(device)
        out = siamese_model(emb, data.edge_index)
        loss =loss_fn(out, data.y)
        loss.backward()
        # update the weights
        
        optimizer.step()
        total_loss += loss.item()
        
        pred = out.argmax(dim=1)  # Use the class with highest probability.
        y_label = (data.y.tolist())
        y_label = y_label[0].index(1.0)
        pred_label = (pred.tolist())[0]
        # print(pred_label)
        # print(y_label)
        if y_label == pred_label:
            correct += 1            
    # break
    train_acc = correct/len(X_train)
    if epoch % 10 == 0 or epoch == 99:
        print(f'Epoch {epoch}, Loss: {total_loss/len(X_train)}, Train Acc: {train_acc:.4f}')


Epoch 0, Loss: 6.294285904708637, Train Acc: 0.2943
Epoch 10, Loss: 1.411292207202643, Train Acc: 0.5411
Epoch 20, Loss: 1.1445755970017422, Train Acc: 0.6361
Epoch 30, Loss: 0.9778353791078717, Train Acc: 0.6519
Epoch 40, Loss: 0.8221197267764724, Train Acc: 0.7278
Epoch 50, Loss: 1.007420621106803, Train Acc: 0.6361
Epoch 60, Loss: 0.9233191209988623, Train Acc: 0.6551
Epoch 70, Loss: 0.9368255968798633, Train Acc: 0.6392
Epoch 80, Loss: 0.8753841897201862, Train Acc: 0.6709
Epoch 90, Loss: 0.8378846690696073, Train Acc: 0.6930
Epoch 99, Loss: 0.9430141320811107, Train Acc: 0.6582


In [None]:
# siamese_model.eval()
# correct = 0
# with torch.no_grad():
#     for data in X_test:
#         out,_ = siamese_model(data)  
#         pred = out.argmax(dim=1)  # Use the class with highest probability.
#         y_label = (data.y.tolist())
#         y_label = y_label[0].index(1.0)
#         pred_label = (pred.tolist())[0]
#         # print(pred_label)
#         # print(y_label)
#         if y_label == pred_label:
#             correct += 1            
#         # correct += int((pred == data.y).sum())  # Check against ground-truth labels.
#     acc = correct / len(X_test)  # Derive ratio of correct predictions.

# print(f'Test Accuracy: {acc:.4f}')


In [None]:
torch.save(siamese_model.state_dict(), 'siamese_100_sage.pth')

In [279]:
# Initialize the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
hybrid_model = Hybrid_Model()
# Create a simple training loop

optimizer = torch.optim.Adam(hybrid_model.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()


embeddings = []
# Training the model
hybrid_model.train()
for epoch in range(100):
    total_loss = 0
    # for i, (data1, data2) in enumerate(pairs):
    for data in X_train:
        optimizer.zero_grad()
        # data1 = data1.to(device)
        # data2 = data2.to(device)
        data = data.to(device)
        # print(data1)
        #forward pass
        out = hybrid_model(data)
        # print(similarity)
        # print(loss)
        # loss = contrastive_loss(similarity, pair_labels[i])
        # zero the gradients of the weights so that the gradients are not accumulated
        # calculate the gradients using backpropagation
        # print(out)
        # print(data.y)
        loss =loss_fn(out, data.y)
        loss.backward()
        # update the weights
        
        optimizer.step()
        total_loss += loss.item()
    
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {total_loss/len(X_train)}')


Epoch 0, Loss: 2.4858641728192947
Epoch 10, Loss: 0.9759160267614005
Epoch 20, Loss: 0.7507696524236805
Epoch 30, Loss: 0.643648432793802
Epoch 40, Loss: 0.5761470738562112
Epoch 50, Loss: 0.4995561326920692
Epoch 60, Loss: 0.43065840111303866
Epoch 70, Loss: 0.34306453362453065
Epoch 80, Loss: 0.27793076737033356
Epoch 90, Loss: 0.22167794258981163
