In [1]:
import torch
import torch.nn.functional as F
%pip install torch_geometric
from torch_geometric.nn import SAGEConv
import os
import json
import os
import pickle
import json
import random
import numpy as np
from torch_geometric.data import Data
import networkx as nx
import warnings
import math
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from torch.utils.data.dataloader import default_collate
from torch.utils.data import random_split

Note: you may need to restart the kernel to use updated packages.


In [2]:
def get_files_in_folder(input_folder):
    file_list = []
    for file_name in os.listdir(input_folder):
        file_path = os.path.join(input_folder, file_name)
        if os.path.isfile(file_path):
            file_list.append(file_path)
    return file_list

# Example usage:
folder_path = 'done_all'
verilog_files = get_files_in_folder(folder_path)
print(len(verilog_files))
print(verilog_files)

396
['done_all\\adder10_synth.txt', 'done_all\\adder11_synth.txt', 'done_all\\adder12_synth.txt', 'done_all\\adder13_synth.txt', 'done_all\\adder14_synth.txt', 'done_all\\adder15_synth.txt', 'done_all\\adder16_synth.txt', 'done_all\\adder17_synth.txt', 'done_all\\adder18_synth.txt', 'done_all\\adder19_synth.txt', 'done_all\\adder1_synth.txt', 'done_all\\adder20_synth.txt', 'done_all\\adder21_synth.txt', 'done_all\\adder22_synth.txt', 'done_all\\adder23_synth.txt', 'done_all\\adder24_synth.txt', 'done_all\\adder25_synth.txt', 'done_all\\adder26_synth.txt', 'done_all\\adder27_synth.txt', 'done_all\\adder28_synth.txt', 'done_all\\adder2_synth.txt', 'done_all\\adder3_synth.txt', 'done_all\\adder4_synth.txt', 'done_all\\adder5_synth.txt', 'done_all\\adder6_synth.txt', 'done_all\\adder7_synth.txt', 'done_all\\adder8_synth.txt', 'done_all\\adder9_synth.txt', 'done_all\\and10_gate_synth.txt', 'done_all\\and11_gate_synth.txt', 'done_all\\and12_gate_synth.txt', 'done_all\\and13_synth.txt', 'done

In [3]:
def extracting_attributes(verilog_file):
    try:
        if os.path.isfile(verilog_file):
            with open(verilog_file, "r") as file:
                loaded_data = json.load(file)
                nodes = loaded_data[0]
                edges = loaded_data[1]
                label = loaded_data[2]
                
                x = torch.tensor(nodes, dtype=torch.float)
                edge_index = torch.tensor(edges, dtype=torch.long)
                y = torch.tensor(label, dtype=torch.float)
                num_nodes = x.size(0)
                
                # Create batch assignment vector (assuming one graph per file)
                batch = torch.zeros(num_nodes, dtype=torch.long)
                data = Data(x=x, edge_index=edge_index, y = y, batch = batch)
                return data
    
    except Exception as e:
        print(e)
        return e


In [4]:
class VerilogDataset(Dataset):  # Using Dataset from torch_geometric
    def __init__(self, verilog_files):
        print(f"Loaded {len(verilog_files)} Verilog files.")
        self.verilog_files = verilog_files

    def __len__(self):
        return len(self.verilog_files)

    def __getitem__(self, idx):
        verilog_file = self.verilog_files[idx]
        data = extracting_attributes(verilog_file)
        return data

dataset = VerilogDataset(verilog_files)
print(len(dataset))

Loaded 396 Verilog files.
396


In [5]:
print(dataset[0])
print(verilog_files[0])
print(dataset.verilog_files[0])

Data(x=[54, 7], edge_index=[2, 72], y=[1, 16], batch=[54])
done_all\adder10_synth.txt
done_all\adder10_synth.txt


In [6]:
def are_all_data_objects_unique(dataset):
    data_objects = []
    for data in dataset:
        if data in data_objects:
            return False
        data_objects.append(data)
    return True

# Example usage:
is_unique = are_all_data_objects_unique(dataset)
if is_unique:
    print("All data objects are unique.")
else:
    print("Duplicate data objects found.")


All data objects are unique.


In [7]:
y_labels = []
for data in dataset:
    # print(data)
    # print(data.y.tolist())
    y_labels.append(np.argmax(data.y.tolist()))

In [8]:
def custom_collate(batch):
    if isinstance(batch[0], Data):
        return batch
    else:
        return default_collate(batch)
    


In [9]:
X_train, X_test, y_train, y_test = train_test_split(dataset, y_labels, test_size=0.2, stratify = y_labels, random_state=41)
train_loader = DataLoader(X_train, batch_size=16, shuffle=True, collate_fn=custom_collate)
test_loader = DataLoader(X_test, batch_size=16, shuffle = False, collate_fn=custom_collate)

In [10]:
# len(train_loader.dataset)
print(train_loader.dataset[0])

Data(x=[82, 7], edge_index=[2, 109], y=[1, 16], batch=[82])


In [11]:
loader_iter = iter(train_loader)
batch = next(loader_iter)
# print(batch)
# print(batch.num_graphs)

In [13]:
from torch_geometric.nn import global_mean_pool
class GraphSAGE1(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super(GraphSAGE1, self).__init__()
        self.conv1 = SAGEConv(in_channels, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, out_channels)
        

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        emb = self.conv2(x, edge_index)
        x = global_mean_pool(emb, batch=None)
        return x, emb


In [16]:
# Initialize the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
in_channels = 7
hidden_channels = 16
out_channels = 16
model1 = GraphSAGE1(in_channels, hidden_channels, out_channels)

# Create a simple training loop
optimizer = torch.optim.Adam(model1.parameters(), lr=0.01)
loss_fn = torch.nn.CrossEntropyLoss()


# Training the model
model1.train()
for epoch in range(100):
    for data in X_train:
        optimizer.zero_grad()
        data = data.to(device)
        #forward pass
        out, emb = model1(data.x, data.edge_index)
        # print(out.shape)
        # print(data.y.shape)
        target = torch.argmax(data.y, dim=1)
        # calculate the loss
        loss = loss_fn(out, target)
        # zero the gradients of the weights so that the gradients are not accumulated
        # calculate the gradients using backpropagation
        loss.backward()
        # update the weights
        optimizer.step()
    
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')
        

Epoch 0, Loss: 2.6831860542297363
Epoch 10, Loss: 0.1753063201904297
Epoch 20, Loss: 0.032157398760318756
Epoch 30, Loss: 0.013998419046401978
Epoch 40, Loss: 0.007112658582627773
Epoch 50, Loss: 0.004827866796404123
Epoch 60, Loss: 0.0024541281163692474
Epoch 70, Loss: 0.0015282867243513465
Epoch 80, Loss: 0.0013799677835777402
Epoch 90, Loss: 0.000982397934421897


In [18]:
model1.eval()
correct = 0
for data in X_train:
    out, emb = model1(data.x, data.edge_index)  
    pred = out.argmax(dim=1)  # Use the class with highest probability.
    y_label = (data.y.tolist())
    y_label = y_label[0].index(1.0)
    pred_label = (pred.tolist())[0]
    # print(pred_label)
    # print(y_label)
    if y_label == pred_label:
        correct += 1            
    # correct += int((pred == data.y).sum())  # Check against ground-truth labels.
acc = correct / len(X_train)  # Derive ratio of correct predictions.

acc

0.9272151898734177

In [19]:
model1.eval()
correct = 0
for data in X_test:
    out, emb = model1(data.x, data.edge_index)  
    pred = out.argmax(dim=1)  # Use the class with highest probability.
    y_label = (data.y.tolist())
    y_label = y_label[0].index(1.0)
    pred_label = (pred.tolist())[0]
    # print(pred_label)
    # print(y_label)
    if y_label == pred_label:
        correct += 1            
    # correct += int((pred == data.y).sum())  # Check against ground-truth labels.
test_acc = correct / len(X_test)  # Derive ratio of correct predictions.

test_acc

0.8625

In [22]:
torch.save(model1.state_dict(), 'grahpSAGE_92_86_graph_embeddings.pth')

In [59]:
def contrastive_loss(z_i, z_j, temperature=0.5):
    z_i = F.normalize(z_i, p=2, dim=1)
    z_j = F.normalize(z_j, p=2, dim=1)
    
    # print(z_i)
    # print(z_j)
    sim_matrix = torch.mm(z_i, z_j.t()) / temperature
    # print(sim_matrix)
    sim_exp = torch.exp(sim_matrix)
    # print(sim_exp)
    sim_sum = sim_exp.sum(dim=1)
    # print(sim_sum)
    
    loss = -torch.log(sim_exp.diagonal())
    
    return loss


In [60]:
import random

def augment_graph(graph):
    # Example augmentation: randomly drop edges
    edge_index = graph.edge_index.numpy()
    num_edges = edge_index.shape[1]
    keep_edges = random.sample(range(num_edges), k=int(0.9 * num_edges))
    edge_index = torch.tensor(edge_index[:, keep_edges], dtype=torch.long)
    return Data(x=graph.x, edge_index=edge_index)

