In [47]:
import os
from PIL import Image
import cv2 
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
#import matplotlib.image as mpimg
import networkx as nx
import torch
import torch.nn as nn
import dgl
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from dgl.nn import GraphConv
import dgl.function as fn
import torchvision.models as models
import torchvision.transforms as transforms
from sklearn.decomposition import PCA
import pickle
# Seed PyTorch's global random number generator so runs are repeatable.
torch.manual_seed(1)
# NOTE(review): presumably the target size images are resized to; it is not
# referenced in the visible cells — confirm where it is used.
global_resize_image = (512,512)

In [49]:
# Use the first CUDA device when one is available, otherwise run on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda', index=0)

In [48]:

class CustomModel(nn.Module):
    """Graph encoder followed by an LSTM / BiLSTM classification head.

    Pipeline: two ``GraphConv`` layers with ReLU, mean pooling of node states
    per graph, a unidirectional LSTM, a bidirectional LSTM, two linear layers
    and a softmax over the last axis.

    Args:
        input_dim: Size of each input node feature vector.
        hidden_dim: Width of the GCN layers, both LSTMs and the first linear layer.
        output_dim: Size of the final (softmax) axis.
        num_layers: Number of stacked layers in each LSTM.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
        super(CustomModel, self).__init__()
        self.gcn1 = dgl.nn.GraphConv(input_dim, hidden_dim)
        self.gcn2 = dgl.nn.GraphConv(hidden_dim, hidden_dim)
        self.lstm = nn.LSTM(hidden_dim, hidden_dim, num_layers=num_layers, batch_first=True, bidirectional=False)
        # The BiLSTM is fed the output of the unidirectional LSTM above, whose
        # feature size is hidden_dim. Declaring hidden_dim * 2 here made
        # forward() fail with an input-size mismatch.
        self.bilstm = nn.LSTM(hidden_dim, hidden_dim, num_layers=num_layers, batch_first=True, bidirectional=True)
        # A bidirectional LSTM concatenates both directions: 2 * hidden_dim.
        self.linear1 = nn.Linear(hidden_dim * 2, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, graph, features):
        """Compute per-graph class probabilities for a (batched) graph.

        Args:
            graph: DGL graph, possibly a batch built with ``dgl.batch``.
            features: Node features of shape ``(num_nodes, input_dim)``. A
                singleton middle axis, ``(num_nodes, 1, input_dim)``, is also
                accepted and squeezed away.

        Returns:
            Tensor of softmax probabilities over the last axis; with 2-D node
            features this is ``(1, num_graphs, output_dim)``.
        """
        # Node features stored as (num_nodes, 1, input_dim) would otherwise
        # reach the LSTM as a 4-D tensor, which nn.LSTM rejects.
        if features.dim() == 3 and features.size(1) == 1:
            features = features.squeeze(1)

        h = F.relu(self.gcn1(graph, features))
        h = F.relu(self.gcn2(graph, h))

        # dgl.mean_nodes reads a node-feature *field name*, not a tensor, so
        # the hidden states are stored on the graph first. local_scope()
        # keeps that temporary field from leaking into the caller's graph.
        with graph.local_scope():
            graph.ndata['h'] = h
            h = dgl.mean_nodes(graph, 'h')

        # Add a leading batch axis: the pooled graphs form one sequence.
        h = h.unsqueeze(0)
        h, _ = self.lstm(h)
        h, _ = self.bilstm(h)
        h = F.relu(self.linear1(h))
        h = self.linear2(h)
        # NOTE(review): this returns probabilities. nn.CrossEntropyLoss expects
        # unnormalised logits, so pairing the two applies softmax twice —
        # confirm the intended loss before training.
        return F.softmax(h, dim=2)





In [45]:
# Define a custom dataset class inheriting from torch.utils.data.Dataset
class CustomDataset(Dataset):
    """Map-style dataset backed by a pickled bundle of names, graphs and descriptions.

    The pickle at ``file_path`` must deserialize to a mapping with the keys
    ``"image_names"``, ``"Graph_list"`` and ``"Descriptions"``. Item ``i`` of
    the dataset pairs the i-th entry of each of those three sequences.
    """

    def __init__(self, file_path):
        # NOTE(review): pickle.load can execute arbitrary code while
        # deserializing — only open files from a trusted source.
        with open(file_path, "rb") as handle:
            payload = pickle.load(handle)

        self.Image_names = payload["image_names"]
        self.Graph_list = payload["Graph_list"]
        self.Descriptions = payload["Descriptions"]

    def __len__(self):
        # The image-name list defines the dataset size.
        return len(self.Image_names)

    def __getitem__(self, idx):
        # One sample: (image name, graph, description) at position idx.
        sample = (
            self.Image_names[idx],
            self.Graph_list[idx],
            self.Descriptions[idx],
        )
        return sample

# Custom collate function to handle the batch conversion
def collate(samples):
    """Merge a list of ``(image_name, graph, description)`` samples into one batch.

    Args:
        samples: Sequence of 3-tuples as produced by the dataset's ``__getitem__``.

    Returns:
        ``(image_names, batched_graph, descriptions)`` where the names and
        descriptions are tuples in sample order and the graphs have been
        merged with ``dgl.batch``.
    """
    # Transpose the list of samples into three parallel tuples.
    names, graph_seq, captions = zip(*samples)
    # Descriptions are passed through untouched; only the graphs are merged.
    return names, dgl.batch(graph_seq), captions

# Pickle file the dataset is loaded from.
file_path = "Image_Graph_data.pkl"

# Load the pickled names / graphs / descriptions into a dataset.
dataset = CustomDataset(file_path)

# Batch the dataset; `collate` merges each batch's graphs with dgl.batch.
batch_size = 32  # number of samples per batch
data_loader = DataLoader(dataset, batch_size=batch_size, collate_fn=collate)

# Sanity check: iterate over every batch and print what it contains.
for image_names, batched_graph, batched_description in data_loader:
    # Accessing batched data
    #feature = batched_graph.ndata['feature']
    # Number of graphs merged into this batch.
    print(batched_graph.batch_size)
    # Per-graph node and edge counts inside the batched graph.
    print(batched_graph.batch_num_nodes())
    print(batched_graph.batch_num_edges())
    print("Batch of Image Names:", image_names)
    print("Batch of Graphs:", batched_graph)
    # Prints the whole node-data dict of the batched graph.
    print("Batch of Features:", batched_graph.ndata)
    print("Batch of sentences:", batched_description)
    # Perform further operations as needed


32
tensor([ 5, 14,  4,  3,  4,  5,  3,  9,  4,  8,  4,  7,  4,  9,  3,  3,  3,  2,
        10,  5,  6,  2,  8,  3,  3,  4,  4,  4,  5,  7,  6,  4])
tensor([10, 91,  6,  3,  6, 10,  3, 36,  6, 28,  6, 21,  6, 36,  3,  3,  3,  1,
        45, 10, 15,  1, 28,  3,  3,  6,  6,  6, 10, 21, 15,  6])
Batch of Image Names: ('1160171', '1788', '2315863', '2316839', '2316986', '2317358', '2318441', '2318537', '2318632', '2321297', '2322121', '2322229', '2322312', '2322374', '2323474', '2326705', '2329048', '2331290', '2333122', '2333655', '2335332', '2336878', '2338731', '2338944', '2339963', '2340200', '2340461', '2340504', '2341481', '2341924', '2342185', '2342657')
Batch of Graphs: Graph(num_nodes=165, num_edges=453,
      ndata_schemes={'features': Scheme(shape=(1, 4096), dtype=torch.float32)}
      edata_schemes={})
Batch of Features: {'features': tensor([[[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

        [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.8047]],

        [[0.0

In [None]:
def train_model(model, train_loader, criterion, optimizer, num_epochs):
    """Run a plain training loop and print the summed loss after every epoch.

    Args:
        model: Callable invoked as ``model(graph, features)``; ``model.train()``
            is called once before the first epoch.
        train_loader: Iterable yielding ``(graph, features, labels)`` triples.
        criterion: Callable ``criterion(output, labels)`` returning a loss
            object that supports ``backward()`` and ``item()``.
        optimizer: Object exposing ``zero_grad()`` and ``step()``.
        num_epochs: Number of passes over ``train_loader``.
    """

    def _run_batch(batch):
        # One optimisation step; returns the batch loss as a Python number.
        graph, features, labels = batch
        optimizer.zero_grad()
        batch_loss = criterion(model(graph, features), labels)
        batch_loss.backward()
        optimizer.step()
        return batch_loss.item()

    model.train()
    for epoch_number in range(1, num_epochs + 1):
        # The reported value is the sum of batch losses, not their mean.
        epoch_loss = 0
        for batch in train_loader:
            epoch_loss += _run_batch(batch)
        print(f"Epoch {epoch_number}/{num_epochs}, Loss: {epoch_loss}")

In [None]:
# Example usage: build the model, dataset, loader, loss and optimizer, then train.
# Assumes graphs, features and labels already exist for the dataset.

# Define model parameters
input_dim = 4096  # width of each node feature vector fed to the first GraphConv
hidden_dim = 256
output_dim = 4088  # NOTE(review): presumably the number of target classes — confirm where 4088 comes from
num_layers =1
num_epochs =2

# Create custom model
model = CustomModel(input_dim, hidden_dim, output_dim, num_layers)

# Create dataset
# NOTE(review): CustomDataset.__init__ as defined in this notebook takes a
# single file_path, and graphs / features / labels are not defined in the
# visible cells — this call cannot run as written; confirm the intended
# construction.
dataset = CustomDataset(graphs, features, labels)

# Create data loader
# NOTE(review): collate returns (image_names, batched_graph, descriptions),
# while train_model unpacks each batch as (graph, features, labels) — confirm
# the batch layout before running.
batch_size =32
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=collate)

# Define loss function and optimizer
# NOTE(review): CustomModel.forward already applies softmax, whereas
# nn.CrossEntropyLoss expects unnormalised logits — confirm the pairing.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train the model
train_model(model, train_loader, criterion, optimizer, num_epochs)


In [None]:
# <==============  GCN Model  ==============>


#function to create a dense graph
def create_dense_graph(num_nodes):
    """Build a DGL graph with a directed edge between every ordered pair of distinct nodes.

    Args:
        num_nodes: Number of nodes to create.

    Returns:
        The graph, holding ``num_nodes`` nodes and one edge ``i -> j`` for
        every ``i != j`` (no self-loops).
    """
    dense = dgl.DGLGraph()
    dense.add_nodes(num_nodes)

    node_ids = range(num_nodes)
    # Row-major enumeration of (source, target) pairs, skipping the diagonal.
    pairs = [(u, v) for u in node_ids for v in node_ids if u != v]
    sources = [u for u, _ in pairs]
    targets = [v for _, v in pairs]

    dense.add_edges(sources, targets)
    return dense

#GCN model
class GCN(nn.Module):
    """Two-layer graph convolutional network with dropout between the layers.

    Args:
        in_feats: Input node feature size.
        hidden_feats_1: Output size of the first ``GraphConv`` layer.
        hidden_feats_2: Output size of the second ``GraphConv`` layer.
        edge_feats: Stored as ``self.edge_feats``; not read by ``forward``.
    """

    def __init__(self, in_feats, hidden_feats_1, hidden_feats_2, edge_feats):
        super().__init__()
        self.edge_feats = edge_feats
        self.conv1 = GraphConv(in_feats, hidden_feats_1)
        self.conv2 = GraphConv(hidden_feats_1, hidden_feats_2)
        self.dropout = nn.Dropout(0.5)

    def forward(self, g, features):
        """Return the second-layer node representations for graph ``g``.

        The result is also written to ``g.ndata['h']`` as a side effect.
        """
        # First layer: convolution -> ReLU -> dropout.
        hidden = self.dropout(F.relu(self.conv1(g, features)))
        # Second layer has no activation applied to its output.
        node_out = self.conv2(g, hidden)

        # Keep a copy of the node representations on the graph itself.
        g.ndata['h'] = node_out
        return node_out