In [None]:
!pip install dgl -f https://data.dgl.ai/wheels/repo.html

Looking in links: https://data.dgl.ai/wheels/repo.html
Collecting dgl
  Downloading dgl-2.1.0-cp310-cp310-manylinux1_x86_64.whl.metadata (553 bytes)
Collecting torchdata>=0.5.0 (from dgl)
  Downloading torchdata-0.9.0-cp310-cp310-manylinux1_x86_64.whl.metadata (5.5 kB)
Downloading dgl-2.1.0-cp310-cp310-manylinux1_x86_64.whl (8.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.5/8.5 MB[0m [31m32.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading torchdata-0.9.0-cp310-cp310-manylinux1_x86_64.whl (2.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.7/2.7 MB[0m [31m45.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torchdata, dgl
Successfully installed dgl-2.1.0 torchdata-0.9.0


In [None]:
!pip install dgl==0.9.1 -f https://data.dgl.ai/wheels/repo.html

Looking in links: https://data.dgl.ai/wheels/repo.html
Collecting dgl==0.9.1
  Downloading dgl-0.9.1-cp310-cp310-manylinux1_x86_64.whl.metadata (557 bytes)
Downloading dgl-0.9.1-cp310-cp310-manylinux1_x86_64.whl (4.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.9/4.9 MB[0m [31m14.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: dgl
  Attempting uninstall: dgl
    Found existing installation: dgl 2.1.0
    Uninstalling dgl-2.1.0:
      Successfully uninstalled dgl-2.1.0
Successfully installed dgl-0.9.1


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import dgl
from dgl.nn import GraphConv

DGL backend not selected or invalid.  Assuming PyTorch for now.


Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable.  Valid options are: pytorch, mxnet, tensorflow (all lowercase)


  @custom_fwd(cast_inputs=th.float16)
  def backward(ctx, dZ):
  @custom_fwd(cast_inputs=th.float16)
  def backward(ctx, *dZ):
  @custom_fwd(cast_inputs=th.float16)
  def backward(ctx, dZ):
  @custom_fwd(cast_inputs=th.float16)
  def backward(ctx, *dZ):
  @custom_fwd(cast_inputs=th.float16)
  def backward(ctx, grad_out):
  @custom_fwd(cast_inputs=th.float16)
  def backward(ctx, *grad_out):
  @custom_fwd(cast_inputs=th.float16)
  def backward(ctx, dy):
  @custom_fwd(cast_inputs=th.float16)
  def backward(ctx, dy):
  @custom_fwd(cast_inputs=th.float16)
  @custom_fwd(cast_inputs=th.float16)


Define the dataset processing and graph construction

In [None]:
def process_scene_graph(scene_graph, verbose=False):
    """
    Convert a parsed scene graph into a node list and an edge list.

    Args:
        scene_graph: Iterable of relations. Each relation is a sequence of
            strings laid out as consecutive (subject, relation, object)
            triples. Surrounding whitespace, a leading "(" on the subject,
            a trailing ")" on the object and a literal "v:" prefix on the
            relation are removed.
        verbose: When True, print the raw input and every processed triple.
            Off by default because per-sample printing floods the notebook
            output during training.

    Returns:
        A ``(nodes, edges)`` tuple. ``nodes`` is a list of the unique node
        names in first-seen order; ``edges`` is a list of
        ``(src, rel, dst)`` string tuples in input order.
    """
    # A dict preserves insertion order, so node order (and therefore any ids
    # derived from it) is reproducible, unlike iterating a set of strings.
    nodes = {}
    edges = []
    if verbose:
        print("Raw scene_graph:", scene_graph)

    for relation in scene_graph:
        if verbose:
            print("relation:", relation)
            print("len(relation)", len(relation))
        if len(relation) < 3:
            continue
        # Stop before a trailing incomplete triple instead of raising
        # IndexError when the length is not a multiple of three.
        for i in range(0, len(relation) - 2, 3):
            src = relation[i].strip().lstrip("(").strip()
            rel = relation[i + 1].strip()
            # Drop only the literal "v:" prefix. str.lstrip('v:') treats its
            # argument as a character set and would also eat the leading "v"
            # of relations such as "view".
            if rel.startswith("v:"):
                rel = rel[2:].strip()
            dst = relation[i + 2].strip().rstrip(")").strip()
            if verbose:
                print(f"Processed relation: src = {src}, rel = {rel}, dst = {dst}")
            nodes[src] = None
            nodes[dst] = None
            edges.append((src, rel, dst))

    return list(nodes), edges

def create_dgl_graph(nodes, edges, verbose=False):
    """
    Create a DGL graph (with self-loops) from node names and labelled edges.

    Args:
        nodes: List of unique node names; a node's position in the list
            becomes its integer id.
        edges: List of ``(src, rel, dst)`` tuples whose ``src`` and ``dst``
            both appear in ``nodes``.
        verbose: When True, print the intermediate mappings and graphs.

    Returns:
        A ``(g, node_to_id, relation_to_id)`` tuple. ``g`` carries an int64
        edge feature ``'relation_type'`` holding the relation id of every
        original edge and ``-1`` for every added self-loop.
        ``relation_to_id`` maps each distinct relation label to a contiguous
        id assigned in first-seen order.

    Raises:
        KeyError: If an edge endpoint is missing from ``nodes``.
    """
    if verbose:
        print("nodes:", nodes)
        print("edges:", edges)

    node_to_id = {node: i for i, node in enumerate(nodes)}
    if verbose:
        print("node_to_id", node_to_id)

    # One contiguous id per distinct relation label. Enumerating the edges
    # would hand out edge indices instead, so a repeated label would get the
    # index of its last occurrence and ids would have gaps.
    relation_to_id = {}
    for _, rel, _ in edges:
        relation_to_id.setdefault(rel, len(relation_to_id))
    if verbose:
        print("relation_to_id:", relation_to_id)

    # Source and destination node indices, aligned with `edges`.
    src_nodes = [node_to_id[src] for src, _, _ in edges]
    dst_nodes = [node_to_id[dst] for _, _, dst in edges]
    if verbose:
        print("src_nodes:", src_nodes)
        print("dst_nodes:", dst_nodes)

    g = dgl.graph((src_nodes, dst_nodes), num_nodes=len(nodes))
    if verbose:
        print("g (before adding self-loops):", g)

    g = dgl.add_self_loop(g)
    if verbose:
        print("g (after adding self-loops):", g)

    # Edge features must cover the original edges plus the added self-loops.
    # NOTE(review): this relies on dgl.add_self_loop appending exactly one new
    # edge per node after the existing edges - confirm against the DGL docs.
    original_edge_features = [relation_to_id[rel] for _, rel, _ in edges]
    self_loop_features = [-1] * len(nodes)  # sentinel id for self-loops
    g.edata['relation_type'] = torch.tensor(
        original_edge_features + self_loop_features, dtype=torch.int64
    )
    if verbose:
        print("g with edge data:", g)

    return g, node_to_id, relation_to_id


Define the GNN model

In [None]:
class SceneGraphGNN(nn.Module):
    """Two stacked GraphConv layers with a ReLU between them.

    Args:
        in_feats: Width of the input node features.
        hidden_feats: Width of the hidden representation after the first layer.
        num_classes: Width of the per-node output produced by the second layer.
    """

    def __init__(self, in_feats, hidden_feats, num_classes):
        super().__init__()
        # in_feats -> hidden_feats -> num_classes
        self.conv1 = GraphConv(in_feats, hidden_feats)
        self.conv2 = GraphConv(hidden_feats, num_classes)
        self.relu = nn.ReLU()

    def forward(self, g, features):
        """Run both graph convolutions on graph ``g`` and return the raw
        output of the second layer (no activation is applied to it)."""
        hidden = self.relu(self.conv1(g, features))
        return self.conv2(g, hidden)


Dataset loader function

In [None]:
def load_sample(example, feat_dim=64, target_dim=512):
    """
    Turn one dataset record into a DGL graph plus a training target.

    Args:
        example: Mapping with a ``'scene_graph'`` string made of
            comma-separated, parenthesised tuples, e.g.
            ``"( wall , is , glass ) , ( wall , at , train platform )"``.
        feat_dim: Width of the random node features stored in
            ``g.ndata['feat']``.
        target_dim: Width of the random per-node target.

    Returns:
        A ``(g, target)`` tuple where ``target`` has shape
        ``(num_nodes, target_dim)``.

    NOTE: both the node features and the target are random placeholders
    (``torch.rand``); they carry no information about the scene graph.
    """
    import re  # local import so this cell does not depend on the import cell

    raw = (example['scene_graph'] or "").strip()

    # Tuples are separated by "),(" with optional whitespace around the comma.
    # Splitting on the literal "),": never matches ") , (" and collapses the
    # whole graph into one long tuple, which misaligns triples as soon as a
    # tuple does not have exactly three fields.
    scene_graph = [
        tuple(field.strip() for field in chunk.strip().strip("()").split(","))
        for chunk in re.split(r"\)\s*,\s*\(", raw)
        if chunk.strip()
    ]

    # Create nodes and edges
    nodes, edges = process_scene_graph(scene_graph)

    # Create DGL graph
    g, node_to_id, relation_to_id = create_dgl_graph(nodes, edges)

    g.ndata['feat'] = torch.rand(len(nodes), feat_dim)

    target = torch.rand(len(nodes), target_dim)

    return g, target

In [None]:
# Colab-only: mount Google Drive under /content/drive.
from google.colab import drive
drive.mount('/content/drive')
# Module-level checkpoint location; train_gnn reads this global when it calls torch.save.
checkpoint_path = "/content/drive/MyDrive/gnn_checkpoints/model_checkpoint.pth"


Mounted at /content/drive


Training loop

In [None]:
def train_gnn(dataset, model, epochs=10, lr=0.01, max_samples=50,
              checkpoint_interval=2, verbose=False):
    """
    Train ``model`` on the ``'train'`` split of ``dataset``, one graph per step.

    Args:
        dataset: Mapping with a ``'train'`` entry that yields records accepted
            by ``load_sample``.
        model: Module called as ``model(g, g.ndata['feat'])``.
        epochs: Number of passes over the (capped) training split.
        lr: Adam learning rate.
        max_samples: Maximum number of graphs used per epoch; ``None`` uses
            the whole split.
        checkpoint_interval: Save a checkpoint every this many epochs; a
            falsy value disables checkpointing.
        verbose: When True, print the logits of every step.

    Side effects:
        Prints the summed loss after every epoch and writes checkpoints to
        the module-level ``checkpoint_path``.

    NOTE(review): the targets returned by ``load_sample`` are float tensors
    drawn with ``torch.rand``, so the reported loss is only a smoke test of
    the pipeline.
    """
    import os  # local import so this cell does not depend on the import cell

    optimizer = optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()  # Assuming it's a classification task
    model.train()

    for epoch in range(epochs):
        total_loss = 0.0
        count = 0  # graphs actually trained on in this epoch
        for example in dataset['train']:
            if max_samples is not None and count >= max_samples:
                break

            # Load the graph and target
            g, target = load_sample(example)

            # A graph without nodes yields an empty batch, whose mean loss is
            # NaN and would poison total_loss; skip it.
            if g.num_nodes() == 0:
                continue

            # Forward pass
            logits = model(g, g.ndata['feat'])
            if verbose:
                print("logits:", logits)

            # Compute loss
            loss = loss_fn(logits, target)

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            count += 1

        print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss:.4f}")

        if checkpoint_interval and (epoch + 1) % checkpoint_interval == 0:
            # Create the target directory first so torch.save does not fail
            # on a missing folder.
            checkpoint_dir = os.path.dirname(checkpoint_path)
            if checkpoint_dir:
                os.makedirs(checkpoint_dir, exist_ok=True)
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': total_loss,
            }, checkpoint_path)
            print(f"Checkpoint saved at epoch {epoch + 1}")

Load the FACTUAL scene graph dataset

In [None]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (

In [None]:
from datasets import load_dataset
from huggingface_hub import login  # NOTE(review): imported but never called in the visible cells
# Fetch the FACTUAL scene-graph dataset from the Hugging Face Hub by repository id.
dataset = load_dataset('lizhuang144/FACTUAL_Scene_Graph_ID')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/96.0 [00:00<?, ?B/s]

Repo card metadata block was not found. Setting CardData to empty.


FACTUAL_sg_id.csv:   0%|          | 0.00/3.93M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/40369 [00:00<?, ? examples/s]

Inspect the dataset (only a `train` split is available)

In [None]:
# A bare expression is only echoed when it is the last line of a cell, so the
# type and length are printed explicitly instead of being silently discarded.
print(type(dataset), len(dataset))
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['image_id', 'region_id', 'caption', 'scene_graph'],
        num_rows: 40369
    })
})


In [None]:
# Seed PyTorch so the weight initialisation and the random node features and
# targets drawn while loading samples are repeatable across runs.
torch.manual_seed(0)

input_dim = 64  # Input node feature dimension (width of g.ndata['feat'] set in load_sample)
hidden_dim = 128  # Hidden layer dimension
num_classes = 512  # Number of output classes (width of the target built in load_sample)

# Initialize the model
model = SceneGraphGNN(input_dim, hidden_dim, num_classes)

# Train the freshly initialised model on the dataset
train_gnn(dataset, model)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
nodes: ['bears', 'car']
edges: [('bears', 'with', 'car')]
node_to_id {'bears': 0, 'car': 1}
relation_to_id: {'with': 0}
src_nodes: [0]
dst_nodes: [1]
g (before adding self-loops): Graph(num_nodes=2, num_edges=1,
      ndata_schemes={}
      edata_schemes={})
g (after adding self-loops): Graph(num_nodes=2, num_edges=3,
      ndata_schemes={}
      edata_schemes={})
g with edge data: Graph(num_nodes=2, num_edges=3,
      ndata_schemes={}
      edata_schemes={'relation_type': Scheme(shape=(), dtype=torch.int64)})
logits: tensor([[ 0.0693, -0.0340,  0.0373,  ...,  0.0350,  0.0289, -0.0346],
        [ 0.0693, -0.0340,  0.0373,  ...,  0.0350,  0.0289, -0.0346]],
       grad_fn=<AddBackward0>)
Raw scene_graph: [(' wall ', ' is ', ' glass ) ', ' ( wall ', ' at ', ' train platform ')]
relation: (' wall ', ' is ', ' glass ) ', ' ( wall ', ' at ', ' train platform ')
len(relation) 6
Processed relation: src = wall, rel = is, dst = gl