In [3]:
%cd ../
%pwd



/home/irfan/roadscene2vec


'/home/irfan/roadscene2vec'

In [4]:
import sys, pdb
from pathlib import Path
sys.path.append(str(Path("../../")))
import torch
import numpy as np
from sklearn.utils.class_weight import compute_class_weight
from sklearn.utils import resample
from sklearn.model_selection import train_test_split, StratifiedKFold
from tqdm import tqdm
# /home/irfan/roadscene2vec/roadscene2vec/data/dataset.py
from roadscene2vec.data.dataset import SceneGraphDataset
# from torch_geometric.data import Data, DataLoader, DataListLoader
from roadscene2vec.learning.util.metrics import get_metrics, log_wandb, log_wandb_transfer_learning 

scene_graph_dataset  = SceneGraphDataset()
scene_graph_dataset.dataset_save_path ="/home/irfan/roadscene2vec/examples/object_based_sg_extraction_output.pkl"
scene_graph_dataset_ = scene_graph_dataset.load()  

In [13]:
import torch
from torch_geometric.data import Data

# Dummy input
num_nodes = 6
in_dim = 9        # embedding size
pos_dim = 2       # x, y

# Random node features + positions
node_feats = torch.randn(num_nodes, in_dim)
node_pos = torch.rand(num_nodes, pos_dim)  # (x, y) in [0, 1]

# Concatenate node features + positions
node_input = torch.cat([node_feats, node_pos], dim=1)  # [6, 11]

# Edge list (5 edges)
edge_index = torch.tensor([
    [1, 2, 3, 4, 5],
    [4, 0, 0, 0, 3]
], dtype=torch.long)

# Dummy edge features (optional)
edge_features = torch.randn(edge_index.size(1), 4)

# Dummy target: shifted positions (e.g. physics sim)
node_pos_target = node_pos + 0.05 * torch.randn_like(node_pos)

# Wrap into a PyG Data object
data = Data(
    x=node_input,                     # node features + position
    edge_index=edge_index,           # connectivity
    edge_attr=edge_features,         # optional
    pos=node_pos,                    # original position (input)
    y=node_pos_target                # target: predicted position
)


In [6]:
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class PositionPredictor(nn.Module):
    def __init__(self, in_dim, hidden_dim=32):
        super().__init__()
        self.gcn1 = GCNConv(in_dim, hidden_dim)
        self.gcn2 = GCNConv(hidden_dim, hidden_dim)
        self.out = nn.Linear(hidden_dim, 2)  # Predict (x, y)

    def forward(self, x, edge_index):
        x = F.relu(self.gcn1(x, edge_index))
        x = F.relu(self.gcn2(x, edge_index))
        return self.out(x)  # Output: [num_nodes, 2]


In [7]:
model = PositionPredictor(in_dim=data.x.size(1))
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
loss_fn = nn.MSELoss()

for epoch in range(300):
    model.train()
    pred = model(data.x, data.edge_index)  # [num_nodes, 2]
    loss = loss_fn(pred, data.y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch % 50 == 0:
        print(f"Epoch {epoch} | Loss: {loss.item():.6f}")


Epoch 0 | Loss: 0.449032
Epoch 50 | Loss: 0.000849
Epoch 100 | Loss: 0.000010
Epoch 150 | Loss: 0.000000
Epoch 200 | Loss: 0.000000
Epoch 250 | Loss: 0.000000


In [9]:
import torch
from torch_geometric.data import Data

# --- Step 1: Define the model (must match training)
from torch_geometric.nn import GCNConv
import torch.nn.functional as F
import torch.nn as nn

class PositionPredictor(nn.Module):
    def __init__(self, in_dim, hidden_dim=32):
        super().__init__()
        self.gcn1 = GCNConv(in_dim, hidden_dim)
        self.gcn2 = GCNConv(hidden_dim, hidden_dim)
        self.out = nn.Linear(hidden_dim, 3)  # Predict (x, y)

    def forward(self, x, edge_index):
        x = F.relu(self.gcn1(x, edge_index))
        x = F.relu(self.gcn2(x, edge_index))
        return self.out(x)

# --- Step 2: Load or define dummy graph input
num_nodes = 6
in_dim = 9
pos_dim = 3

# Dummy node features and positions
node_feats = torch.randn(num_nodes, in_dim)
node_pos = torch.rand(num_nodes, pos_dim)
node_input = torch.cat([node_feats, node_pos], dim=1)  # [6, 11]

# Dummy edge_index
edge_index = torch.tensor([
    [1, 2, 3, 4, 5],
    [4, 0, 0, 0, 3]
], dtype=torch.long)

# Wrap as PyG data object
data = Data(
    x=node_input,
    edge_index=edge_index
)

# --- Step 3: Initialize the model and run inference
model = PositionPredictor(in_dim=node_input.size(1))
model.eval()

with torch.no_grad():
    predicted_pos = model(data.x, data.edge_index)  # [6, 2]

print("Predicted node positions:")
print(predicted_pos)


Predicted node positions:
tensor([[-0.0204, -0.0745, -0.0348],
        [-0.0561, -0.0013, -0.2441],
        [-0.1112, -0.0754, -0.2363],
        [-0.0888,  0.0934,  0.0077],
        [-0.0249, -0.0386, -0.1716],
        [-0.1064,  0.0737, -0.0095]])


In [7]:
import os
import pickle

# Directory where pickle files are stored
load_dir = "/home/irfan/roadscene2vec/examples/town2"

# List to store loaded scenegraph dictionaries
loaded_scenegraphs = []

# Loop through all .pkl files in the directory
for filename in sorted(os.listdir(load_dir)):
    if filename.endswith('.pkl'):
        filepath = os.path.join(load_dir, filename)
        with open(filepath, 'rb') as f:
            sg_dict = pickle.load(f)
            loaded_scenegraphs.append(sg_dict)
            print(f"Loaded: {filename}, Frame: {sg_dict['frame_number']}")
            print(f"Loaded: {filename}, Node Features: {sg_dict['node_features'].shape}")
            print(f"Loaded: {filename}, Edge Index: {sg_dict['edge_index'].shape}")

            


Loaded: scenegraph_24998.pkl, Frame: 24998
Loaded: scenegraph_24998.pkl, Node Features: torch.Size([9, 9])
Loaded: scenegraph_24998.pkl, Edge Index: torch.Size([2, 8])
Loaded: scenegraph_24999.pkl, Frame: 24999
Loaded: scenegraph_24999.pkl, Node Features: torch.Size([7, 9])
Loaded: scenegraph_24999.pkl, Edge Index: torch.Size([2, 6])
Loaded: scenegraph_25000.pkl, Frame: 25000
Loaded: scenegraph_25000.pkl, Node Features: torch.Size([8, 9])
Loaded: scenegraph_25000.pkl, Edge Index: torch.Size([2, 7])
Loaded: scenegraph_25001.pkl, Frame: 25001
Loaded: scenegraph_25001.pkl, Node Features: torch.Size([8, 9])
Loaded: scenegraph_25001.pkl, Edge Index: torch.Size([2, 7])
Loaded: scenegraph_25002.pkl, Frame: 25002
Loaded: scenegraph_25002.pkl, Node Features: torch.Size([8, 9])
Loaded: scenegraph_25002.pkl, Edge Index: torch.Size([2, 7])
Loaded: scenegraph_25003.pkl, Frame: 25003
Loaded: scenegraph_25003.pkl, Node Features: torch.Size([7, 9])
Loaded: scenegraph_25003.pkl, Edge Index: torch.Size(

In [11]:
data.x.shape

torch.Size([6, 12])

In [None]:


# Store predictions
predicted_positions = []
scenegraph_files = [
    '/home/irfan/roadscene2vec/examples/town2/scenegraph_27000.pkl']


for i, (scene_path) in enumerate(scenegraph_files):
    node_feats, edge_idx = load_scenegraph(scene_path)

    # Create batch: all zeros since we have only one graph
    batch = torch.zeros(node_feats.size(0), dtype=torch.long)
    prev_pos = torch.tensor([0.8, 0.9, 1.0], dtype=torch.float)


    # Ensure prev_pos is tensor of shape (3,)
    if not isinstance(prev_pos, torch.Tensor):
        prev_pos = torch.tensor(prev_pos, dtype=torch.float)
    elif prev_pos.ndim == 2 and prev_pos.shape[0] == 1:
        prev_pos = prev_pos.squeeze(0)

    with torch.no_grad():
        pred_pos = model(node_feats, edge_idx, batch, prev_pos)
        predicted_positions.append(pred_pos.squeeze(0))  # shape (3,)
        print(f"Predicted position for frame {i}: {pred_pos.squeeze(0).numpy()}")


NameError: name 'load_scenegraph' is not defined

In [16]:
# Sample node features (7 nodes, 9-dim one-hot)
node = torch.tensor([
    [0., 0., 1., 0., 0., 0., 0., 0., 0.],
    [0., 0., 0., 0., 0., 0., 0., 1., 0.],
    [0., 0., 0., 0., 0., 0., 1., 0., 0.],
    [0., 0., 0., 0., 0., 0., 1., 0., 0.],
    [0., 0., 0., 0., 0., 0., 1., 0., 0.],
    [0., 1., 0., 0., 0., 0., 0., 0., 0.],
    [0., 1., 0., 0., 0., 0., 0., 0., 0.]
], dtype=torch.float)

# Edge index (2, E)
edge_index = torch.tensor([
    [1, 2, 3, 4, 5, 6],
    [4, 0, 0, 0, 2, 3]
], dtype=torch.long)

# Batch: all zeros since one graph
batch = torch.zeros(node.size(0), dtype=torch.long)

# Previous position (x, y, z)
prev_position = torch.tensor([0.8, 0.9, 1.0], dtype=torch.float)

# Create and run model
model = GCN_SingleFramePositionPredictor(in_dim=9)
model.eval()

with torch.no_grad():
    pred = model(node, edge_index, batch, prev_position)

print("🔮 Predicted next 3D position:", pred.squeeze().tolist())


🔮 Predicted next 3D position: [0.0018148496747016907, 0.027338869869709015, -0.18130692839622498]


In [None]:
import torch
import pickle
import os


class ScenegraphSequenceDataset(Dataset):
    def __init__(self, graph_dir, position_txt, sequence_length=5):
        self.graph_dir = graph_dir
        self.sequence_length = sequence_length

        # Load and sort scenegraph file paths
        self.graph_files = sorted([
            os.path.join(graph_dir, f)
            for f in os.listdir(graph_dir)
            if f.endswith(".pkl")
        ])

        # Load positions.txt, ignore frame_id, keep only x, y, z
        with open(position_txt, "r") as f:
            self.positions = [
                torch.tensor([float(x), float(y), float(z)], dtype=torch.float)
                for _, x, y, z in (line.strip().split(",") for line in f)
            ]

        # Ensure we have enough frames for a full sequence + target
        self.max_index = min(len(self.graph_files), len(self.positions)) - 1
        self.valid_range = self.max_index - self.sequence_length

    def __len__(self):
        return self.valid_range

    def __getitem__(self, idx):
        node_features_seq = []
        edge_index_seq = []
        batch_seq = []
        prev_positions_seq = []

        for i in range(idx, idx + self.sequence_length):
            with open(self.graph_files[i], "rb") as f:
                sg = pickle.load(f)

            node_feats = torch.tensor(sg["node_features"], dtype=torch.float)
            edge_index = torch.tensor(sg["edge_index"], dtype=torch.long)
            batch = torch.zeros(node_feats.size(0), dtype=torch.long)

            node_features_seq.append(node_feats)
            edge_index_seq.append(edge_index)
            batch_seq.append(batch)
            prev_positions_seq.append(self.positions[i])

        # Target = position at time t + sequence_length
        target_position = self.positions[idx + self.sequence_length]

        return {
            "node_features_seq": node_features_seq,
            "edge_index_seq": edge_index_seq,
            "batch_seq": batch_seq,
            "prev_positions_seq": prev_positions_seq,
            "target_position": target_position
        }


In [17]:
from torch.utils.data import DataLoader, random_split

dataset = ScenegraphSequenceDataset(
    graph_dir="/home/irfan/roadscene2vec/examples/town2/",
    position_txt="/home/irfan/roadscene2vec/examples/transferdata/pos.txt",
    sequence_length=5
)

# Split train/test
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

train_dataset, test_dataset = random_split(dataset, [train_size, test_size])
train_loader = DataLoader(train_dataset, batch_size=20, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=20, shuffle=False)


In [22]:
len(test_loader)

26

In [5]:
import torch
from torch.utils.data import Dataset
import os
import pickle
def pad_tensor(tensor, target_shape):
    padded = torch.zeros(*target_shape, dtype=tensor.dtype)
    slices = tuple(slice(0, min(s, t)) for s, t in zip(tensor.shape, target_shape))
    padded[slices] = tensor[slices]
    return padded

class ScenegraphSequenceDataset(Dataset):
    def __init__(self, graph_dir, position_txt, sequence_length=5):
        self.graph_dir = graph_dir
        self.sequence_length = sequence_length

        self.graph_files = sorted([
            os.path.join(graph_dir, f)
            for f in os.listdir(graph_dir)
            if f.endswith(".pkl")
        ])

        with open(position_txt, "r") as f:
            self.positions = [
                torch.tensor([float(x), float(y), float(z)], dtype=torch.float)
                for _, x, y, z in (line.strip().split(",") for line in f)
            ]

        self.max_index = min(len(self.graph_files), len(self.positions)) - 1
        self.valid_range = self.max_index - self.sequence_length

    def __len__(self):
        return self.valid_range

    def __getitem__(self, idx):
        node_features_seq = []
        edge_index_seq = []
        prev_positions_seq = []

        for i in range(idx, idx + self.sequence_length):
            with open(self.graph_files[i], "rb") as f:
                sg = pickle.load(f)

            # Pad node features to (9, 9)
            node_feats = torch.tensor(sg["node_features"], dtype=torch.float)
            padded_node_feats = pad_tensor(node_feats, (9, 9))
            node_features_seq.append(padded_node_feats)

            # Pad edge_index to (2, 9)
            edge_index = torch.tensor(sg["edge_index"], dtype=torch.long)
            padded_edge_index = pad_tensor(edge_index, (2, 9))
            edge_index_seq.append(padded_edge_index)

            prev_positions_seq.append(self.positions[i])  # shape (3,)

        target_position = self.positions[idx + self.sequence_length]  # shape (3,)

        return {
            "node_features_seq": torch.stack(node_features_seq),        # shape [sequence_length, 9, 9]
            "edge_index_seq": torch.stack(edge_index_seq),              # shape [sequence_length, 2, 9]
            "prev_positions_seq": torch.stack(prev_positions_seq),      # shape [sequence_length, 3]
            "target_position": target_position                          # shape [3]
        }


In [5]:
import torch
from torch.utils.data import Dataset
import os
import pickle

def pad_tensor(tensor, target_shape):
    padded = torch.zeros(*target_shape, dtype=tensor.dtype)
    slices = tuple(slice(0, min(s, t)) for s, t in zip(tensor.shape, target_shape))
    padded[slices] = tensor[slices]
    return padded

class ScenegraphSequenceDataset(Dataset):
    def __init__(self, graph_dir, position_txt, sequence_length=5):
        self.graph_dir = graph_dir
        self.sequence_length = sequence_length

        # Load and sort scenegraph file paths
        self.graph_files = sorted([
            os.path.join(graph_dir, f)
            for f in os.listdir(graph_dir)
            if f.endswith(".pkl")
        ])

        # Load positions.txt, ignore frame_id, keep only x, y, z
        with open(position_txt, "r") as f:
            self.positions = [
                torch.tensor([float(x), float(y), float(z)], dtype=torch.float)
                for _, x, y, z in (line.strip().split(",") for line in f)
            ]

        # Ensure we have enough frames for a full sequence + target
        self.max_index = min(len(self.graph_files), len(self.positions)) - 1
        self.valid_range = self.max_index - self.sequence_length

    def __len__(self):
        return self.valid_range

    def __getitem__(self, idx):
        node_features_seq = []
        edge_index_seq = []
        batch_seq = []
        prev_positions_seq = []

        for i in range(idx, idx + self.sequence_length):
            with open(self.graph_files[i], "rb") as f:
                sg = pickle.load(f)

            node_feats = torch.tensor(sg["node_features"], dtype=torch.float)
            edge_index = torch.tensor(sg["edge_index"], dtype=torch.long)

            # Padding
            padded_node_feats = pad_tensor(node_feats, (9, 9))          # shape [9, 9]
            padded_edge_index = pad_tensor(edge_index, (2, 9))          # shape [2, 9]
            padded_batch = pad_tensor(torch.zeros(node_feats.size(0), dtype=torch.long), (9,))  # shape [9]

            node_features_seq.append(padded_node_feats)
            edge_index_seq.append(padded_edge_index)
            batch_seq.append(padded_batch)
            prev_positions_seq.append(self.positions[i])  # shape [3]

        target_position = self.positions[idx + self.sequence_length]  # shape [3]

        return {
            "node_features_seq": node_features_seq,        # list of [9, 9]
            "edge_index_seq": edge_index_seq,              # list of [2, 9]
            "batch_seq": batch_seq,                        # list of [9]
            "prev_positions_seq": prev_positions_seq,      # list of [3]
            "target_position": target_position             # shape [3]
        }


In [6]:
from torch.utils.data import DataLoader, random_split

dataset = ScenegraphSequenceDataset(
    graph_dir="/home/irfan/roadscene2vec/examples/town2/",
    position_txt="/home/irfan/roadscene2vec/examples/transferdata/pos.txt",
    sequence_length=5
)

# Split train/test
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

train_dataset, test_dataset = random_split(dataset, [train_size, test_size])
train_loader = DataLoader(train_dataset, batch_size=20, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=20, shuffle=False)


In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool


class GCN_SingleFramePositionPredictor(nn.Module):
    def __init__(self, in_dim, gcn_hidden_dim=32, mlp_hidden_dim=64):
        super().__init__()

        self.gcn1 = GCNConv(in_dim, gcn_hidden_dim)
        self.gcn2 = GCNConv(gcn_hidden_dim, gcn_hidden_dim)

        # MLP: takes [graph_embedding + prev_position] → next position
        self.mlp = nn.Sequential(
            nn.Linear(gcn_hidden_dim + 3, mlp_hidden_dim),
            nn.ReLU(),
            nn.Linear(mlp_hidden_dim, 3)
        )

    def forward(self, node_features, edge_index, batch, prev_position):
        """
        Inputs:
        - node_features: Tensor (N, in_dim)
        - edge_index: Tensor (2, E)
        - batch: Tensor (N,) — all zeros if single graph
        - prev_position: Tensor (3,) or (1, 3)

        Output:
        - predicted_position: Tensor (1, 3)
        """

        x = F.relu(self.gcn1(node_features, edge_index))
        x = F.relu(self.gcn2(x, edge_index))

        graph_embed = global_mean_pool(x, batch)  # shape (1, D)

        # Ensure prev_position shape is (1, 3)
        if prev_position.ndim == 1:
            prev_position = prev_position.unsqueeze(0)

        combined = torch.cat([graph_embed, prev_position], dim=1)  # shape (1, D+3)
        predicted_position = self.mlp(combined)  # shape (1, 3)

        return predicted_position

In [8]:
# Prepare the model
model = GCN_SingleFramePositionPredictor(in_dim=9)
model.train()

GCN_SingleFramePositionPredictor(
  (gcn1): GCNConv(9, 32)
  (gcn2): GCNConv(32, 32)
  (mlp): Sequential(
    (0): Linear(in_features=35, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=3, bias=True)
  )
)

In [9]:
import torch
import torch.nn as nn
import torch.optim as optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
model=model.to(device)

In [4]:
import torch
def train_epoch(model, dataloader, optimizer, criterion):
    model.train()
    device= torch.device("cuda" if torch.cuda.is_available() else "cpu")    
    total_loss = 0.0

    for batch in dataloader:
        batch = {k: v[0] for k, v in batch.items()}  # unwrap batch_size=1

        node_feats = batch["node_features_seq"][-1].to(device)         # use latest frame
        edge_index = batch["edge_index_seq"][-1].to(device)
        batch_tensor = batch["batch_seq"][-1].to(device)
        prev_position = batch["prev_positions_seq"][-1].to(device)     # last known position
        target_position = batch["target_position"].to(device)

        optimizer.zero_grad()
        # print(node_feats.shape, edge_index.shape, batch_tensor.shape, prev_position.shape, target_position.shape)
        output = model(node_feats, edge_index, batch_tensor, prev_position)
        loss = criterion(output.squeeze(0), target_position)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    return total_loss / len(dataloader)


def validate(model, dataloader, criterion):
    model.eval()
    total_loss = 0.0
    device= torch.device("cuda" if torch.cuda.is_available() else "cpu")    


    with torch.no_grad():
        for batch in dataloader:
            batch = {k: v[0] for k, v in batch.items()}

            node_feats = batch["node_features_seq"][-1].to(device)
            edge_index = batch["edge_index_seq"][-1].to(device)
            batch_tensor = batch["batch_seq"][-1].to(device)
            prev_position = batch["prev_positions_seq"][-1].to(device)
            target_position = batch["target_position"].to(device)

            output = model(node_feats, edge_index, batch_tensor, prev_position)
            loss = criterion(output.squeeze(0), target_position)
            total_loss += loss.item()

    return total_loss / len(dataloader)

In [15]:
num_epochs = 30

for epoch in range(1, num_epochs + 1):
    train_loss = train_epoch(model, train_loader, optimizer, criterion)
    # val_loss = validate(model, test_loader, criterion)

    # print(f"Epoch {epoch:02d} | Train Loss: {train_loss:.6f} | Val Loss: {val_loss:.6f}")

  node_feats = torch.tensor(sg["node_features"], dtype=torch.float)
  edge_index = torch.tensor(sg["edge_index"], dtype=torch.long)


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [12]:
device= torch.device("cuda" if torch.cuda.is_available() else "cpu")    
print(f"Training complete. Model is on device: {device}")
total_loss = 0.0

for batch in train_loader:
        batch = {k: v[0] for k, v in batch.items()}  # unwrap batch_size=1
 
        print(batch["node_features_seq"][-1].shape)
        print(batch["edge_index_seq"][-1].shape)
        print(batch["batch_seq"][-1].shape)
        print(batch["prev_positions_seq"][-1].shape)
        print(batch["target_position"][-1].shape)


        node_feats = batch["node_features_seq"][-1].to(device)         # use latest frame
        edge_index = batch["edge_index_seq"][-1].to(device)
        batch_tensor = batch["batch_seq"][-1].to(device)
        prev_position = batch["prev_positions_seq"][-1].to(device)     # last known position
        target_position = batch["target_position"].to(device)
        model(node_feats, edge_index, batch_tensor, prev_position)

Training complete. Model is on device: cuda
torch.Size([9, 9])
torch.Size([2, 9])
torch.Size([9])
torch.Size([3])
torch.Size([])


  node_feats = torch.tensor(sg["node_features"], dtype=torch.float)
  edge_index = torch.tensor(sg["edge_index"], dtype=torch.long)


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [5]:
import os
import pickle
import torch
from torch.utils.data import Dataset
from torch_geometric.data import Data

class SceneGraphSingleFrameDataset(Dataset):
    def __init__(self, graph_dir, position_txt):
        self.graph_files = sorted([
            os.path.join(graph_dir, f)
            for f in os.listdir(graph_dir)
            if f.endswith(".pkl")
        ])

        # Load positions
        with open(position_txt, "r") as f:
            self.positions = [
                torch.tensor([float(x), float(y), float(z)], dtype=torch.float)
                for _, x, y, z in (line.strip().split(",") for line in f)
            ]

        self.length = min(len(self.graph_files), len(self.positions)) - 1  # t and t+1

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        with open(self.graph_files[idx], "rb") as f:
            sg = pickle.load(f)

        node_features = torch.tensor(sg["node_features"], dtype=torch.float)
        edge_index = torch.tensor(sg["edge_index"], dtype=torch.long)

        prev_position = self.positions[idx]              # t
        target_position = self.positions[idx + 1]        # t + 1

        batch = torch.zeros(node_features.size(0), dtype=torch.long)

        return {
            "node_features": node_features,
            "edge_index": edge_index,
            "batch": batch,
            "prev_position": prev_position,
            "target_position": target_position
        }


In [6]:
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool

class GCN_SingleFramePositionPredictor(nn.Module):
    def __init__(self, in_dim=9, gcn_hidden_dim=32, mlp_hidden_dim=64):
        super().__init__()
        self.gcn1 = GCNConv(in_dim, gcn_hidden_dim)
        self.gcn2 = GCNConv(gcn_hidden_dim, gcn_hidden_dim)

        self.mlp = nn.Sequential(
            nn.Linear(gcn_hidden_dim + 3, mlp_hidden_dim),
            nn.ReLU(),
            nn.Linear(mlp_hidden_dim, 3)
        )

    def forward(self, node_features, edge_index, batch, prev_position):
        # Safety check
        assert edge_index.max().item() < node_features.size(0), "edge_index out of bounds"

        # Move all to same device
        device = node_features.device
        edge_index = edge_index.to(device)
        batch = batch.to(device)
        prev_position = prev_position.to(device)

        x = F.relu(self.gcn1(node_features, edge_index))
        x = F.relu(self.gcn2(x, edge_index))

        graph_embed = global_mean_pool(x, batch)

        if prev_position.ndim == 1:
            prev_position = prev_position.unsqueeze(0)

        combined = torch.cat([graph_embed, prev_position], dim=1)
        return self.mlp(combined)


In [8]:
from torch.utils.data import DataLoader, random_split

graph_dir = "/home/irfan/roadscene2vec/examples/town2/"
position_txt = "/home/irfan/roadscene2vec/examples/transferdata/pos.txt"

dataset = SceneGraphSingleFrameDataset(graph_dir, position_txt)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Custom collate_fn to return lists (no padding)
def custom_collate(batch):
    return batch

train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True, collate_fn=custom_collate)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, collate_fn=custom_collate)


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = GCN_SingleFramePositionPredictor().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.MSELoss()

for epoch in range(10):
    model.train()
    total_loss = 0

    for batch in train_loader:
        sample = batch[0]  # since batch_size = 1

        node_features = sample["node_features"].to(device)
        edge_index = sample["edge_index"].to(device)
        batch_tensor = sample["batch"].to(device)
        prev_position = sample["prev_position"].to(device)
        target_position = sample["target_position"].to(device).unsqueeze(0)

        optimizer.zero_grad()
        output = model(node_features, edge_index, batch_tensor, prev_position)
        loss = loss_fn(output, target_position)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")


  node_features = torch.tensor(sg["node_features"], dtype=torch.float)
  edge_index = torch.tensor(sg["edge_index"], dtype=torch.long)


Epoch 1, Loss: 376284.7052
Epoch 2, Loss: 836.0653
Epoch 3, Loss: 1244.5343
Epoch 4, Loss: 1281.2491


sequence model

In [5]:
import torch
from torch.utils.data import Dataset
import os
import pickle

def pad_tensor(tensor, target_shape):
    padded = torch.zeros(*target_shape, dtype=tensor.dtype)
    slices = tuple(slice(0, min(s, t)) for s, t in zip(tensor.shape, target_shape))
    padded[slices] = tensor[slices]
    return padded

class ScenegraphSequenceDataset(Dataset):
    def __init__(self, graph_dir, position_txt, sequence_length=5):
        self.graph_dir = graph_dir
        self.sequence_length = sequence_length

        self.graph_files = sorted([
            os.path.join(graph_dir, f)
            for f in os.listdir(graph_dir)
            if f.endswith(".pkl")
        ])

        with open(position_txt, "r") as f:
            self.positions = [
                torch.tensor([float(x), float(y), float(z)], dtype=torch.float)
                for _, x, y, z in (line.strip().split(",") for line in f)
            ]

        self.max_index = min(len(self.graph_files), len(self.positions)) - 1
        self.valid_range = self.max_index - self.sequence_length

    def __len__(self):
        return self.valid_range

    def __getitem__(self, idx):
        node_features_seq = []
        edge_index_seq = []
        batch_seq = []
        prev_positions_seq = []

        for i in range(idx, idx + self.sequence_length):
            with open(self.graph_files[i], "rb") as f:
                sg = pickle.load(f)

            node_feats = torch.tensor(sg["node_features"], dtype=torch.float)
            padded_node_feats = pad_tensor(node_feats, (9, 9))

            edge_index = torch.tensor(sg["edge_index"], dtype=torch.long)
            padded_edge_index = pad_tensor(edge_index, (2, 9))

            batch = torch.zeros(9, dtype=torch.long)  # fixed size

            node_features_seq.append(padded_node_feats)
            edge_index_seq.append(padded_edge_index)
            batch_seq.append(batch)
            prev_positions_seq.append(self.positions[i])

        target_position = self.positions[idx + self.sequence_length]

        return {
            "node_features_seq": torch.stack(node_features_seq),        # [seq_len, 9, 9]
            "edge_index_seq": torch.stack(edge_index_seq),              # [seq_len, 2, 9]
            "batch_seq": torch.stack(batch_seq),                        # [seq_len, 9]
            "prev_positions_seq": torch.stack(prev_positions_seq),      # [seq_len, 3]
            "target_position": target_position                          # [3]
        }


In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool

class GCN_SingleFramePositionPredictor(nn.Module):
    def __init__(self, in_dim, gcn_hidden_dim=32, mlp_hidden_dim=64, sequence_length=5):
        super().__init__()
        self.sequence_length = sequence_length
        self.gcn1 = GCNConv(in_dim, gcn_hidden_dim)
        self.gcn2 = GCNConv(gcn_hidden_dim, gcn_hidden_dim)

        self.mlp = nn.Sequential(
            nn.Linear((gcn_hidden_dim + 3) * sequence_length, mlp_hidden_dim),
            nn.ReLU(),
            nn.Linear(mlp_hidden_dim, 3)
        )

    def forward(self, node_features_seq, edge_index_seq, batch_seq, prev_positions_seq):
        """
        Each input: shape [batch_size, seq_len, ...]
        """
        batch_size, seq_len, N, in_dim = node_features_seq.shape
        gcn_outputs = []

        for t in range(seq_len):
            x = node_features_seq[:, t]       # [B, N, in_dim]
            ei = edge_index_seq[:, t]         # [B, 2, E]
            b = batch_seq[:, t]               # [B, N]
            pos = prev_positions_seq[:, t]    # [B, 3]

            batch_gcn_out = []
            for i in range(batch_size):
                out = F.relu(self.gcn1(x[i], ei[i]))
                out = F.relu(self.gcn2(out, ei[i]))
                pooled = global_mean_pool(out, b[i])
                combined = torch.cat([pooled, pos[i].unsqueeze(0)], dim=1)
                batch_gcn_out.append(combined)

            gcn_outputs.append(torch.cat(batch_gcn_out, dim=0))  # shape: [B, D+3]

        full_seq_embed = torch.cat(gcn_outputs, dim=1)  # [B, (D+3)*seq_len]
        return self.mlp(full_seq_embed)  # [B, 3]


In [7]:
from torch.utils.data import DataLoader, random_split
graph_dir = "/home/irfan/roadscene2vec/examples/town2/"
position_txt = "/home/irfan/roadscene2vec/examples/transferdata/pos.txt"
dataset = ScenegraphSequenceDataset(
    graph_dir=graph_dir,
    position_txt=position_txt,
    sequence_length=5
)

train_size = int(0.8 * len(dataset))
train_dataset, test_dataset = random_split(dataset, [train_size, len(dataset) - train_size])

def collate_fn(batch):
    return {
        key: torch.stack([item[key] for item in batch])
        for key in batch[0]
    }

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn)

model = GCN_SingleFramePositionPredictor(in_dim=9).cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.MSELoss()

for epoch in range(10):
    model.train()
    for batch in train_loader:
        for k in batch:
            batch[k] = batch[k].cuda()

        pred = model(
            batch["node_features_seq"],
            batch["edge_index_seq"],
            batch["batch_seq"],
            batch["prev_positions_seq"]
        )

        loss = loss_fn(pred, batch["target_position"])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}: Loss = {loss.item():.4f}")


  node_feats = torch.tensor(sg["node_features"], dtype=torch.float)
  edge_index = torch.tensor(sg["edge_index"], dtype=torch.long)
/pytorch/aten/src/ATen/native/cuda/IndexKernel.cu:94: operator(): block: [0,0,0], thread: [8,0,0] Assertion `-sizes[i] <= index && index < sizes[i] && "index out of bounds"` failed.


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
