In [26]:
import torch
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import json
import os
from PIL import Image
from torchvision import transforms
from torch_geometric.data import Data
import torch.nn as nn
from torch_geometric.nn import GCNConv

In [27]:
class DelftBikesGraphDataset(Dataset):
    def __init__(self, annotations_file, image_dir, transform=None, resize=(640, 480)):
        with open(annotations_file) as f:
            self.annotations = json.load(f)
        
        self.image_dir = image_dir
        self.transform = transform
        
        # Resize the image to a fixed size (e.g., 640x480)
        self.resize = resize
        
        # Get all parts names and their corresponding ids
        self.part_names = self.annotations["all_parts"]
        self.part2id = {name: idx + 1 for idx, name in enumerate(self.part_names)}
        
    def __len__(self):
        return len(self.annotations) - 1  # Exclude "all_parts" key
    
    def __getitem__(self, idx):
        image_name = list(self.annotations.keys())[idx + 1]  # Skip the "all_parts" key
        image_data = self.annotations[image_name]
        
        # Load the image
        img_path = os.path.join(self.image_dir, image_name)
        image = Image.open(img_path).convert("RGB")
        
        # Resize image
        image = image.resize(self.resize)  # Resize to the fixed size (640, 480)
        
        # Extract parts and create boxes and labels
        target = {}
        boxes = []
        labels = []
        
        for part in image_data["available_parts"]:
            part_name = part["part_name"]
            relative_bbox = part["relative_bounding_box"]
            
            # Convert relative bbox to absolute coordinates
            x_min = relative_bbox["left"] * image.width
            y_min = relative_bbox["top"] * image.height
            width = relative_bbox["width"] * image.width
            height = relative_bbox["height"] * image.height
            x_max = x_min + width
            y_max = y_min + height
            boxes.append([x_min, y_min, x_max, y_max])
            labels.append(self.part2id[part_name])
        
        # Padding for bounding boxes (to ensure equal length)
        max_boxes = max([len(image_data["available_parts"]) for image_data in self.annotations.values()])
        padding = max_boxes - len(boxes)
        
        # Pad the boxes with zeros if needed
        if padding > 0:
            boxes.extend([[0, 0, 0, 0]] * padding)  # Pad with dummy boxes of [0, 0, 0, 0]
            labels.extend([0] * padding)  # Pad with dummy labels (0)

        # Create target dictionary
        target["boxes"] = torch.tensor(boxes, dtype=torch.float32)
        target["labels"] = torch.tensor(labels, dtype=torch.int64)
        
        # Transform the image if any
        if self.transform:
            image = self.transform(image)
        
        # Create empty graph data for this image (we don't use graph edges here)
        edge_index = torch.tensor([[], []], dtype=torch.long)  # No edges in this basic setup
        x = torch.ones(len(boxes), dtype=torch.float32)  # Each box has a feature vector (dummy here)

        graph_data = Data(x=x, edge_index=edge_index)
        
        return image, target, graph_data

In [28]:
class GraphRCNN(nn.Module):
    def __init__(self, num_classes, num_parts):
        super(GraphRCNN, self).__init__()
        self.num_classes = num_classes
        
        # Define GCN layers
        self.gcn1 = GCNConv(num_parts, 64)
        self.gcn2 = GCNConv(64, 128)
        
        # Classification layers
        self.fc1 = nn.Linear(128, 256)
        self.fc2 = nn.Linear(256, num_classes)
        
        # Bounding box regression layers
        self.bbox_fc1 = nn.Linear(128, 128)
        self.bbox_fc2 = nn.Linear(128, 4)

    def forward(self, images, edge_index):
        # Graph convolution layers
        x = self.gcn1(images.x, edge_index)
        x = F.relu(x)
        x = self.gcn2(x, edge_index)
        x = F.relu(x)

        # Classifier
        class_out = self.fc1(x)
        class_out = F.relu(class_out)
        class_out = self.fc2(class_out)

        # Bounding box regression
        bbox_out = self.bbox_fc1(x)
        bbox_out = F.relu(bbox_out)
        bbox_out = self.bbox_fc2(bbox_out)

        return {"labels": class_out, "boxes": bbox_out}

In [29]:
def compute_loss(predictions, targets):
    pred_labels = predictions['labels']
    pred_boxes = predictions['boxes']
    
    gt_labels = targets['labels']
    gt_boxes = targets['boxes']
    
    classification_loss = F.cross_entropy(pred_labels, gt_labels)
    bbox_loss = smooth_l1_loss(pred_boxes, gt_boxes)
    
    total_loss = classification_loss + bbox_loss
    return total_loss


def smooth_l1_loss(pred_boxes, gt_boxes, beta=1.0):
    diff = torch.abs(pred_boxes - gt_boxes)
    loss = torch.where(diff < beta, 0.5 * diff ** 2, beta * (diff - 0.5 * beta))
    return loss.mean()


In [30]:
train_dataset = DelftBikesGraphDataset(annotations_file='new_annotations.json', image_dir='DelftBikes/train', transform=transform, resize=(640, 480))
train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)


# Initialize the model
model = GraphRCNN(num_classes=len(train_dataset.part_names) + 1, num_parts=len(train_dataset.part_names))
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Move model to the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training loop
for epoch in range(10):
    model.train()
    for images, targets, graph_data in train_loader:
        images = images.to(device)
        targets = targets.to(device)
        edge_index = graph_data.edge_index.to(device)
        x = graph_data.x.to(device)
        
        optimizer.zero_grad()
        
        # Get predictions
        predictions = model(images, edge_index)
        
        # Compute loss
        loss = compute_loss(predictions, targets)
        
        # Backpropagate and optimize
        loss.backward()
        optimizer.step()
        
    print(f"Epoch {epoch + 1}, Loss: {loss.item()}")

TypeError: list indices must be integers or slices, not str

In [None]:
def evaluate_and_find_missing(model, dataloader):
    model.eval()
    with torch.no_grad():
        for images, targets, graph_data in dataloader:
            images = images.to(device)
            predictions = model(images, graph_data.edge_index.to(device))
            
            for idx in range(len(images)):
                # Get the detected labels (predicted parts)
                detected_labels = predictions[idx]['labels'].cpu().numpy()
                detected_parts = [label_to_part(label) for label in detected_labels]
                
                # Subtract detected parts from all parts to find missing ones
                missing_parts = list(set(train_dataset.part_names) - set(detected_parts))
                print(f"Missing parts for image {idx + 1}: {missing_parts}")

def label_to_part(label):
    return train_dataset.part_names[label - 1]  # Adjust if needed based on your indexing

# After training, evaluate on the same training data (or on a validation set if available)
evaluate_and_find_missing(model, train_loader)
