In [None]:
import zipfile
import json
import os
import matplotlib.pyplot as plt
from matplotlib.patches import Polygon
from PIL import Image
import pandas as pd
import torch
import torchvision
from torch.utils.data import Dataset
import numpy as np
from skimage.draw import polygon
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision.transforms as T


In [None]:
# Archive to unpack and the directory that receives its contents
zip_file_path = "/content/Original.zip"
extract_path = "image_data"

# Unpack the archive; every failure mode is reported as a message instead of a traceback
try:
    with zipfile.ZipFile(zip_file_path, mode='r') as archive:
        archive.extractall(path=extract_path)
    print(f"Successfully extracted '{zip_file_path}' to '{extract_path}'")
except zipfile.BadZipFile:
    print(f"Error: '{zip_file_path}' is not a valid zip file.")
except FileNotFoundError:
    print(f"Error: File '{zip_file_path}' not found.")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

In [None]:
# Locations of the annotation export and of the extracted images
annotation_path = '/content/labels_my-project-name_2025-01-23-07-57-12.json'
image_folder = "/content/image_data/Original"

# Parse the annotation JSON into a dict
with open(annotation_path, 'r') as f:
    data = json.loads(f.read())

# Lookup tables: image id -> image record, category id -> category name
images = dict((img['id'], img) for img in data['images'])
annotations = data['annotations']
categories = dict((cat['id'], cat['name']) for cat in data['categories'])

# Visualize the dataset
def visualize_dataset(image_id, annotations, image_folder, images, categories):
    """Display one image with its segmentation outlines and category labels.

    Args:
        image_id: Key into ``images`` selecting the image to show.
        annotations: Iterable of annotation dicts; only entries whose
            ``'image_id'`` equals ``image_id`` are drawn.
        image_folder: Directory containing the image files.
        images: Mapping from image id to a record with a ``'file_name'`` key.
        categories: Mapping from category id to category name.
    """
    record = images[image_id]
    picture = Image.open(os.path.join(image_folder, record['file_name']))

    # Draw the image on a 10x10 inch figure with the axes hidden
    plt.figure(figsize=(10, 10))
    plt.imshow(picture)
    plt.axis('off')
    axes = plt.gca()

    for ann in annotations:
        # Skip annotations that belong to other images
        if ann['image_id'] != image_id:
            continue

        category_name = categories[ann['category_id']]

        # Each segmentation is a flat [x0, y0, x1, y1, ...] list; pair it up into vertices
        for flat in ann['segmentation']:
            vertices = [(flat[k], flat[k + 1]) for k in range(0, len(flat), 2)]
            outline = Polygon(vertices, closed=True, edgecolor='red', fill=False, linewidth=2, label=category_name)
            axes.add_patch(outline)

        # Category label anchored at the first two bbox values, shifted by 10 in y
        box = ann['bbox']
        axes.text(
            box[0], box[1] - 10, category_name, color='yellow', fontsize=10, bbox=dict(facecolor='black', alpha=0.5)
        )

    plt.show()

# Render every image listed in the annotation file together with its overlays
for img_id in images:
    visualize_dataset(
        image_id=img_id,
        annotations=annotations,
        image_folder=image_folder,
        images=images,
        categories=categories,
    )


In [None]:
# Location of the annotation export
annotation_path = '/content/labels_my-project-name_2025-01-23-07-57-12.json'

# Parse the annotation JSON
with open(annotation_path, 'r') as f:
    data = json.load(f)

# One record per image, restricted to the fields used downstream
image_data = [
    {key: img[key] for key in ('id', 'file_name', 'width', 'height')}
    for img in data['images']
]
images_df = pd.DataFrame(image_data)

# One record per annotation, restricted to the fields used downstream
annotation_data = [
    {key: ann[key] for key in ('image_id', 'category_id', 'segmentation', 'bbox', 'area', 'iscrowd')}
    for ann in data['annotations']
]
annotations_df = pd.DataFrame(annotation_data)

# Attach the image record to each annotation row (annotation.image_id == image.id)
merged_df = annotations_df.merge(images_df, left_on='image_id', right_on='id', how='inner')

# Show the first rows of the merged table
print(merged_df.head())


In [None]:
class CustomDataset(Dataset):
    """Instance-segmentation dataset backed by a merged annotation/image DataFrame.

    Each item is one image together with every annotation row that shares its
    ``image_id``.

    Args:
        df: DataFrame with one row per annotation. The columns read here are
            ``image_id``, ``file_name``, ``width``, ``height``, ``bbox``,
            ``segmentation``, ``area``, ``iscrowd`` and ``category_id``.
        image_folder: Directory that contains the image files.
        transform: Optional callable invoked as ``transform(image, target)``;
            its return value is unpacked into the returned ``(image, target)``.
    """

    def __init__(self, df, image_folder, transform=None):
        self.df = df
        self.image_folder = image_folder
        self.transform = transform

        # Unique image ids, computed once instead of on every __len__/__getitem__
        # call. Rebuild the dataset if the DataFrame is replaced afterwards.
        self.image_ids = df['image_id'].unique()

        # Categories dictionary (id to name mapping)
        self.categories = {1: 'name(en)', 2: 'name(urd)', 3: 'fathername(en)', 4: 'fathername(urd)'}  # Update this with actual category names

    @staticmethod
    def _xywh_to_xyxy(bbox):
        """Convert ``[x_min, y_min, width, height]`` to ``[x_min, y_min, x_max, y_max]``."""
        x_min, y_min, width, height = bbox
        return [x_min, y_min, x_min + width, y_min + height]

    def __len__(self):
        """Return the number of distinct images (not the number of annotation rows)."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the ``idx``-th distinct image.

        ``image`` is an RGB PIL image. ``target`` is a dict with:
            boxes:    float32 tensor, one ``[x1, y1, x2, y2]`` row per annotation
            labels:   int64 tensor of category ids
            masks:    uint8 tensor, one (height, width) binary mask per annotation
            image_id: one-element tensor holding the image id
            area:     float32 tensor of annotation areas
            iscrowd:  int64 tensor of iscrowd flags
        """
        image_id = self.image_ids[idx]

        # All annotation rows for this image; the first row also carries the image metadata
        rows = self.df[self.df['image_id'] == image_id]
        image_info = rows.iloc[0]

        # Load the image
        img_path = os.path.join(self.image_folder, image_info['file_name'])
        image = Image.open(img_path).convert("RGB")

        height = int(image_info['height'])
        width = int(image_info['width'])

        boxes = []
        masks = []
        for _, ann in rows.iterrows():
            # Annotation boxes are [x, y, w, h]; torchvision detection models
            # require corner format [x1, y1, x2, y2].
            boxes.append(self._xywh_to_xyxy(ann['bbox']))

            # Rasterise every polygon of this annotation into one binary mask.
            # Polygons are flat [x0, y0, x1, y1, ...] lists; skimage's polygon()
            # takes row (y) coordinates first, then column (x) coordinates.
            mask = np.zeros((height, width), dtype=np.uint8)
            for seg in ann['segmentation']:
                poly = np.array(seg).reshape((-1, 2))
                rr, cc = polygon(poly[:, 1], poly[:, 0], mask.shape)
                mask[rr, cc] = 1
            masks.append(mask)

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # Stack first: converting a Python list of ndarrays element by element is slow
        masks = torch.as_tensor(np.stack(masks), dtype=torch.uint8)
        labels = torch.as_tensor(rows['category_id'].tolist(), dtype=torch.int64)
        area = torch.as_tensor(rows['area'].tolist(), dtype=torch.float32)
        iscrowd = torch.as_tensor(rows['iscrowd'].tolist(), dtype=torch.int64)

        target = {
            'boxes': boxes,
            'labels': labels,
            'masks': masks,
            'image_id': torch.tensor([image_id]),
            'area': area,
            'iscrowd': iscrowd
        }

        if self.transform:
            image, target = self.transform(image, target)

        return image, target

# Build the dataset from the merged annotation table
dataset = CustomDataset(merged_df, "/content/image_data/Original")

# Fetch the first sample and print the image size next to its target dict
image, target = dataset[0]
print(image.size, target)


In [None]:
# Number of output classes: the 4 annotated categories plus 1 background class
NUM_CLASSES = 4 + 1

# Pre-trained Mask R-CNN model.
# `weights="DEFAULT"` replaces the deprecated `pretrained=True` flag and loads
# the default pre-trained weights.
model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights="DEFAULT")

# Replace the box classification head so it predicts NUM_CLASSES classes
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, NUM_CLASSES)

# Replace the mask head likewise (256 is the hidden layer width of the new predictor)
in_mask_features = model.roi_heads.mask_predictor.conv5_mask.in_channels
model.roi_heads.mask_predictor = torchvision.models.detection.mask_rcnn.MaskRCNNPredictor(in_mask_features, 256, NUM_CLASSES)

# Move model to the GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


In [None]:
# Define transformations
transform = T.Compose([T.ToTensor()])  # Convert images to tensor


def detection_collate(batch):
    """Regroup a list of (image, target) samples into (images, targets) tuples.

    Samples are kept as tuples rather than being stacked into one tensor.
    A named function (unlike a lambda) can also be pickled, which DataLoader
    worker processes may require.
    """
    return tuple(zip(*batch))


# Create DataLoader
train_loader = DataLoader(dataset, batch_size=2, shuffle=True, collate_fn=detection_collate)

# Optimizer Adam
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Track losses for visualization
losses_list = []

# Training
num_epochs = 30
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0  # Accumulated loss for this epoch

    # Batch-specific names are used on purpose: calling the loop variable
    # `images` would overwrite the module-level `images` dict built from the
    # annotation file and break re-running the visualization cell.
    for batch_images, batch_targets in train_loader:
        # Convert PIL images to tensors and move everything to the device
        batch_images = [transform(image).to(device) for image in batch_images]
        batch_targets = [{k: v.to(device) for k, v in t.items()} for t in batch_targets]

        optimizer.zero_grad()
        # In training mode the model returns a dict of individual losses
        loss_dict = model(batch_images, batch_targets)
        losses = sum(loss for loss in loss_dict.values())

        losses.backward()
        optimizer.step()

        running_loss += losses.item()

    # Average loss for the epoch
    avg_loss = running_loss / len(train_loader)
    losses_list.append(avg_loss)

    print(f"Epoch {epoch + 1}, Loss: {avg_loss}")

# Plot training loss curve (x-axis follows the epochs actually recorded)
plt.figure(figsize=(10, 6))
plt.plot(range(1, len(losses_list) + 1), losses_list, label="Training Loss", color='b')
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Training Loss Over Epochs")
plt.legend()
plt.show()


In [None]:
# Inspect the first row (position 0) of the merged annotation/image DataFrame

merged_df.iloc[0]

In [None]:
# List the bounding box and segmentation polygons of every annotation.
# `json` is provided by the notebook's import cell at the top, so it is not
# re-imported here.

# Load annotation file
annotation_path = '/content/labels_my-project-name_2025-01-23-07-57-12.json'

# Load annotations
with open(annotation_path, 'r') as f:
    data = json.load(f)

annotations = data['annotations']

# Iterate through annotations and print bounding boxes and segmentation masks
for ann in annotations:
    print(f"Image ID: {ann['image_id']}")
    print(f"Bounding Box: {ann['bbox']}") # [x_min, y_min, width, height]
    print(f"Segmentation Mask: {ann['segmentation']}") # List of polygon coordinates
    print("-" * 20)