In [1]:
import os
import torch
import torchvision
from torchvision import transforms as T
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from pycocotools.coco import COCO
import numpy as np


In [2]:
# Clone the torchvision repository to get the helper files
def download_helper_files():
    """Make the torchvision detection reference helpers available locally.

    Copies every file listed in ``helper_files`` that is not yet present in
    ``vision_utils`` from ``vision_temp/references/detection``. The torchvision
    repository is cloned into ``vision_temp`` first when that directory does
    not exist. Helper files that already exist in ``vision_utils`` are left
    untouched, so local edits are not overwritten.

    Raises:
        subprocess.CalledProcessError: if ``git clone`` exits with a non-zero
            status.
        OSError: if the ``git`` executable cannot be started.
    """
    import subprocess
    import shutil

    repo_url = 'https://github.com/pytorch/vision.git'
    clone_dir = 'vision_temp'
    utils_dir = 'vision_utils'

    helper_files = [
        'utils.py',
        'engine.py',
        'coco_utils.py',
        'coco_eval.py',
        'transforms.py',
        'group_by_aspect_ratio.py',
        'presets.py'
    ]

    # Check every helper, not just utils.py, so an interrupted or partial
    # earlier copy is repaired instead of being reported as complete.
    missing_files = [
        file_name for file_name in helper_files
        if not os.path.exists(os.path.join(utils_dir, file_name))
    ]
    if not missing_files:
        print("Helper files already exist in the utils directory.")
        return

    if os.path.exists(clone_dir):
        print(f"'{clone_dir}' exists. Using existing repository.")
    else:
        print("Cloning torchvision repository to download helper files...")
        try:
            # Only the current snapshot is needed, so skip the full history.
            # check=True turns a failed clone into an exception instead of
            # silently continuing with an empty/partial directory.
            subprocess.run(
                ['git', 'clone', '--depth', '1', repo_url, clone_dir],
                check=True)
        except (subprocess.CalledProcessError, OSError):
            # Do not leave a half-written clone behind: the next run would
            # otherwise treat it as a usable existing repository.
            shutil.rmtree(clone_dir, ignore_errors=True)
            raise

    # Path to the helper files in the cloned repo
    src_dir = os.path.join(clone_dir, 'references', 'detection')

    os.makedirs(utils_dir, exist_ok=True)

    # Copy only the helpers that are still missing
    for file_name in missing_files:
        src_file = os.path.join(src_dir, file_name)
        dst_file = os.path.join(utils_dir, file_name)
        if os.path.exists(src_file):
            shutil.copy(src_file, dst_file)
            print(f"Copied {file_name} to the utils directory.")
        else:
            print(f"{file_name} not found in the repository.")

    # Optionally, remove the cloned repository
    # If you want to remove 'vision_temp' after copying, uncomment the following lines:
    # shutil.rmtree(clone_dir)
    # print("Removed temporary cloned repository.")

# Fetch the helper files when the cell runs, so that the `vision_utils`
# modules imported in a later cell are present on disk.
download_helper_files()


Helper files already exist in the utils directory.


In [3]:
# Sanity check: read the copied utils.py and report whether it defines MetricLogger
with open('vision_utils/utils.py', 'r') as file:
    content = file.read()

if 'class MetricLogger' not in content:
    print("MetricLogger class is NOT present in utils.py")
else:
    print("MetricLogger class is present in utils.py")


MetricLogger class is present in utils.py


In [4]:
import sys
# Put the helper directory itself on the module search path so that its files
# can also be imported by bare module name (as `from engine import ...` below does).
sys.path.append('vision_utils')


# Package-qualified imports of the helper modules and the names used later.
from vision_utils import utils
from vision_utils import engine
from vision_utils.engine import train_one_epoch, evaluate
from vision_utils.utils import collate_fn

# NOTE(review): this rebinds train_one_epoch/evaluate from the bare `engine`
# module name; presumably redundant with the `vision_utils.engine` import
# above — confirm and keep only one of the two forms.
from engine import train_one_epoch, evaluate


In [5]:
class COCOMaskDataset(Dataset):
    """COCO-format instance segmentation dataset yielding ``(image, target)``.

    Args:
        root: Directory containing the image files referenced by the
            annotation file's ``file_name`` entries.
        annFile: Path to the COCO annotation JSON, loaded with ``COCO``.
        transforms: Optional callable invoked as ``transforms(img, target)``
            that must return the transformed ``(img, target)`` pair.

    Each ``target`` is a dict with the keys:
        ``boxes``    float32 tensor of shape (N, 4) in (x_min, y_min, x_max, y_max)
        ``labels``   int64 tensor of shape (N,) holding the raw ``category_id``
        ``masks``    uint8 tensor of shape (N, H, W), one binary mask per annotation
        ``image_id`` tensor holding the COCO image id
        ``area``     float32 tensor of shape (N,)
        ``iscrowd``  int64 tensor of shape (N,)
    where N is the number of annotations of the image (possibly 0).
    """

    def __init__(self, root, annFile, transforms=None):
        self.root = root
        self.coco = COCO(annFile)
        # Sorted so that an index always maps to the same image id
        self.ids = list(sorted(self.coco.imgs.keys()))
        self.transforms = transforms

    def __getitem__(self, index):
        """Load the image at ``index`` and build its detection target dict."""
        # Get image ID
        img_id = self.ids[index]
        # Load image
        img_info = self.coco.loadImgs(img_id)[0]
        path = img_info['file_name']
        img = Image.open(os.path.join(self.root, path)).convert("RGB")

        # Load annotations
        ann_ids = self.coco.getAnnIds(imgIds=img_id)
        anns = self.coco.loadAnns(ann_ids)

        # Extract boxes, labels, masks
        boxes = []
        labels = []
        masks = []
        for ann in anns:
            # COCO stores boxes as (x, y, width, height); convert to corner form
            x_min, y_min, width, height = ann['bbox']
            boxes.append([x_min, y_min, x_min + width, y_min + height])
            labels.append(ann['category_id'])
            masks.append(self.coco.annToMask(ann))

        # Convert to tensors. reshape(-1, 4) keeps the (N, 4) layout even when
        # the image has no annotations (an empty list would otherwise give (0,)).
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        if masks:
            # Stack into one ndarray first: converting a Python list of arrays
            # element by element is much slower.
            masks = torch.as_tensor(np.stack(masks), dtype=torch.uint8)
        else:
            # No annotations: still return a rank-3 (0, H, W) mask tensor.
            img_width, img_height = img.size  # PIL reports (width, height)
            masks = torch.zeros((0, img_height, img_width), dtype=torch.uint8)

        # Additional fields
        image_id = torch.tensor([img_id])
        area = torch.as_tensor([ann['area'] for ann in anns], dtype=torch.float32)
        iscrowd = torch.as_tensor([ann['iscrowd'] for ann in anns], dtype=torch.int64)

        # Compile target
        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        target['masks'] = masks
        target['image_id'] = image_id
        target['area'] = area
        target['iscrowd'] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Number of images listed in the annotation file."""
        return len(self.ids)


In [6]:
class _ImageTargetCompose:
    """Apply image-only transforms while passing the target through unchanged.

    ``torchvision.transforms.Compose`` accepts a single argument, but the
    dataset calls ``transforms(img, target)``; this adapter provides that
    two-argument signature.
    """

    def __init__(self, image_transforms):
        self.image_transforms = list(image_transforms)

    def __call__(self, image, target=None):
        for transform in self.image_transforms:
            image = transform(image)
        if target is None:
            # Keep single-argument use working: return just the image.
            return image
        return image, target


def get_transform(train):
    """Build the transform pipeline used by the datasets.

    Args:
        train: Whether the pipeline is for the training split. Currently
            unused beyond marking where augmentations would be added.

    Returns:
        A callable usable as ``transforms(img, target) -> (img, target)``
        (or ``transforms(img) -> img``) that applies ``T.ToTensor()`` to the
        image and leaves the target as is.
    """
    image_transforms = [T.ToTensor()]
    if train:
        # Add data augmentation here if needed. Note that geometric
        # augmentations (flips, crops, resizes) must also update the target's
        # boxes and masks, so they cannot be plain image-only transforms.
        pass
    return _ImageTargetCompose(image_transforms)


In [7]:
def get_instance_segmentation_model(num_classes):
    """Create a Mask R-CNN (ResNet-50 FPN) with heads sized for ``num_classes``.

    Starts from the model built with ``weights='DEFAULT'`` and swaps in a new
    box predictor and a new mask predictor.

    Args:
        num_classes: Number of output classes passed to both new predictors.

    Returns:
        The model with ``roi_heads.box_predictor`` and
        ``roi_heads.mask_predictor`` replaced.
    """
    detection = torchvision.models.detection
    model = detection.maskrcnn_resnet50_fpn(weights='DEFAULT')
    heads = model.roi_heads

    # Box head: reuse the existing classifier's input width
    box_in_features = heads.box_predictor.cls_score.in_features
    heads.box_predictor = detection.faster_rcnn.FastRCNNPredictor(
        box_in_features, num_classes)

    # Mask head: reuse the existing input channel count, 256 hidden channels
    mask_in_channels = heads.mask_predictor.conv5_mask.in_channels
    mask_hidden_channels = 256
    heads.mask_predictor = detection.mask_rcnn.MaskRCNNPredictor(
        mask_in_channels, mask_hidden_channels, num_classes)

    return model


In [8]:
# Directory holding the COCO annotation JSON files.
# NOTE(review): machine-specific absolute path; it appears to point at the
# same dataset as the `base_dir` defined in a later cell — confirm and
# consider deriving one from the other.
path = r'C:\Users\alway\OneDrive\Documents\GitHub\Applied-AI\hw2\datasets\coco_dataset\annotations'

# List all entries in the annotations directory (order as returned by os.listdir)
files = os.listdir(path)

# Print one entry per line
print("Files in the annotations directory:")
for file in files:
    print(file)


Files in the annotations directory:
captions_train2017.json
captions_val2017.json
instances_train2017.json
instances_val2017.json
person_keypoints_train2017.json
person_keypoints_val2017.json


In [9]:
# Use GPU if available, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)
# Base directory of the COCO dataset.
# NOTE(review): machine-specific absolute path — the notebook will not run
# elsewhere without editing this line.
base_dir = r'C:/Users/alway/OneDrive/Documents/GitHub/Applied-AI/hw2/datasets/coco_dataset'

# Paths to images and annotations
train_dir = os.path.join(base_dir, 'train2017')
train_ann_file = os.path.join(base_dir, 'annotations', 'instances_train2017.json')
val_dir = os.path.join(base_dir, 'val2017')
val_ann_file = os.path.join(base_dir, 'annotations', 'instances_val2017.json')

# Create datasets (each constructor loads and indexes its annotation file)
dataset = COCOMaskDataset(train_dir, train_ann_file, transforms=get_transform(train=True))
dataset_test = COCOMaskDataset(val_dir, val_ann_file, transforms=get_transform(train=False))

# Define data loaders. The helper collate_fn is used instead of the default one.
# NOTE(review): num_workers=4 with a dataset class defined inside the notebook
# may fail or hang on Windows, where workers are separate spawned processes —
# confirm, and fall back to num_workers=0 if it does.
data_loader = DataLoader(
    dataset, batch_size=2, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn)

data_loader_test = DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=4,
    collate_fn=utils.collate_fn)


cuda
loading annotations into memory...
Done (t=18.33s)
creating index...
index created!
loading annotations into memory...
Done (t=0.55s)
creating index...
index created!


In [10]:
# Number of classes (including background)
# COCOMaskDataset uses the raw COCO `category_id` as the label, so the heads
# need one output per possible id value plus background.
num_classes = 91  # presumably COCO category ids run up to 90, with 0 as background — TODO confirm

# Get the model using the helper function
model = get_instance_segmentation_model(num_classes)
# Move model to the right device (as the last expression, this also displays the model repr)
model.to(device)


MaskRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(in

In [11]:
# Optimise only the parameters that still require gradients
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# Step schedule: multiply the learning rate by 0.1 every 3 scheduler steps
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)


In [12]:
# Debugging aid: show which files the helper modules are loaded from.
# Note: `import utils` / `import engine` rebind the notebook-level names
# `utils` and `engine`, which an earlier cell bound via `from vision_utils import ...`.
import utils
print("utils module imported from:", utils.__file__)

import engine
print("engine module imported from:", engine.__file__)

# Report library versions
print("PyTorch version:", torch.__version__)
print("TorchVision version:", torchvision.__version__)

# Look for any utils.py directly inside a sys.path entry that could shadow the helper.
# Note: the loop variable reuses (and overwrites) the notebook-level name `path`.
import sys
for path in sys.path:
    possible_utils = os.path.join(path, 'utils.py')
    if os.path.exists(possible_utils):
        print(f"Found utils.py at: {possible_utils}")



utils module imported from: c:\Users\alway\OneDrive\Documents\GitHub\Applied-AI\hw2\rcnn\vision_utils\utils.py
engine module imported from: c:\Users\alway\OneDrive\Documents\GitHub\Applied-AI\hw2\rcnn\vision_utils\engine.py
PyTorch version: 2.4.1+cu118
TorchVision version: 0.19.1+cu118
Found utils.py at: vision_utils\utils.py


In [None]:
# Number of passes over the training data loader
num_epochs = 1

for epoch in range(num_epochs):
    # Train for one epoch (print_freq presumably sets the logging interval in iterations — confirm in engine.py)
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=100)
    # Update the learning rate once per epoch
    lr_scheduler.step()
    # Evaluate on the test dataset after every epoch
    evaluate(model, data_loader_test, device=device)


In [None]:
# Save the trained model's state_dict (not the whole module object)
# to a file in the current working directory
torch.save(model.state_dict(), 'mask_rcnn_coco.pth')
print("Model saved as mask_rcnn_coco.pth")
