In [1]:
import deeplake
import numpy as np
import math
import sys
import time
import torchvision
import albumentations as A
from albumentations.pytorch import ToTensorV2
import torch
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
import torchvision.models.detection.mask_rcnn

# Connect to the training dataset
# ds_train = deeplake.load('hub://activeloop/coco-train')

# num_classes = len(ds_train.categories.info.class_names)


# #dataset hosted by activeloop 
ds = deeplake.load('hub://activeloop/office-home-domain-adaptation')

#summary of dataset tensors 
ds.summary()

ds_train, ds_test = ds.random_split([0.8, 0.2])

print(f"Length of train_view is {len(ds_train)}")
print(f"Length of val_view is {len(ds_test)}")


##need to implement classes of interest for our dataset https://wandb.ai/istranic/deeplake-demos/reports/Model-Reproducibility-Using-Activeloop-Deep-Lake-and-Weights-Biases--VmlldzoyNzIzNDM1

# These are the classes we care about and they will be remapped to 0,1,2,3,4,5,6,7,8 in the model
CLASSES_OF_INTEREST = ['Real World']


# The classes of interest correspond to the following array values in the current dataset
# INDS_OF_INTEREST = [ds_train.labels.info.class_names.index(item) for item in CLASSES_OF_INTEREST]
INDS_OF_INTEREST = [ds_train.domain_categories.info.class_names.index(item) for item in CLASSES_OF_INTEREST]


# Filter the dataset to only include samples with the specified domain categories
ds_filtered_train = ds_train.filter(lambda x: 0 in x['domain_categories'])  # Assuming "Real World" is represented by 0
# ds_filtered_test = ds_test.filter('domain_categories', CLASSES_OF_INTEREST)

# Verify the lengths of the filtered datasets
print(f"Length of filtered train dataset: {len(ds_filtered_train)}")
ds_filtered_train.summary()
# print(f"Length of filtered test dataset: {len(ds_filtered_test)}")


#select only a few objects and one domain 
#split into train and test

\

Opening dataset in read-only mode as you don't have write permissions.


-

This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/activeloop/office-home-domain-adaptation



/

hub://activeloop/office-home-domain-adaptation loaded successfully.



 

Dataset(path='hub://activeloop/office-home-domain-adaptation', read_only=True, tensors=['images', 'domain_categories', 'domain_objects'])

      tensor           htype                shape              dtype  compression
      -------         -------              -------            -------  ------- 
      images           image     (15588, 4:6500, 18:6000, 3)   uint8    jpeg   
 domain_categories  class_label          (15588, 1)           uint32    None   
  domain_objects    class_label          (15588, 1)           uint32    None   
Length of train_view is 12471
Length of val_view is 3117


100%|██████████| 12471/12471 [00:00<00:00, 16373.67it/s]

Length of filtered train dataset: 0
Dataset(path='hub://activeloop/office-home-domain-adaptation', read_only=True, index=Index([()]), tensors=['images', 'domain_categories', 'domain_objects'])

      tensor           htype              shape            dtype  compression
      -------         -------            -------          -------  ------- 
      images           image     (0, 4:6500, 18:6000, 3)   uint8    jpeg   
 domain_categories  class_label          (0, 1)           uint32    None   
  domain_objects    class_label          (0, 1)           uint32    None   





In [10]:
# Augmentation pipeline using Albumentations
tform_train = A.Compose([
    A.RandomSizedBBoxSafeCrop(width=128, height=128, erosion_rate = 0.2),
    A.HorizontalFlip(p=0.5),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(), # transpose_mask = True
], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['class_labels', 'bbox_ids'], min_area=25, min_visibility=0.6)) # 'label_fields' and 'box_ids' are all the fields that will be cut when a bounding box is cut.


# Transformation function for pre-processing the Deep Lake sample before sending it to the model
def transform(sample_in):

    # Convert boxes to Pascal VOC format
    boxes = coco_2_pascal(sample_in['boxes'])

    # Convert any grayscale images to RGB
    images = sample_in['images']
    if images.shape[2] == 1:
        images = np.repeat(images, int(3/images.shape[2]), axis = 2)

    # Pass all data to the Albumentations transformation
    # Mask must be converted to a list
    masks = sample_in['masks']
    mask_shape = masks.shape

    # This if-else statement was not necessary in Albumentations <1.3.x, because the empty mask scenario was handled gracefully inside of Albumentations. In Albumebtations >1.3.x, empty list of masks fails
    if mask_shape[2]>0:
        transformed = tform_train(image = images,
                                  masks = [masks[:,:,i].astype(np.uint8) for i in range(mask_shape[2])],
                                  bboxes = boxes,
                                  bbox_ids = np.arange(boxes.shape[0]),
                                  class_labels = sample_in['categories'],
                                  )
    else:
        transformed = tform_train(image = images,
                                  bboxes = boxes,
                                  bbox_ids = np.arange(boxes.shape[0]),
                                  class_labels = sample_in['categories'],
                                  )  
        


    # Convert boxes and labels from lists to torch tensors, because Albumentations does not do that automatically.
    # Be very careful with rounding and casting to integers, becuase that can create bounding boxes with invalid dimensions
    labels_torch = torch.tensor(transformed['class_labels'], dtype = torch.int64)

    boxes_torch = torch.zeros((len(transformed['bboxes']), 4), dtype = torch.int64)
    for b, box in enumerate(transformed['bboxes']):
        boxes_torch[b,:] = torch.tensor(np.round(box))
        

    # Filter out the masks that were dropped by filtering of bounding box area and visibility
    masks_torch = torch.zeros((len(transformed['bbox_ids']), transformed['image'].shape[1], transformed['image'].shape[2]), dtype = torch.int64)
    if len(transformed['bbox_ids'])>0:
        masks_torch = torch.tensor(np.stack([transformed['masks'][i] for i in transformed['bbox_ids']], axis = 0), dtype = torch.uint8)
    


    # Put annotations in a separate object
    target = {'masks': masks_torch, 'labels': labels_torch, 'boxes': boxes_torch}

    return transformed['image'], target


# Conversion script for bounding boxes from coco to Pascal VOC format
def coco_2_pascal(boxes):
    # Convert bounding boxes to Pascal VOC format and clip bounding boxes to make sure they have non-negative width and height

    return np.stack((boxes[:,0], boxes[:,1], boxes[:,0]+np.clip(boxes[:,2], 1, None), boxes[:,1]+np.clip(boxes[:,3], 1, None)), axis = 1)


def collate_fn(batch):
    return tuple(zip(*batch))

In [3]:
batch_size = 8

train_loader = ds_train.pytorch(num_workers = 2, shuffle = False, 
    tensors = ['images', 'masks', 'categories', 'boxes'], # Specify the tensors that are needed, so we don't load unused data
    transform = transform, 
    batch_size = batch_size,
    collate_fn = collate_fn)



In [4]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cpu


In [5]:
# Helper function for loading the model
def get_model_instance_segmentation(num_classes):
    # Load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

    # Get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # Get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # Replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)

    return model

In [6]:
model = get_model_instance_segmentation(num_classes)

model.to(device)

# Specity the optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)



In [8]:
# Helper function for training for 1 epoch
def train_one_epoch(model, optimizer, data_loader, device):
    model.train()

    start_time = time.time()
    for i, data in enumerate(data_loader):

        images = list(image.to(device) for image in data[0])
        targets = [{k: v.to(device) for k, v in t.items()} for t in data[1]]
        
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        loss_value = losses.item()

        # Print performance statistics
        batch_time = time.time()
        speed = (i+1)/(batch_time-start_time)
        print('[%5d] loss: %.3f, speed: %.2f' %
              (i, loss_value, speed))

        if not math.isfinite(loss_value):
            print(f"Loss is {loss_value}, stopping training")
            print(loss_dict)
            break

        optimizer.zero_grad()

        losses.backward()
        optimizer.step()

In [9]:
# Train the model for 1 epoch
num_epochs = 1
for epoch in range(num_epochs):  # loop over the dataset multiple times
    print("------------------ Training Epoch {} ------------------".format(epoch+1))
    train_one_epoch(model, optimizer, train_loader, device)
    
    # --- Insert Testing Code Here ---

    print('Finished Training')

------------------ Training Epoch 1 ------------------
[    0] loss: 5.772, speed: 0.03
[    1] loss: 1.770, speed: 0.03
[    2] loss: 3.567, speed: 0.02
[    3] loss: 3.123, speed: 0.02
[    4] loss: 3.593, speed: 0.02
[    5] loss: 2.422, speed: 0.02
[    6] loss: 1.948, speed: 0.02
[    7] loss: 1.555, speed: 0.02
[    8] loss: 1.403, speed: 0.02
[    9] loss: 1.607, speed: 0.02
[   10] loss: 1.476, speed: 0.02
[   11] loss: 1.372, speed: 0.02
[   12] loss: 1.634, speed: 0.02
[   13] loss: 1.471, speed: 0.02
[   14] loss: 1.617, speed: 0.02
[   15] loss: 1.489, speed: 0.02
[   16] loss: 1.728, speed: 0.02
[   17] loss: 1.821, speed: 0.02
[   18] loss: 2.105, speed: 0.02
[   19] loss: 1.657, speed: 0.02
[   20] loss: 1.780, speed: 0.02
[   21] loss: 1.238, speed: 0.02
[   22] loss: 1.549, speed: 0.02
[   23] loss: 1.499, speed: 0.02
[   24] loss: 1.934, speed: 0.02
[   25] loss: 1.281, speed: 0.02
[   26] loss: 1.201, speed: 0.02
[   27] loss: 1.369, speed: 0.02
[   28] loss: 1.998, 

: 