### get the data

In [15]:
from google.colab import drive
drive.mount('/content/drive')
import os
os.chdir('/content/drive/MyDrive/dataverse_files')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
from torch.utils.data import Dataset
import os
from torchvision.io import read_image
from torchvision.ops.boxes import masks_to_boxes
import csv
import torch
import torchvision

### define the dataset

In [3]:
from torchvision.transforms import v2 as T


def get_transform(train):
    transforms = []
    transforms.append(T.ToDtype(torch.float, scale=True))
    transforms.append(T.ToPureTensor())
    return T.Compose(transforms)

In [4]:
def str_label_to_int(label):
  if label == 'akiec':
    return 1
  if label == 'bcc':
    return 2
  if label == 'bkl':
    return 3
  if label == 'df':
    return 4
  if label == 'mel':
    return 5
  if label == 'nv':
    return 6
  if label == 'vasc':
    return 7

In [5]:
class HamDataset(Dataset):
  def __init__(self,image_dic,seg_dic,annotation_path,transform):
    self.image_dic = image_dic
    self.seg_dic = seg_dic
    self.images = list(sorted(os.listdir(image_dic)))
    self.segs = list(sorted(os.listdir(seg_dic)))
    self.transform  = transform
    self.annotation_path = annotation_path

    # read the csv file
    with open(annotation_path, mode='r') as file:
      reader = csv.reader(file)
      self.label = [rows[1] for rows in reader]



  def __getitem__(self,idx):
    img_path = os.path.join(self.image_dic,self.images[idx])
    seg_path = os.path.join(self.seg_dic,self.segs[idx])
    img = read_image(img_path)
    seg = read_image(seg_path)



    obj_ids = torch.unique(seg)
    if(len(obj_ids) != 1):
      obj_ids = obj_ids[1:]
    num_objs = len(obj_ids)
    # here's the thing, some of the masks cover the whole background

    # split the color-encoded mask into a set
    # of binary masks
    masks = (seg == obj_ids[:, None, None]).to(dtype=torch.uint8)

    # get bounding box coordinates for each mask
    boxes = masks_to_boxes(masks)
    # try to fix the label issue
    labels = torch.ones((num_objs,), dtype=torch.int64)
    labels.fill_(str_label_to_int(self.label[idx]))

    # I THINK THIS IS WHAT HAD LED TO THE ERROR
    #box = masks_to_boxes(seg)

    image_id = idx

    area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
    # suppose all instances are not crowd
    iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

    # Wrap sample and targets into torchvision tv_tensors:
    # img = tv_tensors.Image(img)

    if self.transform is not None:
      img = self.transform(img)

    target = {}
    target["masks"] = masks
    target["boxes"] = boxes
    target["labels"] = labels
    target["image_id"] = image_id
    target["area"] = area
    target["iscrowd"] = iscrowd

    return img, target

  def __len__(self):
    return len(self.images)



### get the model and change the output channels 

In [6]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor


def get_model_instance_segmentation(num_classes):
    # load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights="DEFAULT")

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(
        in_features_mask,
        hidden_layer,
        num_classes
    )

    return model

In [7]:
from engine import train_one_epoch, evaluate
import utils
# train on the GPU or on the CPU, if a GPU is not available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


IAMGE_DIC = '/content/drive/MyDrive/dataverse_files/HAM10000_images_part_1'
SEG_DIC = '/content/drive/MyDrive/dataverse_files/HAM10000_segmentations_lesion_tschandl/HAM10000_segmentations_lesion_tschandl'
ANNOTITION_PATH = '/content/drive/MyDrive/dataverse_files/HAM10000_metadata.csv'
train_set = HamDataset(IAMGE_DIC,SEG_DIC,ANNOTITION_PATH,transform = get_transform(train = True))
test_set = HamDataset(IAMGE_DIC,SEG_DIC,ANNOTITION_PATH,transform = get_transform(train = False))




### train

In [8]:
indices = torch.randperm(len(train_set)).tolist()
dataset = torch.utils.data.Subset(train_set, indices[:-100])
dataset_test = torch.utils.data.Subset(test_set, indices[-100:])
# the ratio of train and test is 49:1, which might be too much

In [9]:
# define training and test data loaders
data_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=2,
    shuffle=True,
    num_workers=4,
    collate_fn=utils.collate_fn
)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test,
    batch_size=1,
    shuffle=False,
    num_workers=4,
    collate_fn=utils.collate_fn
)

num_classes = 8

# get the model using our helper function
model = get_model_instance_segmentation(num_classes)

# move model to the right device
model.to(device)

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005
)

# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1
)

# let's train it just for 2 epochs
num_epochs = 2

for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)

print("That's it!")

Downloading: "https://download.pytorch.org/models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth" to /root/.cache/torch/hub/checkpoints/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth
100%|██████████| 170M/170M [00:02<00:00, 69.0MB/s]


Epoch: [0]  [   0/2450]  eta: 3:31:21  lr: 0.000010  loss: 7.8850 (7.8850)  loss_classifier: 2.2270 (2.2270)  loss_box_reg: 0.1013 (0.1013)  loss_mask: 5.5034 (5.5034)  loss_objectness: 0.0295 (0.0295)  loss_rpn_box_reg: 0.0239 (0.0239)  time: 5.1761  data: 1.5772  max mem: 2151
Epoch: [0]  [  10/2450]  eta: 0:37:48  lr: 0.000060  loss: 4.9953 (5.1743)  loss_classifier: 2.1252 (2.0869)  loss_box_reg: 0.1220 (0.1199)  loss_mask: 2.5945 (2.9404)  loss_objectness: 0.0037 (0.0121)  loss_rpn_box_reg: 0.0127 (0.0149)  time: 0.9299  data: 0.1541  max mem: 2321
Epoch: [0]  [  20/2450]  eta: 0:29:15  lr: 0.000110  loss: 3.2162 (3.8591)  loss_classifier: 1.5483 (1.5841)  loss_box_reg: 0.1273 (0.1289)  loss_mask: 1.4881 (2.1194)  loss_objectness: 0.0037 (0.0098)  loss_rpn_box_reg: 0.0120 (0.0170)  time: 0.4999  data: 0.0102  max mem: 2321
Epoch: [0]  [  30/2450]  eta: 0:26:08  lr: 0.000160  loss: 1.6000 (2.9813)  loss_classifier: 0.3836 (1.1402)  loss_box_reg: 0.1229 (0.1243)  loss_mask: 0.9188 (

In [22]:
import torch
model_dir = '/content/drive/MyDrive/Instance_segmantation'
model_path = os.path.join(model_dir, "model"+".pth")
torch.save(model.state_dict(), model_path)

train the model for epoch 3 and 4

In [23]:
# move model to the right device
model.to(device)

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005
)

# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1
)

# let's train it just for 2 epochs
num_epochs = 2

for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    evaluate(model, data_loader_test, device=device)

print("That's it!")



Epoch: [0]  [   0/2450]  eta: 0:38:15  lr: 0.000010  loss: 0.1557 (0.1557)  loss_classifier: 0.0383 (0.0383)  loss_box_reg: 0.0186 (0.0186)  loss_mask: 0.0924 (0.0924)  loss_objectness: 0.0006 (0.0006)  loss_rpn_box_reg: 0.0058 (0.0058)  time: 0.9369  data: 0.2983  max mem: 2323
Epoch: [0]  [  10/2450]  eta: 0:24:26  lr: 0.000060  loss: 0.2289 (0.3820)  loss_classifier: 0.0383 (0.0427)  loss_box_reg: 0.0412 (0.0450)  loss_mask: 0.1249 (0.2834)  loss_objectness: 0.0010 (0.0025)  loss_rpn_box_reg: 0.0026 (0.0085)  time: 0.6011  data: 0.0349  max mem: 2323
Epoch: [0]  [  20/2450]  eta: 0:24:12  lr: 0.000110  loss: 0.2289 (0.3233)  loss_classifier: 0.0323 (0.0472)  loss_box_reg: 0.0349 (0.0403)  loss_mask: 0.1249 (0.2242)  loss_objectness: 0.0012 (0.0020)  loss_rpn_box_reg: 0.0028 (0.0095)  time: 0.5807  data: 0.0127  max mem: 2323
Epoch: [0]  [  30/2450]  eta: 0:24:01  lr: 0.000160  loss: 0.2468 (0.3259)  loss_classifier: 0.0461 (0.0501)  loss_box_reg: 0.0365 (0.0410)  loss_mask: 0.1579 (