In [2]:
import transforms as T
import torch
import torchvision
import os
import utils
import torchvision.transforms as trans
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
import cv2
from PIL import Image, ImageOps
from skimage import io, data
from skimage.measure import label
from skimage.color import label2rgb, rgb2gray, gray2rgb
from engine import train_one_epoch, evaluate
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

In [3]:
# Make our own dataset
class PVDataSet(torch.utils.data.Dataset):
    def __init__(self, root, d_transforms = None):
        self.root = root
        self.transforms = d_transforms
        
        # Load all image files
        #self.imgs = list(os.listdir(os.path.join(root, "img/")))
        #self.masks = list(os.listdir(os.path.join(root, "labels/")))
        
    def __getitem__(self, idx):
        # Load images and masks
        img_path = os.path.join(self.root, "img/", self.imgs[idx])
        mask_path = os.path.join(self.root, "labels/", self.masks[idx])
        
        # Convert the image to RGB and resize
        img = Image.open(img_path).convert("RGB")
        
        # Convert to grayscale
        mask = Image.open(mask_path)
        mask = ImageOps.grayscale(mask)
        
        # Convert PIL to np-array
        mask = np.array(mask)
        
        # BLOB Analysis
        label_im = label(mask)
        
        # Instances are different colours
        obj_ids = np.unique(label_im)
        
        # First id is background - remove it
        obj_ids = obj_ids[1:]
        
        # Split into multiple separate mask segments
        masks = (label_im[:, None, None] == obj_ids[:, None, None])
        
        # Convert masks to C, D, H, W
        masks = np.transpose(masks, (2,1,0,3))
        
        # Loop through and get the boxes
        num_objs = len(obj_ids)
        boxes = []
        for i in range(num_objs):
            pos = np.where(label_im == obj_ids[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])

        # Convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        
        # There is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        
        # Suppose all instances are individual
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target
    
    def __len__(self):
        return len(self.imgs)

In [4]:
# Change to use the gpu for training
def set_device():
    if torch.cuda.is_available():
        dev = "cuda:0"
    else:
        dev = "cpu"
    return torch.device(dev)

In [14]:
def get_model_instance_segmentation(num_classes):
    # Load model on pre-trained COCO
    model = models.detection.maskrcnn_resnet50_fpn(weights='DEFAULT', min_size = 2000, max_size = 2500)
    
    # Get number of inputs for classifier
    in_feat = model.roi_heads.box_predictor.cls_score.in_features
    
    # Replace pre-trained head with new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_feat, num_classes)
    
    # Get input features for the mask classifier
    in_feat_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    
    # Replace predictor
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_feat_mask, hidden_layer, num_classes)
    
    return model

In [6]:
def get_transform(train):
    # Don't need std and mean as this is handled internally by the model
    d_transforms = []
    #d_transforms.append(T.Resize([1125,2000]))
    d_transforms.append(T.PILToTensor())
    d_transforms.append(T.ConvertImageDtype(torch.float))
    if train:
        # during training, randomly flip the training images
        # and ground-truth for data augmentation
        d_transforms.append(T.RandomHorizontalFlip())
    return T.Compose(d_transforms)

In [15]:
# Build model - using pretrained means it already knows the classification of 1000 classes defined
# in the ImageNet database
# The model is here chosen to be resnet 18, but might be changed for resnet 50 in the future for a faster R-CNN network.
# Note that this current model is NOT an R-CNN, which might mean it's not very good currently since we are looking for
# the region(s) of the picture in which the item is located, not a classification of the image as a whole

# Remake the transforms, now with normalization using the acquired mean and std
train_path = "../Data/train/"
test_path = "../Data/test/"

# Make our datasets
train_ds = PVDataSet(train_path, get_transform(train=True))
test_ds = PVDataSet(test_path, get_transform(train=False))

# Put them into loaders
# Collate_fn ensures correct data padding
# Num_workers speeds up the process by allowing paralell processing
train_loader = torch.utils.data.DataLoader(dataset = train_ds, batch_size = 16, 
                                           shuffle = True, collate_fn=utils.collate_fn, num_workers=4)
test_loader = torch.utils.data.DataLoader(dataset = test_ds, batch_size = 16, 
                                          shuffle = False, collate_fn=utils.collate_fn, num_workers=4)

# Define how many new classes we would like to learn - here it is only solar panels for the moment
num_classes = 2 # Solar panels + background

# Prepare matrices for forward propagation using the number of classes and features
model = get_model_instance_segmentation(num_classes)
print(model)

# Set device to GPU if available
device = set_device()
# device = "cpu"
model.to(device)
params = [p for p in model.parameters() if p.requires_grad]

# Define the loss function (between 0.001 - 0.1) & optimizer (Stochastic Gradient Descent)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(params, lr = 0.001, momentum = 0.9, weight_decay = 0.003)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = 3, gamma = 0.1)

MaskRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(2000,), max_size=2500, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(i

In [7]:
# Train the nn
num_epochs = 10

for epoch in range(num_epochs):
    # Train one epoch, print every 10 iterations
    train_one_epoch(model, optimizer, train_loader, device, epoch, print_freq = 3)
    
    # Update learning rate
    lr_scheduler.step()
    
    # Evaluate on test set
    evaluate(model, test_loader, device=device)

# Currently doesn't train correctly, could perhaps have to do with the rescaling function?



Epoch: [0]  [ 0/15]  eta: 0:40:40  lr: 0.000072  loss: 13.4542 (13.4542)  loss_classifier: 0.8433 (0.8433)  loss_box_reg: 0.0137 (0.0137)  loss_mask: 4.9172 (4.9172)  loss_objectness: 6.7789 (6.7789)  loss_rpn_box_reg: 0.9011 (0.9011)  time: 162.6883  data: 14.7286
Epoch: [0]  [ 3/15]  eta: 0:31:56  lr: 0.000286  loss: 10.7365 (10.7424)  loss_classifier: 0.7992 (0.7913)  loss_box_reg: 0.0296 (0.0345)  loss_mask: 3.8813 (3.7565)  loss_objectness: 5.2630 (5.3577)  loss_rpn_box_reg: 0.7334 (0.8023)  time: 159.6875  data: 13.4980
Epoch: [0]  [ 6/15]  eta: 0:23:28  lr: 0.000501  loss: 7.5312 (7.3714)  loss_classifier: 0.6768 (0.6202)  loss_box_reg: 0.0356 (0.0389)  loss_mask: 2.1384 (2.3535)  loss_objectness: 3.9631 (3.6791)  loss_rpn_box_reg: 0.7233 (0.6797)  time: 156.5542  data: 13.0269
Epoch: [0]  [ 9/15]  eta: 0:15:32  lr: 0.000715  loss: 2.3490 (5.7171)  loss_classifier: 0.3724 (0.4752)  loss_box_reg: 0.0299 (0.0349)  loss_mask: 0.3082 (1.6519)  loss_objectness: 1.3712 (2.9352)  loss_

ValueError: ndarray is not Fortran contiguous

In [None]:
# Save the model and make plots
# pick one image from the test set
img, _ = test_ds[0]
# put the model in evaluation mode
model.eval()
with torch.no_grad():
    prediction = model([img.to(device)])

In [None]:
prediction

In [None]:
Image.fromarray(img.mul(255).permute(1, 2, 0).byte().numpy())

In [None]:
Image.fromarray(prediction[0]['masks'][0, 0].mul(255).byte().cpu().numpy())

In [None]:
# Testing
img_path = "../Data/train/img/2021_02_03_10_A_90DJI_0032_height_30m.JPG"
mask_path = "../Data/train/labels/2021_02_03_10_A_90DJI_0032_height_30m.png"

# Convert the image to RGB and resize
img = Image.open(img_path).convert("RGB")
w, h = img.size
img = img.resize((w//2, h//2))

# Convert to grayscale and resize
mask = Image.open(mask_path)
mask = ImageOps.grayscale(mask)
w, h = mask.size
mask = mask.resize((w//2, h//2))

# Convert PIL to np-array
mask = np.array(mask)
label_im = label(mask)
vals = np.unique(label_im)
vals = vals[1:]
masks = (label_im[:, None, None] == vals[:, None, None])
masks = np.transpose(masks, (2,1,0,3))

num_objs = len(vals)
boxes = []
for i in range(num_objs):
    pos = np.where(label_im == vals[i])
    xmin = np.min(pos[1])
    xmax = np.max(pos[1])
    ymin = np.min(pos[0])
    ymax = np.max(pos[0])
    boxes.append([xmin, ymin, xmax, ymax])
#print(boxes)


# convert everything into a torch.Tensor
boxes = torch.as_tensor(boxes, dtype=torch.float32)

# there is only one class
labels = torch.ones((num_objs,), dtype=torch.int64)
masks = torch.as_tensor(masks, dtype=torch.uint8)

image_id = torch.tensor(0)
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
# suppose all instances are individual
iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
print(np.shape(masks))

masks[0,:,:,:].permute(0,2,1)
print(np.shape(masks[0,:,:,:]))