## Model Training
This notebook serves as the basis for training the model to recognise and classify solar panels. 

## Questions
- How will the drone fly? Can I expect it to have a top-down view from the camera?

In [2]:
import transforms as T
import torch
import torchvision
import os
import utils
import torchvision.transforms as trans
import torchvision.models as models
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
import shutil
import cv2
import albumentations as A
from PIL import Image
from skimage import io, data
from skimage.filters import threshold_otsu
from skimage.segmentation import clear_border
from skimage.measure import label, regionprops
from skimage.morphology import closing, square
from skimage.color import label2rgb, rgb2gray, gray2rgb
from engine import train_one_epoch, evaluate
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

In [None]:
# Start by reading the dataset
# One thing worth noting is that this dataset is entirely comprised of top-down pictures, so could perform
# more poorly from other angles
train_path = "../Data/train/"
img = "../Data/train/img/2021_02_03_10_A_90DJI_0032_height_30m.JPG"

# Get image size
img_org = io.imread(img)
print(f"Image size {img_org.shape}")

# Ensure all images are of the same size and change type to tensors (which is the torch equivalent of np-arrays)
# It might be worth resizing them smaller, but there is a tradeoff between computational speed and accuracy
# In the end, perhaps the best solution is to use the size which the images comes from the camera
train_transforms = T.Compose([T.Resize((2250, 4000)), T.ToTensor()])

In [5]:
# Reorganise data set into labels and images - you only need to run this once after downloading data set!

# Define paths
train_path = "../Data/train_val_set/"
new_train_img = "../Data/train/img/"
test_path = "../Data/test_set/"
new_test_img = "../Data/test/img/"

# Get images
img_train = [f for f in os.listdir(train_path) if '.jpg' in f.lower()]
img_test = [f for f in os.listdir(test_path) if '.jpg' in f.lower()]

# Change folders
for img in img_train:
    old_path = train_path + img
    img_path = new_train_img + img
    shutil.move(old_path, img_path)

for img in img_test:
    old_path = test_path + img
    img_path = new_test_img + img
    shutil.move(old_path, img_path)

In [2]:
# Make our own dataset
class PVDataSet(torch.utils.data.Dataset):
    def __init__(self, root, d_transforms = None):
        self.root = root
        self.transforms = d_transforms
        
        # Load all image files
        self.imgs = list(os.listdir(os.path.join(root, "img/")))
        self.masks = list(os.listdir(os.path.join(root, "labels/")))
        
    def __getitem__(self, idx):
        # Load images and masks
        img_path = os.path.join(self.root, "img/", self.imgs[idx])
        mask_path = os.path.join(self.root, "labels/", self.masks[idx])
        
        # Convert the image to RGB
        #img = Image.open(img_path).convert("RGB")
        img = Image.open(img_path).convert("RGB")
        mask = Image.open(mask_path)
        
        # Convert PIL to np-array
        mask = np.array(mask)
        
        # BLOB Analysis
        label_im = label(mask)
        
        # Instances are different colours
        obj_ids = np.unique(label_im)
        
        # First id is background - remove it
        obj_ids = obj_ids[1:]
        
        # Split into multiple separate mask segments
        masks = (label_im[:, None, None] == obj_ids[:, None, None])
        
        # Loop through and get the boxes
        num_objs = len(obj_ids)
        boxes = []
        for i in range(num_objs):
            pos = np.where(label_im == obj_ids[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])

        # convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)
        masks = torch.as_tensor(masks, dtype=torch.uint8)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # suppose all instances are individual
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target
    
    def __len__(self):
        return len(self.imgs)


In [None]:
# Extract datasets - Note that ImageFolder requires each class of interest to be its own subfolder
#train_ds = torchvision.datasets.ImageFolder(root = train_path, transform = train_transforms)
train_ds = PVDataSet("../Data/train/", train_transforms)

# Load set, make sure not to shuffle since the labelled images are listed 1 entry after the original
train_loader = torch.utils.data.DataLoader(dataset = train_ds, batch_size = 32, shuffle = False)

In [None]:
# Extract mean and standard deviation (might be useful later)
def get_mean_and_std(loader):
    mean = 0
    std = 0
    total_img = 0
    
    # Iterate through batches, we don't need their names, hence _
    for batch, _ in loader:
        # How many images are there (could be less than 32)
        img_count = batch.size(0)
        
        # Get the actual images out of the batch, reshape them
        images = batch.view(img_count, batch.size(1), -1)
        
        # Add mean/std from batch to total
        mean += images.mean(2).sum(0)
        std += images.std(2).sum(0)
        total_img += img_count
    
    # Get mean and std
    mean /= total_img
    std /= total_img
    
    return mean, std

In [None]:
# Get mean and std
mean, std = get_mean_and_std(train_loader)

In [None]:
print(f"Mean: {mean}, Standard Deviation: {std}")

In [3]:
# Change to use the gpu for training
def set_device():
    if torch.cuda.is_available():
        dev = "cuda:0"
    else:
        dev = "cpu"
    return torch.device(dev)

In [4]:
def train_nn(model, train_loader, test_loader, criterion, optimizer, epochs):
    device = set_device()
    
    # Train through all the epochs
    for epoch in range(epochs):
        print("Epoch number %d" % (epoch + 1))
        model.train()
        running_loss = 0.0
        running_correct = 0.0
        total = 0
        
        # Run optimizer on all batches (this might be changed if
        # there is a better algorithm than SGD)
        for data in train_loader.dataset:
            # Send data to gpu
            images, labels, _, _ = data
            images = images.to(device)
            labels = images.to(device)
            total += labels.size(0)
            
            # Set gradient to 0 so parameters update correctly
            optimizer.zero_grad()
            
            output = model(images)
            
            # Get the prediction for output
            _, predictions = torch.max(outputs.data, 1)
            
            # Check how many items were identified incorrectly
            loss = criterion(outputs, labels)
            
            # Back-propagate
            loss.backwards()
            
            # Update weights
            optimizer.step()
            
            # Update loss & correct
            running_loss += loss.item()
            running_correct += (labels==predicted).sum().item()
            
        # Print the result of the epoch
        epoch_loss = running_loss/len(train_loader)
        epoch_acc = 100.00 * running_correct / total
        
        print("   - Training dataset got %d out of %d images correctly (%.3f##). Epoch loss: %.3f"
              % (running_correct, total, epoch_acc, epoch))
        
        # Test model
        evaluate_model(model, test_loader)
        
    print("Finished")
    return model

In [5]:
def evaluate_model(model, test_loader):
    # Notify all layers that you are evaluating - such that dropoff applies
    model.eval()
    pred_correct = 0
    total = 0
    device = set_device()
    
    # Activate no_grad, such that we don't backpropagate during evaluation
    with torch.no_grad():
        
        # Same as training, except without updating weights
        for data in test_loader.dataset:
            images, labels, _, _ = data
            images = images.to(device)
            labels = images.to(device)
            total += labels.size(0)

            # Get outputs and find predictions
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            
            # Update counts
            pred_correct += (predicted == labels).sum().item() 
    
    # Update and print epoch accuracy
    epoch_acc = 100.00 * running_correct / total
    print("   - Training dataset got %d out of %d images correctly (%.3f##). Epoch loss: %.3f"
          % (running_correct, total, epoch_acc, epoch))

In [6]:
def get_model_instance_segmentation(num_classes):
    # Load model on pre-trained COCO
    model = models.detection.maskrcnn_resnet50_fpn(weights='DEFAULT')
    
    # Get number of inputs for classifier
    in_feat = model.roi_heads.box_predictor.cls_score.in_features
    
    # Replace pre-trained head with new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_feat, num_classes)
    
    # Get input features for the mask classifier
    in_feat_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    
    # Replace predictor
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_feat_mask, hidden_layer, num_classes)
    
    return model

In [7]:
def get_transform():
    mean = [0.3014, 0.2905, 0.2335]
    std = [0.0938, 0.0904, 0.0874]

    # This train transforms could also have elements randomly flipped, but the problem is that the 
    # labelled image would then be wrong - thus introducing errors into our model
    d_transforms = []
    #d_transforms.append(trans.Resize(2250, 4000))
    d_transforms.append(T.PILToTensor())
    #d_transforms.append(trans.Normalize(torch.Tensor(mean), torch.Tensor(std)))
    d_transforms.append(T.ConvertImageDtype(torch.float))
    return T.Compose(d_transforms)

In [16]:
# Build model - using pretrained means it already knows the classification of 1000 classes defined
# in the ImageNet database
# The model is here chosen to be resnet 18, but might be changed for resnet 50 in the future for a faster R-CNN network.
# Note that this current model is NOT an R-CNN, which might mean it's not very good currently since we are looking for
# the region(s) of the picture in which the item is located, not a classification of the image as a whole

# Remake the transforms, now with normalization using the acquired mean and std
train_path = "../Data/train/"
test_path = "../Data/test/"

# Mean and std for static segmentation

# Mean and std for moving_labeled
#mean = [0.2723, 0.2529, 0.2091]
#std = [0.1044, 0.0988, 0.0860]

# Make our datasets
train_ds = PVDataSet(train_path, get_transform())
test_ds = PVDataSet(test_path, get_transform())

# Put them into loaders
# Collate_fn ensures correct data padding
# Num_workers speeds up the process by allowing paralell processing
train_loader = torch.utils.data.DataLoader(dataset = train_ds, batch_size = 32, 
                                           shuffle = False, collate_fn=utils.collate_fn) #num_workers=2)
test_loader = torch.utils.data.DataLoader(dataset = test_ds, batch_size = 32, 
                                          shuffle = False, collate_fn=utils.collate_fn) #num_workers=2)

# Define how many new classes we would like to learn - here it is only solar panels for the moment
num_classes = 2 # Solar panels + background

# Prepare matrices for forward propagation using the number of classes and features
#resnet18_model.fc = nn.Linear(num_features, num_classes)
model = get_model_instance_segmentation(num_classes)

# Set device to GPU if available
device = set_device()
model.to(device)
params = [p for p in model.parameters() if p.requires_grad]

# Define the loss function (between 0.001 - 0.1) & optimizer (Stochastic Gradient Descent)
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(params, lr = 0.01, momentum = 0.9, weight_decay = 0.003)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = 3, gamma = 0.1)

In [17]:
# Train the nn
#resnet18_model = train_nn(resnet18_model, train_loader, test_loader, loss_fn, optimizer, 10)
num_epochs = 10

for epoch in range(num_epochs):
    # Train one epoch, print every 10 iterations
    train_one_epoch(model, optimizer, train_loader, device, epoch, print_freq = 10)
    
    # Update learning rate
    lr_scheduler.step()
    
    # Evaluate on test set
    evaluate(model, test_loader, device=device)

NotImplementedError: Input Error: Only 3D, 4D and 5D input Tensors supported (got 6D) for the modes: nearest | linear | bilinear | bicubic | trilinear | area | nearest-exact (got nearest)

In [None]:
img_path = "../Data/train/img/2021_02_03_10_A_90DJI_0032_height_30m.JPG"
mask_path = "../Data/train/labels/2021_02_03_10_A_90DJI_0032_height_30m.png"

# Attempt with cv2
# im = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

# params = cv2.SimpleBlobDetector_Params()
# Disable unwanted filter criteria params
# params.filterByInertia = False
# params.filterByConvexity = False
# params.filterByCircularity = False

# blob_detector = cv2.SimpleBlobDetector(params)
# points = blob_detector.detect(im)

# imshow("Blobs", points)

# Attempt with skimage
# im = io.imread(mask_path)

mask = cv2.open(mask_path)

# Convert PIL to np-array
mask = np.array(mask)
label_im = label(mask)
vals = np.unique(label_im)
vals = vals[1:]
#print(vals[:, None, None])
masks = (label_im[:, None, None] == vals[:, None, None])
#print(masks)
num_objs = len(vals)
boxes = []
for i in range(len(vals)):
    pos = np.where(label_im == vals[i])
    xmin = np.min(pos[1])
    xmax = np.max(pos[1])
    ymin = np.min(pos[0])
    ymax = np.max(pos[0])
    boxes.append([xmin, ymin, xmax, ymax])
print(boxes)

# convert everything into a torch.Tensor
boxes = torch.as_tensor(boxes, dtype=torch.float32)

# there is only one class
labels = torch.ones((num_objs,), dtype=torch.int64)
masks = torch.as_tensor(masks, dtype=torch.uint8)

image_id = torch.tensor(0)
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
# suppose all instances are individual
iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

target = {}
target["boxes"] = boxes
target["labels"] = labels
target["masks"] = masks
target["image_id"] = image_id
target["area"] = area
target["iscrowd"] = iscrowd
print(target)

In [None]:
Image.open("../Data/train/labels/2021_02_03_10_A_90DJI_0032_height_30m.png")

In [None]:
test_loader.dataset.root