In [None]:
# Mount Google Drive inside the Colab runtime at /content/gdrive so the
# dataset and model folders referenced below can be read and written.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# Work from the model folder on the mounted Drive: relative paths used later
# in the notebook (e.g. 'model.pkl') resolve against this directory.
%cd /content/gdrive/MyDrive/ML/UAV/model

/content/gdrive/MyDrive/ML/UAV/model


In [None]:
# Root of the dataset on the mounted Drive. Every split keeps its images in
# "<split>_images/" and its XML annotations in "<split>_annots/".
DATASET_ROOT = '/content/gdrive/MyDrive/ML/UAV/dataset'

# Image folders (trailing slash kept: the paths are used as string prefixes).
train_data_path = f'{DATASET_ROOT}/train_images/'
valid_data_path = f'{DATASET_ROOT}/valid_images/'
test_data_path = f'{DATASET_ROOT}/test_images/'

# Annotation folders, one per split.
train_annots_path = f'{DATASET_ROOT}/train_annots/'
valid_annots_path = f'{DATASET_ROOT}/valid_annots/'
test_annots_path = f'{DATASET_ROOT}/test_annots/'

In [None]:
from xml.etree import ElementTree as et
import cv2
import numpy as np
import os
import pandas as pd

In [None]:
# Label names in class-id order; position 0 is the background entry.
_CLASS_NAMES = (
    '__background__',
    'building',
    'ship',
    'vehicle',
    'prefabricated-house',
    'well',
    'cable-tower',
    'pool',
    'landslide',
    'cultivation-mesh-cage',
    'quarry',
)

# Maps each label name to its integer class id (its position in the tuple above).
CLASSES = {name: class_id for class_id, name in enumerate(_CLASS_NAMES)}

In [None]:
def _widen_degenerate_span(low, high):
    """Give a zero-length ``[low, high]`` span a length of one pixel.

    The span is normally grown downwards (``low - 1``); when that would push
    ``low`` below 0 it is grown upwards instead so the coordinate stays
    non-negative.
    """
    if high != low:
        return low, high
    if low >= 1:
        return low - 1, high
    return low, high + 1


def extract_from_xml(annot_path):
    """Collect every annotated object found in a folder of ``.xml`` files.

    Each ``.xml`` file in ``annot_path`` is parsed (in sorted file-name order)
    and every ``object`` element contributes one row. The label is read from
    the object's ``name`` element and mapped through ``CLASSES``; the box is
    read from the ``xmin``/``ymin``/``xmax``/``ymax`` children of ``bndbox``.

    Parameters
    ----------
    annot_path : str
        Directory containing the annotation files. Files that do not end in
        ``.xml`` are ignored.

    Returns
    -------
    pandas.DataFrame
        Columns ``image_id, class_id, xmin, ymin, xmax, ymax`` (in that
        order). ``image_id`` is the annotation file name without its
        extension. The columns are present even when no object was found.

    Raises
    ------
    KeyError
        If an object's label is not a key of ``CLASSES``.
    """
    columns = ['image_id', 'class_id', 'xmin', 'ymin', 'xmax', 'ymax']
    records = []

    xml_files = sorted(name for name in os.listdir(annot_path) if name.endswith('.xml'))
    for xml_name in xml_files:
        root = et.parse(os.path.join(annot_path, xml_name)).getroot()
        # splitext only strips the trailing extension; str.replace would also
        # remove any ".xml" occurring in the middle of the name.
        image_id = os.path.splitext(xml_name)[0]

        for member in root.findall('object'):
            class_id = CLASSES[member.find('name').text]
            bndbox = member.find('bndbox')
            xmin = float(bndbox.find('xmin').text)
            xmax = float(bndbox.find('xmax').text)
            ymin = float(bndbox.find('ymin').text)
            ymax = float(bndbox.find('ymax').text)

            # Zero-width / zero-height boxes are widened by one pixel so every
            # box has a positive area, without producing negative coordinates.
            xmin, xmax = _widen_degenerate_span(xmin, xmax)
            ymin, ymax = _widen_degenerate_span(ymin, ymax)

            records.append({
                'image_id': image_id,
                'class_id': class_id,
                'xmin': xmin,
                'ymin': ymin,
                'xmax': xmax,
                'ymax': ymax
            })

    # Passing the column list keeps the schema stable for an empty result.
    return pd.DataFrame(records, columns=columns)

In [None]:
# Parse each split's annotation folder into its own table
# (one row per annotated object).
train_df = extract_from_xml(annot_path=train_annots_path)
valid_df = extract_from_xml(annot_path=valid_annots_path)
test_df = extract_from_xml(annot_path=test_annots_path)


In [None]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import albumentations as A
from PIL import Image

In [None]:
class UAVDataset(Dataset):
    """Detection dataset serving one image together with all of its boxes.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per annotated object. Must expose an ``image_id`` column;
        column 1 is read as the integer class id and columns 2-5 as the box
        coordinates.
    file_path : str
        Directory that holds the ``<image_id>.jpg`` files.
    transform : callable, optional
        Called as ``transform(image=..., bboxes=..., category_ids=...)`` and
        expected to return a mapping with the keys ``image``, ``bboxes`` and
        ``category_ids``.
    """

    def __init__(self, df, file_path, transform=None):
        self.df = df
        self.file_path = file_path
        # One dataset item per distinct image, in order of first appearance.
        self.unique_imgs = self.df.image_id.unique()
        self.transform = transform

    def __len__(self):
        """Number of distinct images."""
        return len(self.unique_imgs)

    @staticmethod
    def _make_target(boxes, labels):
        """Build the target dict with fixed dtypes and shapes.

        ``boxes`` becomes a float32 tensor of shape ``(N, 4)`` and ``labels``
        an int64 tensor of shape ``(N,)``. The reshape matters when an
        augmentation dropped every box: an empty list would otherwise turn
        into a tensor of shape ``(0,)`` instead of ``(0, 4)``.
        """
        box_array = np.asarray(boxes, dtype=np.float32).reshape(-1, 4)
        label_array = np.asarray(labels, dtype=np.int64).reshape(-1)
        return {
            "boxes": torch.as_tensor(box_array),
            "labels": torch.as_tensor(label_array),
        }

    def __getitem__(self, idx):
        """Return ``(image_tensor, target)`` for the ``idx``-th distinct image.

        ``target`` is a dict with ``"boxes"`` and ``"labels"`` tensors.
        """
        image_name = self.unique_imgs[idx]
        # Filter the table once and slice both the boxes and the labels from it.
        rows = self.df[self.df.image_id == image_name]
        boxes = rows.iloc[:, 2:6].to_numpy(dtype="float")
        labels = rows.iloc[:, 1].to_numpy(dtype="int")

        # The context manager closes the file handle as soon as the pixels
        # have been copied into the array.
        with Image.open(os.path.join(self.file_path, image_name + ".jpg")) as handle:
            img = np.array(handle.convert('RGB'))

        if self.transform is not None:
            transformed = self.transform(image=img, bboxes=boxes, category_ids=labels)
            img = transformed['image']
            boxes = transformed['bboxes']
            labels = transformed['category_ids']

        return T.ToTensor()(img), self._make_target(boxes, labels)



In [None]:
def custom_collate(data):
    """Hand the batch back untouched.

    Meant to be passed as ``collate_fn``: whatever object is received for a
    batch is returned as-is (the same object, no stacking or copying).
    """
    batch = data
    return batch

In [None]:
# Augmentation pipeline: always resize to 1000x1000, flip horizontally
# half of the time, and rotate (limit=30) 30% of the time. Boxes are given
# in 'pascal_voc' format and their labels travel in the 'category_ids' field.
transform = A.Compose(
    transforms=[
        A.Resize(1000, 1000, p=1.0),
        A.HorizontalFlip(p=0.5),
        A.Rotate(limit=30, p=0.3),
    ],
    bbox_params=dict(format='pascal_voc', label_fields=['category_ids']),
)

In [None]:
# Training loader: augmented samples, reshuffled on every pass.
train_data = DataLoader(UAVDataset(train_df, train_data_path, transform=transform),
                        batch_size=4,
                        shuffle=True,
                        collate_fn=custom_collate)

# Validation / test loaders: no augmentation and a fixed sample order.
# Shuffling brings nothing to evaluation and only makes runs harder to compare.
valid_data = DataLoader(UAVDataset(valid_df, valid_data_path),
                        batch_size=4,
                        shuffle=False,
                        collate_fn=custom_collate)

test_data = DataLoader(UAVDataset(test_df, test_data_path),
                       batch_size=4,
                       shuffle=False,
                       collate_fn=custom_collate)

In [None]:
# Load Faster R-CNN (ResNet-50 FPN v2) with its default pretrained weights.
# `weights=` is the supported way to request them; `pretrained=True` is the
# deprecated legacy flag.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn_v2(weights="DEFAULT")

# Replace the box predictor so it outputs one score per entry of CLASSES
# (background included), reusing the input width of the existing head.
num_classes = len(CLASSES)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_v2_coco-dd69338a.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_v2_coco-dd69338a.pth
100%|██████████| 167M/167M [00:00<00:00, 288MB/s]


In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Number of full passes over the training loader.
num_epochs = 5

# SGD with momentum and weight decay over all model parameters.
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=0.001,
    momentum=0.9,
    weight_decay=0.0005,
)

In [None]:

from torch.cuda.amp import autocast, GradScaler

# Number of mini-batches whose gradients are accumulated before one
# optimizer step.
ACCUMULATION_STEPS = 32

model.to(device)

# Mixed precision is only enabled on CUDA; on CPU both the autocast context
# and the scaler are switched off and act as pass-throughs.
use_amp = device.type == 'cuda'
scaler = GradScaler(enabled=use_amp)

for epoch in range(num_epochs):
    # The detection model only returns its loss dict in training mode, so set
    # it explicitly (an earlier evaluation may have left it in eval mode).
    model.train()
    epoch_loss = 0
    num_batches = len(train_data)

    # Gradients are cleared only at the start of an accumulation window.
    # Clearing them before every batch would throw away all but the last
    # batch's gradient of each window.
    optimizer.zero_grad()

    for batch_idx, data in enumerate(train_data):
        imgs = [d[0].to(device) for d in data]
        targets = [{"boxes": d[1]["boxes"].to(device), "labels": d[1]["labels"].to(device)} for d in data]

        with autocast(enabled=use_amp):
            loss_dict = model(imgs, targets)
            loss = sum(loss_dict.values())

        # Divide so the accumulated gradient is the window average, not the sum.
        scaler.scale(loss / ACCUMULATION_STEPS).backward()

        # Step at the end of each window, and also on the last batch so the
        # trailing batches of an epoch are not discarded.
        window_complete = (batch_idx + 1) % ACCUMULATION_STEPS == 0
        last_batch = (batch_idx + 1) == num_batches
        if window_complete or last_batch:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

        # Logged value is the unscaled per-batch loss, summed over the epoch.
        epoch_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}] Loss: {epoch_loss:.4f}")


In [None]:
import pickle

# Serialize the whole model object to 'model.pkl' in the working directory.
# The `with` block guarantees the file is flushed and closed.
# NOTE(review): a pickled model can only be loaded where the same class
# definitions are importable, and pickle files must never be loaded from
# untrusted sources. Saving `model.state_dict()` is the usual alternative.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
from torchmetrics.detection.mean_ap import MeanAveragePrecision
# Shared metric object, created with default settings. evaluate_model()
# resets it, feeds it the predictions/targets and calls compute() on it.
metric = MeanAveragePrecision()

In [None]:
def evaluate_model(model, test_data):
    """Score ``model`` on every batch of ``test_data`` with the shared metric.

    The model is switched to eval mode (and left there), run without gradient
    tracking on each batch, and the per-image predictions and ground-truth
    targets are collected on the CPU. The module-level ``metric`` is then
    reset, updated once with everything collected, and computed.

    Parameters
    ----------
    model : callable
        Detection model. Called as ``model(list_of_image_tensors)`` and
        expected to return one dict per image with ``boxes``, ``scores`` and
        ``labels`` tensors.
    test_data : iterable
        Yields batches; each batch is a sequence of ``(image, target)`` pairs
        where ``target`` holds ``"boxes"`` and ``"labels"`` tensors.

    Returns
    -------
    The value returned by ``metric.compute()``.
    """
    model.eval()
    target = []
    preds = []

    with torch.no_grad():
        # Iterate the loader that was passed in, and keep inference and
        # collection inside the loop so every batch contributes.
        for data in test_data:
            imgs = [d[0].to(device) for d in data]
            outputs = model(imgs)

            for sample, output in zip(data, outputs):
                target.append({
                    'boxes': sample[1]['boxes'].detach().cpu(),
                    'labels': sample[1]['labels'].detach().cpu(),
                })
                preds.append({
                    'boxes': output['boxes'].detach().cpu(),
                    'scores': output['scores'].detach().cpu(),
                    'labels': output['labels'].detach().cpu(),
                })

    metric.reset()
    metric.update(preds, target)
    metric_summary = metric.compute()
    return metric_summary