In [11]:
import torch
import torchvision
import os

tile_size = 512

parent_dir = "rois2/"
img_dir = parent_dir + "images/"
tile_dir = parent_dir + "tiles/"
model_dir = parent_dir + "models/"

In [12]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [13]:
def calculate_iou(box1, box2):
    """
    Calculate the Intersection over Union (IoU) of two bounding boxes.
    
    Parameters:
    box1, box2: Arrays or lists in the format [x_min, y_min, x_max, y_max]
    
    Returns:
    float: IoU value
    """
    x1_min, y1_min, x1_max, y1_max = box1
    x2_min, y2_min, x2_max, y2_max = box2
    
    # Calculate the intersection coordinates
    x_min = max(x1_min, x2_min)
    y_min = max(y1_min, y2_min)
    x_max = min(x1_max, x2_max)
    y_max = min(y1_max, y2_max)
    
    # Calculate the intersection area
    intersection_area = max(0, x_max - x_min) * max(0, y_max - y_min)
    
    # Calculate the area of both boxes
    box1_area = (x1_max - x1_min) * (y1_max - y1_min)
    box2_area = (x2_max - x2_min) * (y2_max - y2_min)
    
    # Calculate the union area
    union_area = box1_area + box2_area - intersection_area
    
    # Calculate the IoU
    iou = intersection_area / union_area
    
    return iou

In [14]:
def check_pred_boxes(pred_boxes, true_boxes): # both should be tensors - [N, x1, y1, x2, y2]
    check_list = [0] * true_boxes.shape[0]
    for i in range(pred_boxes.shape[0]):
        for j in range(true_boxes.shape[0]):
            if calculate_iou(pred_boxes[i], true_boxes[j]) > 0.5:
                check_list[j] = 1
    return sum(check_list), len(check_list) # return # of true positives and total # of true boxes
    

In [15]:
import time
class Timer:
    def __init__(self):
        self.start_time = None
        self.end_time = None

    def start(self):
        self.start_time = time.time()

    def stop(self):
        self.end_time = time.time()

    def elapsed_time(self):
        if self.start_time is None:
            raise ValueError("Timer has not been started.")
        if self.end_time is None:
            raise ValueError("Timer has not been stopped.")

        elapsed_seconds = self.end_time - self.start_time
        hours = int(elapsed_seconds // 3600)
        elapsed_seconds %= 3600
        minutes = int(elapsed_seconds // 60)
        seconds = elapsed_seconds % 60

        return str(f"Elapsed time: {hours} hours, {minutes} minutes, {seconds:.2f} seconds")

load dataset from pkl

In [16]:
from torch.utils.data import Dataset, DataLoader
import albumentations as A
import numpy as np
import re
import cv2

with open(tile_dir + 'mean_std.txt') as f:
    lines = f.readlines()
    
mean = []
std = []

for line in lines:
    if line.startswith("Mean: "):
        # Extract the numbers using regex
        numbers_str = re.findall(r'\[([0-9.\s]+)\]', line)[0]
        # Convert the string of numbers to a list of floats
        mean = np.fromstring(numbers_str, sep=' ').tolist()
    if line.startswith("Standard Deviation: "):
        # Extract the numbers using regex
        numbers_str = re.findall(r'\[([0-9.\s]+)\]', line)[0]
        # Convert the string of numbers to a list of floats
        std = np.fromstring(numbers_str, sep=' ').tolist()
print(mean)
print(std)

print(type(mean))

transform = A.Compose([
    # A.augmentations.geometric.rotate.Rotate(limit=15, p=0.5),
    # A.Perspective(scale=[0, 0.1], keep_size=False, fit_output=False, p=1),
    # # A.Resize(224, 224),
    # A.HorizontalFlip(p=0.5),
    # A.GaussNoise(var_limit=(10.0, 50.0), mean=0),
    # # A.RandomToneCurve(scale=0.5, p=1),
    A.Normalize(mean=mean, std=std, max_pixel_value=255.0, p=1.0),
    # ClipCoords(), 
    # ToTensorV2()
], bbox_params=A.BboxParams(format='pascal_voc', min_area=1024, min_visibility=0.1, label_fields=['class_labels']))


def custom_collate_fn(batch):
    return list(batch)

class InputDataset(Dataset):

    def __init__(self, inputs_list):
       
        self.inputs_list = inputs_list

    def __len__(self):
        return len(self.inputs_list)

    def __getitem__(self, idx):
        
        img = cv2.imread(self.inputs_list[idx]['image'])
        # print(self.inputs_list[idx]['image'])
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_aug = transform(image = img, bboxes = self.inputs_list[idx]['target']['boxes'], class_labels = self.inputs_list[idx]['target']['labels'])['image']
        
        # img = img.permute(1,2,0)
        boxes = self.inputs_list[idx]['target']['boxes']
        labels = self.inputs_list[idx]['target']['labels']
        # clipped_boxes = torch.clamp(boxes, 0, 1)
        # transformed = transform(image = img, bboxes = clipped_boxes, labels = self.inputs_list[idx]['target']['labels'])
        
        # transformed_image = transformed['image']
        # transformed_bboxes = transformed['bboxes' ]
        # transformed_labels = transformed['labels']
        
        targets = {'boxes': (torch.tensor(boxes, dtype=torch.float64)), 'labels': torch.tensor(labels, dtype=torch.int64)}
        
        
        return {'image': img_aug, 'target': targets}

[0.63990641, 0.67864856, 0.74857624]
[0.23133176, 0.20776166, 0.17692773]
<class 'list'>


In [17]:
def trainModel(model, parent_dir, train_dataloader,val_dataloader, epochs_num): 
    timer = Timer()
    timer.start()
    # Defining the optimizer - adamw
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=0.01)
    
    # learning rate scheduler - CosineAnnealingLR
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50, eta_min=1e-6)
    
    num_of_epochs = epochs_num
    # Creating a directory for storing models
    model_dir = parent_dir + 'models/'
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)
    
        
    
    for epoch in range(num_of_epochs):
        model.train()
        print('Training started')
        batch_num = 0
        for batch in train_dataloader: # dataloader here, with collate function
            imgs = []
            targets = []
            for item in batch:
                imgs.append(torch.tensor(item['image']).permute(2,0,1).to(device))
                targets.append({'boxes':item['target']['boxes'].to(device), 'labels':item['target']['labels'].to(device)})
            
            # Sets the gradients of all optimized tensors to zero
            optimizer.zero_grad()
            # print('img len:', len(imgs), imgs[0].shape)
            # print('target len:', len(targets))
            result = model(imgs, targets)
   
            batch_num += 1
            # print('result updated')
            # pred - Dict[Tensor], containing classificaiton and regression losses
            # print(batch, '\n')
            # print(result)
            
            bbox_loss = result['bbox_regression']
            # class_loss  = (result['classification'], targets[0]['labels'])
            
            loss = torch.sum(bbox_loss)
            loss.backward()
            optimizer.step()
            scheduler.step()
            
            
            timer.stop()
            
            if batch_num % 50 == 0:
                print("     Epoch: ", epoch+1
                      , " train batch:", batch_num, " time: ", (timer.elapsed_time()), " bbox loss: ", bbox_loss)
            # print("     Epoch: ", epoch+1, " train batch:", batch_num, " time: ", (total_time)/1000, " bbox loss: ", bbox_loss)
                
        # save model for each epoch
        model_path = model_dir + 'model_epoch' + str(epoch + 1) + '.pth'
        torch.save(model.state_dict(), model_path)
        
        # evaluate the model
        iou_thr = 0.5
        score_thr = 0
        
        model.eval()
        print('Validation started')
        true_positive_cnt = 0
        total_cnt = 0
        with torch.no_grad():
            batch_num = 0
            for batch in val_dataloader: # dataloader here, with collate function
                imgs = []
                targets = []
                for item in batch:
                    imgs.append(torch.tensor(item['image']).permute(2,0,1).to(device))
                    targets.append({'boxes':item['target']['boxes'].to(device), 'labels':item['target']['labels'].to(device)})
                        
                preds = model(imgs)      
                batch_num += 1
                for i, img_pred in enumerate(preds):
                    filtered_idx = torchvision.ops.nms(img_pred['boxes'], img_pred['scores'], iou_thr)
                    target = targets[i]
                    tp, tot = check_pred_boxes(img_pred['boxes'][filtered_idx], target['boxes'])
                    true_positive_cnt += tp
                    total_cnt += tot
                recall = true_positive_cnt / total_cnt
                timer.stop()
                if batch_num % 50 == 0:
                    print("     Epoch: ", epoch+1, " validation batch:", batch_num, " time: ", (timer.elapsed_time()), " current recall: ", recall)
        # print("     Epoch: ", epoch+1, " recall: ", recall)
 
        torch.cuda.empty_cache()

In [18]:
import pickle
train_dataloader = pickle.load(open(model_dir + 'train_dataloader.pkl', 'rb'))
val_dataloader = pickle.load(open(model_dir + 'val_dataloader.pkl', 'rb'))

In [19]:
model = torchvision.models.detection.ssdlite320_mobilenet_v3_large(num_classes=2)
model = model.to('cuda')

trainModel(model, model_dir, train_dataloader, val_dataloader, 10)

Training started


  targets = {'boxes': (torch.tensor(boxes, dtype=torch.float64)), 'labels': torch.tensor(labels, dtype=torch.int64)}


     Epoch:  1  train batch: 50  time:  Elapsed time: 0 hours, 0 minutes, 13.68 seconds  bbox loss:  tensor(1.5968, device='cuda:0', dtype=torch.float64, grad_fn=<DivBackward0>)
     Epoch:  1  train batch: 100  time:  Elapsed time: 0 hours, 0 minutes, 27.36 seconds  bbox loss:  tensor(1.0294, device='cuda:0', dtype=torch.float64, grad_fn=<DivBackward0>)
     Epoch:  1  train batch: 150  time:  Elapsed time: 0 hours, 0 minutes, 41.67 seconds  bbox loss:  tensor(0.8714, device='cuda:0', dtype=torch.float64, grad_fn=<DivBackward0>)
     Epoch:  1  train batch: 200  time:  Elapsed time: 0 hours, 0 minutes, 55.93 seconds  bbox loss:  tensor(1.2293, device='cuda:0', dtype=torch.float64, grad_fn=<DivBackward0>)
     Epoch:  1  train batch: 250  time:  Elapsed time: 0 hours, 1 minutes, 10.28 seconds  bbox loss:  tensor(0.6762, device='cuda:0', dtype=torch.float64, grad_fn=<DivBackward0>)
     Epoch:  1  train batch: 300  time:  Elapsed time: 0 hours, 1 minutes, 24.40 seconds  bbox loss:  tens