In [26]:
import os
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
import numpy as np

In [27]:
import cv2
from PIL import Image

def get_bbox(obj):
    xmin = int(obj.find('xmin').text)
    ymin = int(obj.find('ymin').text)
    xmax = int(obj.find('xmax').text)
    ymax = int(obj.find('ymax').text)
    return [xmin, ymin, xmax, ymax]

def get_label(obj):
    if obj.find('name').text == "with_mask":
        return 2
    elif obj.find('name').text == "mask_weared_incorrect":
        return 3
    return 1

def get_sample(path, image_id): 
    with open(f'{path}/annotations/maksssksksss{image_id}.xml') as f:
        data = f.read()
        soup = BeautifulSoup(data, 'xml')
        objects = soup.find_all('object')

        num_objs = len(objects)
        boxes = []
        labels = []
        for i in objects:
            boxes.append(get_bbox(i))
            labels.append(get_label(i))

        # Annotation is in dictionary format
        sample = {}
        sample["boxes"] = boxes
        sample["labels"] = labels
        sample['image'] = np.array(Image.open(f'images/maksssksksss{image_id}.png').convert('RGB')) 
        sample['image_id'] = image_id
        
        return sample

In [28]:
def draw_rects(sample, save_path = None):
    result = sample['image'].copy()
    for box, label in zip(sample['boxes'], sample['labels']):
        color = (0, 0, 255)
        if label == 2:
            color = (0, 255, 0)
        elif label == 3:
            color = (255, 0, 0)
        cv2.rectangle(result, (box[0],box[1]), (box[2], box[3]), color, 2)
    
    if save_path:
        im = Image.fromarray(result)
        im.save(save_path)

    plt.imshow(result)

In [29]:
def normalize_dataset(dataset):
    for sample in dataset:
        sample['image'] = sample['image'] / 255.

def standarize_dataset(dataset):
    mean = np.mean([sample['image'].mean() for sample in dataset])
    std_dev = np.std([sample['image'].std() for sample in dataset])
    for sample in dataset:
        sample['image'] = (sample['image'] - mean) / std_dev

In [30]:
import imgaug as ia
import imgaug.augmenters as iaa
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
import random
import copy

In [31]:
def random_augumentation(sample):
    augumented = copy.deepcopy(sample)
    type = random.randint(0,31)
    
    bbs = []
    for bb in augumented['boxes']:
        bbs.append(BoundingBox(x1=bb[0], x2=bb[2], y1=bb[1], y2=bb[3]))
    bbs = BoundingBoxesOnImage(bbs, shape=augumented['image'].shape)
    
    params = {'fit_output': True}
    if type & 0x1:
        params['rotate'] = (-random.randint(1,15), random.randint(1,15))
    if type & 0x2:
        params['translate_percent'] ={"x": random.uniform(0,0.2), "y": random.uniform(0,0.2)}
    if type & 0x4:
        params['shear'] =(0,random.randint(1,10))
    if type & 0x8:
        flip_hr=iaa.Fliplr(p=1.0)
        augumented['image'], bbs = flip_hr(image = augumented['image'], bounding_boxes=bbs)
    if type & 0x10:
        params['scale'] = random.uniform(1.1,1.3)
    
    aug = iaa.Affine(**params) 
    augumented['image'], bbs = aug(image = augumented['image'], bounding_boxes=bbs)
    for i, bb in enumerate(bbs):
        augumented['boxes'][i] = [bb.x1, bb.y1, bb.x2, bb.y2]
    
    #remove bbs outside the image
    to_delete = list()
    for i, bb in enumerate(augumented['boxes']):
        if (not(0.< bb[0] <= augumented['image'].shape[1])) or \
        (not(0.< bb[2] <= augumented['image'].shape[1])) or \
        (not(0.< bb[1] <= augumented['image'].shape[0])) or \
        (not(0.< bb[3] <= augumented['image'].shape[0])):
            to_delete.append(i)
        elif bb[0] >= bb[2] or bb[1] >= bb[3]:
            to_delete.append(i)
            
    for index in sorted(to_delete, reverse=True):
        del augumented['boxes'][index]
        del augumented['labels'][index]
        
    return augumented

In [32]:
from sklearn.model_selection import train_test_split

In [33]:
def load_dataset_by_id(ids, path = '.'):
    n = len(os.listdir(f'{path}/annotations/'))
    dataset = []
    for i in ids:
        dataset.append(get_sample(path, i))
    return dataset

In [34]:
def get_split_1(train_ids, test_ids, val_ids):
    return load_dataset_by_id(train_ids), load_dataset_by_id(test_ids), load_dataset_by_id(val_ids)

In [35]:
# SPLIT2
def get_agumenteted_data(samples):
    new_samples = []
    for sample in samples:
        new_samples.append(sample)
        if (3 in sample["labels"] or 1 in sample["labels"]) and 2 in sample["labels"]:
            for _ in range(1):
                new_samples.append(random_augumentation(sample)) 
        elif 3 in sample["labels"]:
            for _ in range(12):
                new_samples.append(random_augumentation(sample)) 
        elif 1 in sample["labels"]:
            for _ in range(4):
                new_samples.append(random_augumentation(sample)) 
    return new_samples
    
def get_split_2(train_ids, test_ids, val_ids):
    train_samples = get_agumenteted_data(load_dataset_by_id(train_ids))
    test_samples = get_agumenteted_data(load_dataset_by_id(test_ids))
    val_samples = get_agumenteted_data(load_dataset_by_id(val_ids))

    normalize_dataset(train_samples)
    standarize_dataset(train_samples)
    
    normalize_dataset(test_samples)
    standarize_dataset(test_samples)
    
    normalize_dataset(val_samples)
    standarize_dataset(val_samples)
    
    return train_samples, test_samples, val_samples
    

In [36]:
#SPLIT3
def get_split_3(train_samples, test_samples, val_samples):
    return train_samples + val_samples, test_samples, val_samples

In [37]:
from sklearn.model_selection import train_test_split
annotations_id_train, annotations_id_test = train_test_split(range(853), test_size=0.2)
annotations_id_train, annotations_id_val = train_test_split(annotations_id_train, test_size=0.25)

In [38]:
import numpy as np 
import pandas as pd
import torchvision
from torchvision import transforms, datasets, models
import torch
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import matplotlib.pyplot as plt
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
import matplotlib.patches as patches


print(torch.cuda.is_available())
print(torch.__version__)
import os


True
1.7.1+cu101


In [14]:
def get_model_instance_segmentation(num_classes):
    # load an instance segmentation model pre-trained pre-trained on COCO
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes+1)

    return model

In [15]:
def grouped(iterable, n):
    "s -> (s0,s1,s2,...sn-1), (sn,sn+1,sn+2,...s2n-1), (s2n,s2n+1,s2n+2,...s3n-1), ..."
    return zip(*[iter(iterable)]*n)

# Training


In [16]:
def image_to_tensor(img):
    return torch.tensor(img.transpose((2, 0, 1)), dtype=torch.float32)

In [17]:
from datetime import datetime
def train_epoch(model, data, optimizer, logger, accumulation_steps, only_class = False):
    model.train()
    optimizer.zero_grad()
    i = 0    
    total_loss = 0
    accu_step_loss = 0.
    class_step_loss = 0.
    for batch in grouped(data, BATCH_SIZE):
        i += 1
        imgs = list(image_to_tensor(s['image']).to(device) for s in batch)
        annotations = [{k: torch.tensor(s[k]).to(device) for k in s.keys() - {'image'}} for s in batch]
        
        for a in annotations:
            if a['boxes'].shape[-1] != 4:
                 a['boxes'].resize_((1,4))
                 a['labels'].type(dtype=torch.int64)
        
        loss_dict = model(imgs, annotations)
        class_loss = loss_dict['loss_classifier']
        losses = sum(loss for loss in loss_dict.values())
        
        if only_class:
            class_loss.backward()
        else:
            losses.backward()
        
        
        total_loss += losses.item()
        accu_step_loss += losses.item()
        class_step_loss += class_loss.item()
        
        if (i + 1) % accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()
            training_stats = accu_step_loss / accumulation_steps       
            class_stats = class_step_loss / accumulation_steps
            
            accu_step_loss = 0.
            class_step_loss = 0.
            
            if only_class:
                class_lr = optimizer.param_groups[0]['lr']
                lr = 0.0
            else:
                lr = optimizer.param_groups[0]['lr']
                class_lr = 0.
            logger.log_metrics({"accu_step_loss": training_stats,
                                "accu_class_loss": class_stats,
                                "class_lr": class_lr,
                                "lr": lr})
        
    optimizer.step()
    optimizer.zero_grad()
    avg_loss = total_loss / (len(train)//BATCH_SIZE)
    return avg_loss


def evaluate(model, data):
    #model.eval()  # Turn   on the evaluation mode
    model.train() # loss calculation is a bit complicated, 
                  # for validation purposes loss can be obtained from training mode.
                  # https://stackoverflow.com/questions/60339336/validation-loss-for-pytorch-faster-rcnn/65347721#65347721
    total_loss = 0.

    with torch.no_grad():
        for sample in data:
            img = image_to_tensor(sample['image']).to(device)
            annotation = {k: torch.tensor(sample[k]).to(device) for k in sample.keys() - {'image'}}
            loss_dict = model([img], [annotation])
            total_loss += sum(loss for loss in loss_dict.values()).item()
    
    return total_loss / len(data)


def save_model(model, name = 'model-'):
    path = "./models/"
    dateTimeObj = datetime.now()
    timestamp = dateTimeObj.strftime("%d-%b-%Y_%H:%M")
    filename = path + name + timestamp + '.pt'
    torch.save(model.state_dict(), filename)

# SPLIT2

# Loading model example

In [63]:
train, test, val = get_split_2(annotations_id_train, annotations_id_test, annotations_id_val)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

model = get_model_instance_segmentation(3).to(device)
model.load_state_dict(torch.load("models/MODEL2.pt"))
model.eval()


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256)
          (relu): ReLU(inplace=True)
          (downsample)

In [None]:
class TorchDataset(object):
    def __init__(self, root):
        self.data = root
        
    def __getitem__(self, idx):
        img = image_to_tensor(self.data[idx]['image'])
        image_id = torch.tensor([idx])
        boxes = self.data[idx]['boxes']
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        num_objs = len(self.data[idx]['boxes'])
        
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        
        # suppose all instances are not crowd
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = torch.as_tensor(self.data[idx]['labels'], dtype=torch.int64)
        target["image_id"] = torch.tensor([self.data[idx]['image_id']])
        target["area"] = area
        target["iscrowd"] = iscrowd

        return img, target

    def __len__(self):
        return len(self.data)

In [None]:
test_dataset = TorchDataset(test)
train_dataset = TorchDataset(train)
val_dataset = TorchDataset(val)

In [None]:
data_loader_test = torch.utils.data.DataLoader(
test_dataset, batch_size=1, shuffle=False, num_workers=1)
data_loader_train = torch.utils.data.DataLoader(
train_dataset, batch_size=1, shuffle=False, num_workers=1)
data_loader_val = torch.utils.data.DataLoader(
val_dataset, batch_size=1, shuffle=False, num_workers=1)

In [40]:
import math
import sys
import time
from coco_utils import get_coco_api_from_dataset
from coco_eval import CocoEvaluator
import utils

In [68]:
@torch.no_grad()
def evaluate_metrics(model, data_loader, device):
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = utils.MetricLogger(delimiter="  ")
    header = 'Test:'

    coco = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = ["bbox"]
    coco_evaluator = CocoEvaluator(coco, iou_types)

    for images, targets in metric_logger.log_every(data_loader, 100, header):
        images = list(img.to(device) for img in images)

        if torch.cuda.is_available():
            torch.cuda.synchronize()
        model_time = time.time()
        outputs = model(images)

        outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        model_time = time.time() - model_time
        
        res = {target["image_id"].item(): output for output, target in zip(outputs, [targets])}
        evaluator_time = time.time()
        coco_evaluator.update(res)
        evaluator_time = time.time() - evaluator_time
        metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()
    torch.set_num_threads(n_threads)
    return coco_evaluator

In [69]:
test_eval = evaluate_metrics(model, data_loader_test, device)


creating index...
index created!
Test:  [  0/326]  eta: 0:01:30  model_time: 0.1763 (0.1763)  evaluator_time: 0.0013 (0.0013)  time: 0.2785  data: 0.0892  max mem: 2507
Test:  [100/326]  eta: 0:00:34  model_time: 0.1532 (0.1467)  evaluator_time: 0.0006 (0.0022)  time: 0.1594  data: 0.0017  max mem: 2507
Test:  [200/326]  eta: 0:00:19  model_time: 0.1530 (0.1477)  evaluator_time: 0.0009 (0.0016)  time: 0.1513  data: 0.0016  max mem: 2507
Test:  [300/326]  eta: 0:00:03  model_time: 0.1414 (0.1473)  evaluator_time: 0.0009 (0.0015)  time: 0.1457  data: 0.0017  max mem: 2507
Test:  [325/326]  eta: 0:00:00  model_time: 0.1486 (0.1473)  evaluator_time: 0.0009 (0.0015)  time: 0.1498  data: 0.0016  max mem: 2507
Test: Total time: 0:00:49 (0.1517 s / it)
Averaged stats: model_time: 0.1486 (0.1473)  evaluator_time: 0.0009 (0.0015)
Accumulating evaluation results...
DONE (t=0.03s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.189
 Average Precision  

In [None]:
evaluate_metrics(model, data_loader_train, device)
evaluate_metrics(model, data_loader_val, device)
evaluate_mectrics(model, data_loader_test, device)

In [43]:
from src.neptune_logging import NeptuneLogger, DummyLogger

train, test, val = get_split_2(annotations_id_train, annotations_id_test, annotations_id_val)
val_dataset = TorchDataset(val)
data_loader_val = torch.utils.data.DataLoader(
val_dataset, batch_size=1, shuffle=False, num_workers=1)

num_epochs = 25
freezed_epochs = 4
BATCH_SIZE = 1 # no gpu space for more, we need to accumulate gradient, and update it every ACCUMULATE steps
ACCUMULATE = 3
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [44]:

model = get_model_instance_segmentation(3).to(device)
# parameters
predictor_params = [p for p in model.roi_heads.box_predictor.parameters() if p.requires_grad]
params = [p for p in model.parameters() if p.requires_grad]
predictor_optimizer = torch.optim.SGD(predictor_params, lr=0.0001,
                                      momentum=0.9)
optimizer = torch.optim.SGD(params, lr=0.0002,
                                      momentum=0.9, weight_decay=0.0005)

lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=10,
                                                   gamma=0.2)

best_loss = 10000
best_precision = 0

logger = NeptuneLogger("Face mask - freez detection", None)
best_loss = 10e5
for i in range(num_epochs):
    if i < freezed_epochs:
        trainig_stats = train_epoch(model, train, predictor_optimizer, logger, ACCUMULATE, True)
    else:
        trainig_stats = train_epoch(model, train, optimizer, logger, ACCUMULATE)
        lr_scheduler.step()
    val_stats = evaluate(model, val)
    
    test_eval = evaluate_metrics(model, data_loader_val, device)
    import io
    from contextlib import redirect_stdout

    f = io.StringIO()
    with redirect_stdout(f):
        test_eval.coco_eval['bbox'].summarize()
    result = f.getvalue()

    result = result.split()
    
    logger.log_metrics({"training_loss": trainig_stats,
                        "val_loss": val_stats,
                        "average_precision": float(result[12]),
                        "average_recall": float(result[117])})
    
    print('Epoch:', i + 1, "Train loss: ", trainig_stats, "Validation loss:", val_stats)
    
    if val_stats < best_loss:
        best_loss = val_stats
        save_model(model, 'model_experiment')
        
    elif best_precision > float(result[12]):
        best_precision = float(result[12])
        save_model(model, 'model_experiment')

del model

psutil is not installed. You will not be able to abort this experiment from the UI.
psutil is not installed. Hardware metrics will not be collected.


https://app.neptune.ai/ziomm23/face-mask/e/FAC-30


ValueError: All bounding boxes should have positive height and width. Found invalid box [0.0, 0.0, 0.0, 0.0] for target at index 0.

In [None]:
print(float(result[12]))