In [1]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset
import matplotlib.pyplot as plt
import time
import os
import copy
from math import floor
import cv2


# Let cuDNN benchmark the available convolution algorithms and pick the fastest one.
cudnn.benchmark = True
plt.ion()   # interactive mode

  from .autonotebook import tqdm as notebook_tqdm


<contextlib.ExitStack at 0x7f044c467550>

In [2]:
# Directory holding the custom YOLOv5 checkpoints. The default is the location
# used when this notebook was written; set the YOLO_WEIGHTS_DIR environment
# variable to run on another machine without editing the cell.
WEIGHTS_DIR = os.environ.get('YOLO_WEIGHTS_DIR', '/home/t/tianqi/CS4243_proj/my_utils')

# Two custom detectors loaded through torch.hub. `.conf` is the confidence
# threshold applied to detections; the knife detector uses a much lower one
# (0.01) than the gun detector (0.1).
gun_model = torch.hub.load('ultralytics/yolov5', 'custom', path=os.path.join(WEIGHTS_DIR, 'best1110.pt'))
gun_model.conf = 0.1
knife_model = torch.hub.load('ultralytics/yolov5', 'custom', path=os.path.join(WEIGHTS_DIR, 'knife_best.pt'))
knife_model.conf = 0.01

def get_classifcation_bounding_box(file_path, model, asize):
    """Run a detector on one image and return its boxes as plain dicts.

    Args:
        file_path: Image input, forwarded unchanged to ``model``.
        model: Callable invoked as ``model(file_path, size=asize)``; its
            result must expose ``xyxy``, of which only ``xyxy[0]`` is read.
        asize: Value passed to the model as the ``size`` keyword.

    Returns:
        A list with one dict per detection row, in the order the model
        produced them. Each dict holds the integer ``"class"`` (column 5)
        and the float corners ``"xmin"``, ``"ymin"``, ``"xmax"``, ``"ymax"``
        (columns 0 to 3).
    """
    detections = model(file_path, size=asize).xyxy[0]
    return [
        {
            "class": int(row[5]),
            "xmin": float(row[0]),
            "ymin": float(row[1]),
            "xmax": float(row[2]),
            "ymax": float(row[3]),
        }
        for row in detections
    ]

def detect_overlap(mask, xmin, xmax, ymin, ymax):
    """Tell whether the mask has any non-zero element inside a window.

    The window is the half-open slice ``mask[ymin:ymax, xmin:xmax]``
    (rows indexed by y, columns by x). An empty window yields ``False``.
    """
    window = mask[ymin:ymax, xmin:xmax]
    hits = np.count_nonzero(window)
    return hits > 0

def get_seg_file(img_path):
    """Load the segmentation image that corresponds to ``img_path``.

    The first ``'/'``-separated component of ``img_path`` is replaced by
    ``"seg"`` and the resulting path is read with ``cv2.imread`` using
    flag ``0`` (single-channel grayscale). Whatever ``cv2.imread`` returns
    is passed back unchanged.
    """
    _, *tail = img_path.split('/')
    seg_path = '/'.join(["seg", *tail])
    return cv2.imread(seg_path, 0)

def get_seg_bin(img_path, thres=50):
    """Return a 0/1 mask built from the segmentation image of ``img_path``.

    The segmentation image is fetched with ``get_seg_file`` and binarised
    with ``cv2.threshold(..., cv2.THRESH_BINARY)`` using ``thres`` as the
    threshold and ``1`` as the value written for pixels above it.
    """
    gray = get_seg_file(img_path)
    binary = cv2.threshold(gray, thres, 1, cv2.THRESH_BINARY)[1]
    return binary

# Running total of knife detections across all calls to combine_mask_bounding_box.
count = 0
def combine_mask_bounding_box(file_path, mask_thres, box_thres):
    """Grow the segmentation mask with detector boxes that touch it.

    Gun boxes (detector size 540) and knife boxes (detector size 640) are
    collected for ``file_path``; the number of knife boxes is added to the
    module-level ``count``. The binary mask comes from ``get_seg_bin`` with
    ``mask_thres``. Boxes are visited gun-first; every box whose floored
    window overlaps the mask has its area, padded by ``box_thres`` pixels on
    each side and clipped to the mask bounds, set to 1.

    The mask is updated in place while iterating, so a later box is tested
    against the mask as already grown by earlier boxes.

    Returns:
        The (possibly enlarged) mask array.
    """
    global count
    gun_boxes = get_classifcation_bounding_box(file_path, gun_model, 540)
    knife_boxes = get_classifcation_bounding_box(file_path, knife_model, 640)
    count += len(knife_boxes)

    mask = get_seg_bin(file_path, thres=mask_thres)
    height, width = mask.shape

    for det in gun_boxes + knife_boxes:
        left, right, top, bottom = (
            floor(det[key]) for key in ("xmin", "xmax", "ymin", "ymax")
        )
        if not detect_overlap(mask, left, right, top, bottom):
            continue
        # Pad the box by box_thres and clip it to the mask extent.
        row_span = slice(max(0, top - box_thres), min(bottom + box_thres, height))
        col_span = slice(max(0, left - box_thres), min(right + box_thres, width))
        mask[row_span, col_span] = 1
    return mask


Using cache found in /home/t/tianqi/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2022-11-10 Python-3.8.10 torch-1.12.1+cu102 CUDA:0 (NVIDIA TITAN RTX, 24220MiB)

Fusing layers... 
Model summary: 157 layers, 7015519 parameters, 0 gradients, 15.8 GFLOPs
Adding AutoShape... 
Using cache found in /home/t/tianqi/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2022-11-10 Python-3.8.10 torch-1.12.1+cu102 CUDA:0 (NVIDIA TITAN RTX, 24220MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7012822 parameters, 0 gradients
Adding AutoShape... 


In [3]:
class SegDataset(Dataset):
    """In-memory dataset of images masked by their segmentation/detection mask.

    ``root_dir`` must contain one sub-folder per entry of ``classes``. At
    construction time every file in those folders is read, masked with the
    result of ``combine_mask_bounding_box`` and stored in ``self.segs``;
    ``self.labels`` holds the index of the sub-folder in ``classes``.

    Args:
        root_dir: Directory containing the class sub-folders.
        transform: Optional callable applied to an image in ``__getitem__``.
        mask_thres: Threshold forwarded to ``combine_mask_bounding_box``
            (default 0).
        box_thres: Box padding forwarded to ``combine_mask_bounding_box``
            (default 50).

    NOTE(review): ``cv2.imread`` returns channels in BGR order; confirm that
    the transform pipeline applied to these images expects that order.
    """

    # Class sub-folder names; the position in this list is the integer label.
    classes = ['carrying', 'normal', 'threat']

    def __init__(self, root_dir, transform=None, mask_thres=0, box_thres=50):
        self.root_dir = root_dir
        self.transform = transform
        self.mask_thres = mask_thres
        self.box_thres = box_thres
        self.segs = []
        self.labels = []
        self.__preprocess__()


    def __preprocess__(self):
        """Read, mask and cache every image of every class sub-folder."""
        for label, subfolder in enumerate(self.classes):
            class_dir = os.path.join(self.root_dir, subfolder)
            # os.listdir returns entries in arbitrary order; sorting keeps the
            # sample order identical from run to run.
            for f in sorted(os.listdir(class_dir)):
                img_path = os.path.join(class_dir, f)
                mask = combine_mask_bounding_box(img_path, self.mask_thres, self.box_thres)
                img = cv2.imread(img_path)

                # Keep only the pixels where the mask is non-zero.
                seg_img = cv2.bitwise_and(img, img, mask = mask)

                self.segs.append(seg_img)
                self.labels.append(label)

    def __len__(self):
        """Number of cached samples."""
        return len(self.segs)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``, applying ``transform`` if set."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        image = self.segs[idx]
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label


# Training-time pipeline: random horizontal flip as augmentation, then
# tensor conversion and per-channel normalisation.
data_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

# Evaluation pipeline: same as above without the random flip, so that
# validation/test metrics are deterministic for a given set of weights.
eval_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])


data_dir = 'data'
image_datasets = {x: SegDataset(os.path.join(data_dir, x),
                                transform=data_transforms if x == 'train' else eval_transforms)
                  for x in ['train', 'val', 'test']}


# Only the training split is shuffled; evaluation order does not need to vary.
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                             shuffle=(x == 'train'), num_workers=4)
              for x in ['train', 'val', 'test']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}
# class_names = image_datasets['train'].classes

# Use the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [4]:
# Show the properties of the selected `device` instance (not the `torch.device`
# class); evaluates to None on a CPU-only machine, where there is nothing to query.
torch.cuda.get_device_properties(device) if device.type == "cuda" else None

_CudaDeviceProperties(name='NVIDIA TITAN RTX', major=7, minor=5, total_memory=24220MB, multi_processor_count=72)

In [5]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Fine-tune ``model`` and return it with the best validation weights loaded.

    Relies on the module-level ``dataloaders`` and ``dataset_sizes`` dicts
    (keys ``'train'``, ``'val'``, ``'test'``) and on ``device``.

    Every epoch runs three phases in order: ``'train'`` (gradients enabled,
    optimizer stepped per batch, scheduler stepped once at the end of the
    phase), then ``'val'`` and ``'test'`` (evaluation mode, no gradients).
    Loss and accuracy of each phase are printed. Only the validation accuracy
    is used to pick the best weights; the test phase is reported but never
    influences model selection.

    Args:
        model: Network to train; updated in place.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer over the model parameters.
        scheduler: LR scheduler, stepped once per epoch.
        num_epochs: Number of epochs to run.

    Returns:
        ``model`` with the state dict of the best validation epoch loaded.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training phase followed by validation and test evaluation
        for phase in ['train', 'val', 'test']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics: loss is a batch mean, so weight it by batch size
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # deep copy the model whenever validation accuracy improves
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    # '.4f' (precision 4), matching the per-epoch lines; '4f' would mean width 4.
    print(f'Best val Acc: {best_acc:.4f}')

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [6]:
# Start from a pretrained ResNet-50 and fine-tune the whole network.
model_ft = models.resnet50(pretrained=True)
num_ftrs = model_ft.fc.in_features
# Replace the final fully connected layer so that each output sample has 3 values,
# one per class sub-folder used by SegDataset ('carrying', 'normal', 'threat').
model_ft.fc = nn.Linear(num_ftrs, 3)

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# Base learning rate for SGD; it is also printed right before training starts.
learning_rate = 0.0005
# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(model_ft.parameters(), lr=learning_rate, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)



In [7]:
# Log the base learning rate, then fine-tune for 40 epochs. train_model returns
# the same model with the weights of the best validation epoch loaded.
print(learning_rate)
model_ft = train_model(model_ft, criterion, optimizer_ft, exp_lr_scheduler,
                       num_epochs=40)

0.0005
Epoch 0/39
----------
train Loss: 1.1365 Acc: 0.4005
val Loss: 0.9216 Acc: 0.5223
test Loss: 0.9081 Acc: 0.5632

Epoch 1/39
----------
train Loss: 0.9718 Acc: 0.5068
val Loss: 0.8467 Acc: 0.6089
test Loss: 0.8402 Acc: 0.6053

Epoch 2/39
----------
train Loss: 0.8109 Acc: 0.6350
val Loss: 0.9210 Acc: 0.5932
test Loss: 0.8855 Acc: 0.6158

Epoch 3/39
----------
train Loss: 0.6479 Acc: 0.7232
val Loss: 0.7912 Acc: 0.7297
test Loss: 0.7774 Acc: 0.7211

Epoch 4/39
----------
train Loss: 0.5697 Acc: 0.7677
val Loss: 0.6185 Acc: 0.7585
test Loss: 0.6117 Acc: 0.7526

Epoch 5/39
----------
train Loss: 0.5134 Acc: 0.7851
val Loss: 0.5993 Acc: 0.7848
test Loss: 0.6235 Acc: 0.7684

Epoch 6/39
----------
train Loss: 0.4649 Acc: 0.8183
val Loss: 0.8309 Acc: 0.7113
test Loss: 0.8018 Acc: 0.6947

Epoch 7/39
----------
train Loss: 0.3359 Acc: 0.8786
val Loss: 0.4508 Acc: 0.8320
test Loss: 0.4501 Acc: 0.8211

Epoch 8/39
----------
train Loss: 0.2592 Acc: 0.9125
val Loss: 0.4358 Acc: 0.8399
test Lo

In [9]:
# Persist only the weights (state_dict) of the fine-tuned model, relative to the working directory.
torch.save(model_ft.state_dict(), "seg_resnet50.pt")

In [10]:
from sklearn.metrics import classification_report

# Collect predictions and ground-truth labels over the whole test split.
y_pred = []
y_true = []

model_ft.eval()

# Inference only: no_grad avoids building an autograd graph for every batch.
with torch.no_grad():
    for inputs, labels in dataloaders["test"]:
        inputs = inputs.to(device)

        outputs = model_ft(inputs)
        _, preds = torch.max(outputs, 1)
        # Store plain Python ints rather than 0-dim tensors for sklearn.
        y_pred.extend(preds.cpu().tolist())
        y_true.extend(labels.cpu().tolist())


print(classification_report(y_true, y_pred, labels=[0,1,2]))

              precision    recall  f1-score   support

           0       0.77      0.86      0.81        79
           1       0.78      0.81      0.80        43
           2       0.88      0.74      0.80        68

    accuracy                           0.81       190
   macro avg       0.81      0.80      0.80       190
weighted avg       0.81      0.81      0.80       190

