In [None]:
# basic python and ML Libraries
import os
import random
import numpy as np
import pandas as pd
import pathlib

# reading images using OpenCV
import cv2

# matplotlib & others for visualization
import matplotlib.pyplot as plt
import matplotlib.patches as patches


# torchvision libraries
import torch
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Miscellaneous
from torchinfo import summary
from tqdm.auto import tqdm
from timeit import default_timer as timer
from typing import List, Union

# for images
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
from PIL import Image, ImageDraw

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Dataset root, relative to the working directory: data/obj_detection_tutorial/data
root = pathlib.Path("data", "obj_detection_tutorial", "data")


In [None]:
# functions
def show_bbox(image_path, label_path):
    """Draw the boxes listed in a YOLO-style label file on an image and show it.

    Every non-blank line of ``label_path`` must hold five whitespace-separated
    fields, ``label x_center y_center width height``. The four numbers are
    multiplied by the image width/height, i.e. they are read as fractions of
    the image size. Each box is outlined in red, 5 px wide, and the annotated
    image is handed to ``Image.show()``.

    Args:
        image_path: Path of the image to open.
        label_path: Path of the text file with one box per line.

    Raises:
        ValueError: If a non-blank line does not have exactly five fields, or
            one of the four coordinates is not a number.
    """
    with Image.open(image_path) as image:
        draw = ImageDraw.Draw(image)
        # The image size does not change while drawing, so read it once.
        W, H = image.size

        with open(label_path, 'r') as file:
            for line in file:
                # split() with no argument tolerates trailing spaces, tabs and
                # repeated separators; blank lines yield no fields and are skipped.
                fields = line.split()
                if not fields:
                    continue

                _label, x, y, w, h = fields
                x, y, w, h = float(x), float(y), float(w), float(h)

                # centre/size fractions -> pixel corner coordinates
                x1 = (x - w/2) * W
                y1 = (y - h/2) * H
                x2 = (x + w/2) * W
                y2 = (y + h/2) * H

                draw.rectangle((x1, y1, x2, y2), outline=(255, 0, 0), width=5)

        image.show()

def tensorprint(tensor):
    """Print a two-line summary of ``tensor``.

    Line one reports shape, number of dimensions, dtype and device (with a
    line break before the dtype). Line two reports the largest and smallest
    values, each followed in square brackets by the result of
    ``argmax()`` / ``argmin()``.
    """
    overview = (
        "Shape: ", tensor.shape,
        " , Dimension: ", tensor.ndim,
        " \nDtype: ", tensor.dtype,
        " , Device: ", tensor.device,
    )
    print(*overview)

    # Extrema are only computed after the overview has been printed, so a
    # tensor that cannot be reduced still gets its first line.
    extrema = (
        "Max: ", tensor.amax(), f'[{tensor.argmax()}]',
        " , Min: ", tensor.amin(), f'[{tensor.argmin()}]',
    )
    print(*extrema)

def torch_rng(seed=42):
    """Seed PyTorch's random number generators for reproducible runs.

    Args:
        seed: Integer seed passed to both ``torch.manual_seed`` and
            ``torch.cuda.manual_seed``. Defaults to 42, the value this
            function previously hard-coded, so existing ``torch_rng()`` calls
            behave as before.
    """
    torch.manual_seed(seed)
    # Safe to call on CPU-only machines; PyTorch ignores it when CUDA is unavailable.
    torch.cuda.manual_seed(seed)

def collate_fn(batch):
    """Regroup a list of per-sample tuples into one tuple per field.

    ``[(img0, tgt0), (img1, tgt1)]`` becomes ``((img0, img1), (tgt0, tgt1))``.
    Nothing is stacked, so samples may differ in size (for example a different
    number of boxes per image). An empty batch yields an empty tuple.
    """
    per_field = zip(*batch)
    return tuple(per_field)

def OD_train_step(model, optimizer, data_loader, device = device):
    """Run one optimisation pass over ``data_loader``.

    Args:
        model: Module that, called as ``model(images, targets)``, returns a
            mapping whose values are loss terms.
        optimizer: Optimizer stepped once per batch.
        data_loader: Iterable yielding ``(images, targets)`` pairs, where
            ``targets`` is a sequence of dicts of tensors.
        device: Device every image and target tensor is moved to.

    Returns:
        list[float]: The summed loss of every batch, in iteration order.
    """
    model.train()
    batch_losses = []

    for step, (images, targets) in enumerate(data_loader, start=1):
        images = [image.to(device) for image in images]
        targets = [
            {key: value.to(device) for key, value in target.items()}
            for target in targets
        ]

        # The individual loss terms are added into one scalar to backpropagate.
        loss_dict = model(images, targets)
        total_loss = sum(loss_dict.values())
        batch_loss = total_loss.item()
        batch_losses.append(batch_loss)

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        # Report every fourth batch to keep the cell output short.
        if step % 4 == 0:
            print(f"Training loss: {batch_loss}")

    return batch_losses

def OD_test_step(model, optimizer, data_loader, device = device):
    """Compute the loss on every batch of ``data_loader`` without training.

    torchvision detection models return their loss dict only in training
    mode (in eval mode they return predictions instead), so the model is put
    into training mode for the duration of the pass and its previous mode is
    restored afterwards. The forward pass runs under
    ``torch.inference_mode()`` and no optimizer step is taken, so no weights
    are updated.

    Args:
        model: Detection model that, called as ``model(images, targets)`` in
            training mode, returns a mapping whose values are loss terms.
        optimizer: Unused; kept so the signature mirrors ``OD_train_step``.
        data_loader: Iterable yielding ``(images, targets)`` pairs, where
            ``targets`` is a sequence of dicts of tensors.
        device: Device every image and target tensor is moved to.

    Returns:
        list[float]: The summed loss of every batch, in iteration order.
    """
    val_loss_list = []

    # Remember the caller's mode so it can be put back when we are done.
    was_training = model.training
    model.train()
    try:
        for step, (images, targets) in enumerate(data_loader, start=1):
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            with torch.inference_mode():
                loss_dict = model(images, targets)
                losses = sum(loss for loss in loss_dict.values())
                loss_value = losses.item()

            val_loss_list.append(loss_value)

            # Report every fourth batch to keep the cell output short.
            if step % 4 == 0:
                print(f"Testing loss: {loss_value}")
    finally:
        model.train(was_training)

    # Returned only after the whole loader has been consumed.
    return val_loss_list



In [None]:
# show image
# image_p = train_dir / '1.jpg'
# label_p = root / 'labels' / 'train' / '1.txt'
#show_bbox(image_p, label_p)

In [None]:
# setup
# Each split keeps its images in <root>/Images/<split> and its label files in
# <root>/labels/<split>.
train_dir, test_dir, val_dir = (
    root / 'Images' / split for split in ("train", "test", "val")
)
train_label_dir, test_label_dir, val_label_dir = (
    root / 'labels' / split for split in ("train", "test", "val")
)

In [None]:
# dataset 

class ConeDataset(torch.utils.data.Dataset):
    """Detection dataset pairing images with YOLO-style label files.

    Images and label files are each listed in sorted order and paired by
    position, so sample ``idx`` uses the ``idx``-th image and the ``idx``-th
    label file.
    NOTE(review): this pairing assumes both directories hold the same stems
    in the same order (e.g. ``1.jpg`` / ``1.txt``) - confirm for the dataset.

    Every non-blank label line is read as
    ``label x_center y_center width height`` with the four numbers taken as
    fractions of the image size. The label field itself is ignored: every box
    gets class id 1.

    Args:
        image: Directory containing the image files.
        label: Directory containing the label text files.
        transforms: Optional callable ``(img, target) -> (img, target)``
            applied to each sample; pass ``None` to get the resized PIL image.
        size: ``(width, height)`` every image is resized to, and the pixel
            space the boxes are expressed in. Defaults to ``(224, 224)``.
    """

    def __init__(self, image, label , transforms, size=(224, 224)) -> None:
        self.image = image
        self.labelp = label
        self.transforms = transforms
        self.size = tuple(size)
        self.imgs = sorted(os.listdir(image))
        self.label = sorted(os.listdir(label))

    def __getitem__(self, idx):
        """Return ``(img, target)`` for sample ``idx``.

        ``target`` is a dict with:
          * ``boxes``   - float32 tensor of shape (N, 4), rows are
            ``[xmin, ymin, xmax, ymax]`` in pixels of the resized image
          * ``labels``  - int64 tensor of N ones
          * ``area``    - box areas, shape (N,)
          * ``iscrowd`` - int64 zeros, shape (N,)
        N is 0 for an empty label file.
        """
        W, H = self.size
        img_path = os.path.join(self.image, self.imgs[idx])
        label_path = os.path.join(self.labelp, self.label[idx])

        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
        # under its current name. resize() returns a new in-memory image, so
        # the file handle can be closed as soon as the block exits.
        with Image.open(img_path) as raw:
            img = raw.resize((W, H), Image.LANCZOS).convert("RGB")

        boxes = []
        with open(label_path) as file:
            for line in file:
                # split() tolerates trailing/repeated whitespace; blank lines
                # produce no fields and must not count as a box.
                fields = line.split()
                if not fields:
                    continue

                x_center, y_center, box_wt, box_ht = (float(v) for v in fields[1:5])

                # Coordinates stay floating point: truncating to int can
                # collapse a thin box to zero width/height, which Faster
                # R-CNN rejects as an invalid box.
                xmin = (x_center - box_wt/2) * W
                xmax = (x_center + box_wt/2) * W
                ymin = (y_center - box_ht/2) * H
                ymax = (y_center + box_ht/2) * H

                boxes.append([xmin, ymin, xmax, ymax])

        num_boxes = len(boxes)
        # Explicit (N, 4) shape so an image without boxes yields a (0, 4)
        # tensor instead of a 1-D empty tensor that cannot be column-indexed.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(num_boxes, 4)
        labels = torch.ones((num_boxes,), dtype=torch.int64)
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((num_boxes,), dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "area": area,
            "iscrowd": iscrowd,
        }

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Number of image files found in the image directory."""
        return len(self.imgs)
        

# Smoke test: build the training dataset without transforms and display its
# first (image, target) pair as the cell output.
e = ConeDataset(image=train_dir, label=train_label_dir, transforms=None)
e[0]
        

In [None]:
# dataloading 
# Number of samples per batch, passed to both the train and test DataLoader below.
BATCH_SIZE = 4

def get_transform(img, target):
    """Resize ``img`` to 224x224 and convert it to a tensor.

    ``target`` is returned unchanged, so the function matches the
    ``(img, target) -> (img, target)`` shape ``ConeDataset`` expects from its
    ``transforms`` argument.
    """
    steps = [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ]
    pipeline = transforms.Compose(steps)
    return pipeline(img), target


train_data = ConeDataset(image=train_dir, label=train_label_dir, transforms=get_transform)
test_data = ConeDataset(image=test_dir, label=test_label_dir, transforms=get_transform)

# Only the training loader shuffles; the test loader keeps dataset order.
# collate_fn keeps each batch as tuples instead of stacking tensors.
train_dataloader = DataLoader(
    train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
    collate_fn=collate_fn,
)
test_dataloader = DataLoader(
    test_data,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=collate_fn,
)

# Pull one test batch and move it to `device`; the model cell reuses
# `images` and `targets` for a trial forward pass.
img, target = next(iter(test_dataloader))
images = [image.to(device) for image in img]
targets = [
    {key: value.to(device) for key, value in sample.items()}
    for sample in target
]

In [None]:
# model
# Start from torchvision's Faster R-CNN (ResNet-50 FPN backbone) with its
# default pretrained weights.
model_6 = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")

# Replace the box-prediction head with one sized for this dataset.
# 2 classes: the dataset labels every box 1, and torchvision's detection
# heads count class 0 as background (see the torchvision detection docs).
num_classes = 2
in_features = model_6.roi_heads.box_predictor.cls_score.in_features
model_6.roi_heads.box_predictor = FastRCNNPredictor(
    in_channels=in_features,
    num_classes=num_classes,
)

model_6.to(device)

# Trial forward pass on the sample batch; its result is the cell output.
model_6(images, targets)

In [None]:
# Training
# Optimise only the parameters that are still trainable.
params = list(filter(lambda p: p.requires_grad, model_6.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)
# Multiply the learning rate by 0.1 every 3 epochs.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)

epochs = 5
for epoch in range(epochs):
    # Each epoch overwrites train_results / test_results, so after the loop
    # they hold the losses of the final epoch only.
    train_results = OD_train_step(model_6, optimizer, train_dataloader)
    lr_scheduler.step()
    test_results = OD_test_step(model_6, optimizer, test_dataloader)
    

In [None]:
# prediction
# Runs the detector on every file in val_dir, draws the boxes scoring at
# least 0.8 together with their class id, and shows each annotated image.
val = os.listdir(val_dir)

# Eval mode makes the detector return predictions (boxes / labels / scores)
# rather than losses. It only needs to be set once, not per image.
model_6.eval()

for i in range(len(val)):
    joinedp = os.path.join(val_dir, val[i])
    image = cv2.imread(joinedp)
    if image is None:
        # cv2.imread returns None (it does not raise) for unreadable or
        # non-image files; skip them instead of crashing on .copy().
        print(f"Skipping unreadable image: {joinedp}")
        continue

    orig_image = image.copy()

    # OpenCV loads BGR uint8 in HWC order; the model is fed RGB float in
    # [0, 1], CHW order, with a leading batch dimension.
    image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB).astype(np.float32)
    image /= 255.0
    image = np.transpose(image, (2,0,1))
    image = torch.tensor(image, dtype=torch.float).to(device)
    image = torch.unsqueeze(image, 0)

    with torch.inference_mode():
        output = model_6(image)

    outputs = [{k: v.to('cpu') for k, v in t.items()} for t in output]

    scores = outputs[0]['scores'].numpy()
    # One mask for boxes and labels keeps them index-aligned after filtering.
    keep = scores >= 0.8
    boxes = outputs[0]['boxes'].numpy()[keep].astype(np.int32)
    pred_classes = outputs[0]['labels'].numpy()[keep]
    draw_boxes = boxes.copy()

    for k, box in enumerate(draw_boxes):
        cv2.rectangle(orig_image,
                    (int(box[0]), int(box[1])),
                    (int(box[2]), int(box[3])),
                    (0,0,255), 2)
        cv2.putText(orig_image, str(pred_classes[k]),
                    (int(box[0]), int(box[1]-5)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0),
                    2, lineType=cv2.LINE_AA)

    # Show each image once, after all of its boxes are drawn. Images without
    # detections are shown too. The window closes on a key press or after 10 s.
    cv2.imshow('Prediction', orig_image)
    cv2.waitKey(10000)
    cv2.destroyAllWindows()




In [None]:
# saving / loading
# MODEL_PATH = pathlib.Path("Models")
# MODEL_NAME = "03_ODTC.pt"
# MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME
# torch.save(model_6.state_dict(), MODEL_SAVE_PATH)

# path_rel = r'C:\Users\earle\PythonMLenv\env\projects\Models\03_TFC.pt'
# model_5.load_state_dict(torch.load(path_rel))
# model_5.eval()
