In [None]:
# locatization
# 1. create train function
# 2. create loss functions
# 3. create models
# classification
# 1. create train function
# 

In [None]:
from torch.utils.data import DataLoader
import torchvision.transforms as T
from torchvision.models import ResNet50_Weights
import pandas as pd
import torch as th 
import pytorch_lightning as pl
from tqdm import tqdm

import src.localization as lc
import src.data_loader as dl


In [None]:
import albumentations as A
import cv2
import matplotlib.pyplot as plt

transform = A.Compose([
    A.RandomCrop(width=450, height=450),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
], bbox_params=A.BboxParams(format='coco'))


transformed = transform(image=image, bboxes=bboxes)
transformed_image = transformed['image']
transformed_bboxes = transformed['bboxes']

In [1]:
import numpy as np
import albumentations as A

np.random.seed(123)
HEIGHT, WIDTH = 720, 1280

def random_bbox():
    x1 = np.random.randint(low=0, high=WIDTH)
    y1 = np.random.randint(low=0, high=HEIGHT)
    x2 = np.random.randint(low=x1 + 1, high=WIDTH + 1)
    y2 = np.random.randint(low=y1 + 1, high=HEIGHT + 1)
    bbox_albu = A.convert_bbox_to_albumentations([x1, y1, x2, y2], source_format='pascal_voc', rows=HEIGHT, cols=WIDTH)
    bbox_yolo = A.convert_bbox_from_albumentations(bbox_albu, target_format='yolo', rows=HEIGHT, cols=WIDTH, check_validity=True)
    # NOTE: at this point the bounding box has been checked to be valid.

    return bbox_yolo


transform = A.Compose(
    [A.HorizontalFlip(), A.RandomBrightnessContrast()],
    bbox_params=A.BboxParams(format='yolo', label_fields=["class_labels"])
)
img = np.zeros((HEIGHT, WIDTH, 3), dtype=np.uint8)

for i in range(1000):
    bboxes = [random_bbox()]
    try:
        transform(image=img, bboxes=bboxes, class_labels=[1])
    except:
        print(f"[{i}] Invalid transformation of box: {str(bboxes[0])}")



In [3]:
import imgaug as ia
import imgaug.augmenters as iaa
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage


ia.seed(1)

image = ia.quokka(size=(256, 256))
bbs = BoundingBoxesOnImage([
    BoundingBox(x1=65, y1=100, x2=200, y2=150),
    BoundingBox(x1=150, y1=80, x2=200, y2=130)
], shape=image.shape)

seq = iaa.Sequential([
    iaa.Multiply((1.2, 1.5)), # change brightness, doesn't affect BBs
    iaa.Affine(
        translate_px={"x": 40, "y": 60},
        scale=(0.5, 0.7)
    ) # translate by 40/60px on x/y axis, and scale to 50-70%, affects BBs
])

# Augment BBs and images.
image_aug, bbs_aug = seq(image=image, bounding_boxes=bbs)

# print coordinates before/after augmentation (see below)
# use .x1_int, .y_int, ... to get integer coordinates
for i in range(len(bbs.bounding_boxes)):
    before = bbs.bounding_boxes[i]
    after = bbs_aug.bounding_boxes[i]
    print("BB %d: (%.4f, %.4f, %.4f, %.4f) -> (%.4f, %.4f, %.4f, %.4f)" % (
        i,
        before.x1, before.y1, before.x2, before.y2,
        after.x1, after.y1, after.x2, after.y2)
    )

# image with BBs before/after augmentation (shown below)
image_before = bbs.draw_on_image(image, size=2)
image_after = bbs_aug.draw_on_image(image_aug, size=2, color=[0, 0, 255])
plt.show()

BB 0: (65.0000, 100.0000, 200.0000, 150.0000) -> (126.5276, 169.5678, 215.3970, 202.4824)
BB 1: (150.0000, 80.0000, 200.0000, 130.0000) -> (182.4824, 156.4020, 215.3970, 189.3166)


NameError: name 'plt' is not defined

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
img_dir = '../data/train'
annotations_csv = '../data/train.csv'
annotations_df = pd.read_csv(annotations_csv)

transform = T.Compose([
    T.ToPILImage(),
    T.Resize(size=(224, 224), interpolation=T.InterpolationMode.BILINEAR),
    T.ToTensor(), 
    T.Normalize(
        mean=(0.485, 0.456, 0.406), 
        std=(0.229, 0.224, 0.225)
    )
])

class_dict = {
    "albopictus": th.tensor([1, 0, 0, 0, 0, 0]),
    "culex": th.tensor([0, 1, 0, 0, 0, 0]),
    "japonicus/koreicus": th.tensor([0, 0, 1, 0, 0, 0]),
    "culiseta": th.tensor([0, 0, 0, 1, 0, 0]),
    "anopheles": th.tensor([0, 0, 0, 0, 1, 0]),
    "aegypti": th.tensor([0, 0, 0, 0, 0, 1]),
}


In [None]:
model = lc.LocalizationNet()
model

In [None]:
model = lc.MosquitoLocalization()
parameters_backbone = [
    {'params': p, "lr": model.opt_params.get("lr", 1e-4) * 0.01, "weight_decay": model.opt_params.get("weight_decay", 1e-6) * 10}
    for _, p in model.detector.backbone.named_parameters()
]
parameters_mlp = [
    {'params': p, "lr": model.opt_params.get("lr", 1e-4), "weight_decay": model.opt_params.get("weight_decay", 1e-6)}
    for _, p in model.detector.mlp.named_parameters()
]

In [None]:
train_df = annotations_df.sample(frac=0.8,random_state=200)
val_df = annotations_df.drop(train_df.index)

train_dataset = dl.SimpleDetectionDataset(train_df, img_dir, class_dict, transform)
train_dataloader = DataLoader(train_dataset, 
                              batch_size=64, 
                              shuffle=True, 
                              num_workers=12, 
                              persistent_workers=True, 
                              pin_memory=True)

val_dataset = dl.SimpleDetectionDataset(val_df, img_dir, class_dict, transform)
val_dataloader = DataLoader(val_dataset, 
                         batch_size=64, 
                          shuffle=False, 
                          num_workers=12, 
                          persistent_workers=True, 
                          pin_memory=True)

In [None]:
dl.fill_up_cache(annotations_df, img_dir)

In [None]:
print(len(dl.CACHE))


In [None]:
for batch in tqdm(train_dataloader):
    pass

print(len(dl.CACHE))

for batch in tqdm(train_dataloader):
    pass

In [None]:
print(len(dl.CACHE))

In [None]:
th.set_float32_matmul_precision('high')
model = lc.MosquitoLocalization()
trainer = pl.Trainer(accelerator="gpu", 
                     precision='16-mixed',
                     max_epochs=10, 
                     logger=True)
trainer.fit(model=model, 
            train_dataloaders=train_dataloader, 
            val_dataloaders=val_dataloader)

In [None]:
a = next(iter(train_dataloader))

In [None]:
a["bbox_norm"].shape

In [None]:
a["bbox_norm"].dtype

In [None]:
open_clip.list_pretrained()

In [None]:
from transformers import ViTImageProcessor, ViTModel
from PIL import Image
import requests

url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
image = Image.open(requests.get(url, stream=True).raw)

processor = ViTImageProcessor.from_pretrained('facebook/dino-vitb16')
model = ViTModel.from_pretrained('facebook/dino-vitb16')

inputs = processor(images=image, return_tensors="pt")
outputs = model(**inputs)
last_hidden_states = outputs.last_hidden_state


In [None]:
import torch
from PIL import Image
import open_clip

model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-16', pretrained='laion2b_s34b_b88k')

inputs = preprocess(image).reshape(1, 3, 224, 224)

output = model.encode_image(inputs)

In [None]:
last_hidden_states[0, 0].shape

In [None]:
output.shape

In [None]:
model.visual

In [None]:
model

In [None]:
def train_localization_net(net_params = {}, opt_params = {}):
    
    model = th.compile(LocalizationNet(**net_params)).cuda()
    optimizer = th.optim.Adam(model.parameters(), **opt_params)
    
    print('start training')
    for batch in tqdm(train_dataloader):
        optimizer.zero_grad()
        x, y = batch["img"].cuda(), batch["bbox_norm"].cuda()

        y_p = model(x)
        loss = mse_loss(y, y_p)
        with th.no_grad():
            iou = iou_loss(y, y_p)
            
            
        loss.backward()
        optimizer.step()
        
    
    return model
train_localization_net()