In [1]:
import pdb
from tqdm.notebook import tqdm
import torch.nn as nn
import torch

from ffcv.loader import Loader, OrderOption
from ffcv.fields.decoders import SimpleRGBImageDecoder, BytesDecoder
from ffcv.transforms import RandomHorizontalFlip, Cutout, RandomTranslate, Convert, ToTensor, ToDevice, ToTorchImage
from ffcv.pipeline.operation import Operation
from ffcv.fields import RGBImageField, JSONField, BytesField
import torchvision.transforms
from transformers.image_transforms import center_to_corners_format

from transformers import DetrImageProcessor, DetrForObjectDetection
from datasets import load_dataset
from torch.utils.data import DataLoader
import numpy as np

In [2]:
from torch import functional as F
from ffcv.pipeline.operation import Operation
from dataclasses import replace

In [3]:
from typing import List, Union
from glob import glob
from os import path
from time import time

In [4]:
import wandb
from wandb import AlertLevel

In [5]:
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mdiana15kapatsyn[0m ([33mteam__1[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [5]:
import pandas as pd

In [6]:
from PIL import features
features.check_feature("libjpeg_turbo")

True

In [7]:
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [8]:
print(torch.cuda.is_available())

True


In [9]:
device = torch.device('cuda')

In [10]:
# Truly random shuffling (shuffle=True in PyTorch)
# ORDERING = OrderOption.RANDOM

# Unshuffled (i.e., served in the order the dataset was written)
ORDERING = OrderOption.SEQUENTIAL

# Memory-efficient but not truly random loading
# Speeds up loading over RANDOM when the whole dataset does not fit in RAM!
# ORDERING = OrderOption.QUASI_RANDOM

BATCH_SIZE = 128
NUM_WORKERS = 12
IMAGE_RESOLUTION = 256

In [11]:
class Rescale(torch.nn.Module):
    def __init__(self):
        super(Rescale, self).__init__()
        self.scale = 255.0

    def forward(self, x):
        return x / self.scale

In [18]:
image_mean = [
    0.485,
    0.456,
    0.406
  ]

image_std = [
    0.229,
    0.224,
    0.225
  ]

image_pipeline: List[Operation] = [SimpleRGBImageDecoder(), ToTensor(), Convert(torch.float16), Rescale(), 
                                   ToDevice(torch.device('cuda'), non_blocking=True), ToTorchImage(),
                                   torchvision.transforms.Normalize(image_mean, image_std)]

PIPELINES = {
  'image': image_pipeline
}

# loader = Loader('/shared_drive/user-files/laion_dataset_200M/ffcv-laion200m-1shard.beton', 
#                 batch_size=BATCH_SIZE,
#                 num_workers=NUM_WORKERS,
#                 order=ORDERING,
#                 pipelines=PIPELINES)
loader = Loader('/mnt/disks/disk-big2/ffcv-laion200m.beton', 
                batch_size=BATCH_SIZE,
                num_workers=NUM_WORKERS,
                order=ORDERING,
                pipelines=PIPELINES)

MemoryError: Unable to allocate 16.0 TiB for an array with shape (732944138157,) and data type [('sample_id', '<u8'), ('ptr', '<u8'), ('size', '<u8')]

In [15]:
# help(loader)

## model

In [13]:
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50", revision="no_timm")

model = model.to(device, dtype=torch.float16)

target_sizes = torch.tensor([[IMAGE_RESOLUTION, IMAGE_RESOLUTION]]*BATCH_SIZE).to(device)

In [14]:
def save_results_to_parquet(results, urls, model, step):
    rows = []
    columns = ['url', 'label', 'score', 'top_left_x', 'top_left_y', 'bottom_right_x', 'bottom_right_y']
    for i, result_per_image in enumerate(results):
        for score, label, box in zip(result_per_image["scores"], result_per_image["labels"], result_per_image["boxes"]):
            url = urls[i]
            box = box.detach().cpu()
            top_left_x, top_left_y, bottom_right_x, bottom_right_y = box[0].item(), box[1].item(), box[2].item(), box[3].item()
            label_name = model.config.id2label[label.item()]
            score = np.round(score.detach().cpu().item(), 2)
    
            row = [url, label_name, score, top_left_x, top_left_y, bottom_right_x, bottom_right_y]
            rows.append(row)
        
    df = pd.DataFrame(rows, columns=columns)
    df.to_parquet(f'/mnt/disks/disk-big2/laion200m-od-labels-1shard/{step}_batch.parquet')

In [15]:
def post_process_object_detection(
        outputs, threshold: float = 0.5, target_sizes = None
    ):
        out_logits, out_bbox = outputs.logits, outputs.pred_boxes

        if target_sizes is not None:
            if len(out_logits) != len(target_sizes):
                raise ValueError(
                    "Make sure that you pass in as many target sizes as the batch dimension of the logits"
                )

        # t = time()
        prob = nn.functional.softmax(out_logits, -1)
        scores, labels = prob[..., :-1].max(-1)
        # print(time() - t)

        # t = time()
        # Convert to [x0, y0, x1, y1] format
        boxes = center_to_corners_format(out_bbox)
        # print(time() - t)

        mask = scores > threshold
    
        # mask = mask.cpu()
        # scores = scores.cpu()
        # labels = labels.cpu()
        # boxes = boxes.cpu()

        # t = time()
        # Convert from relative [0, 1] to absolute [0, height] coordinates
        if target_sizes is not None:
            if isinstance(target_sizes, List):
                img_h = torch.Tensor([i[0] for i in target_sizes])
                img_w = torch.Tensor([i[1] for i in target_sizes])
            else:
                img_h, img_w = target_sizes.unbind(1)

            scale_fct = torch.stack([img_w, img_h, img_w, img_h], dim=1).to(boxes.device)
            boxes = boxes * scale_fct[:, None, :]
        # print(time() - t)

        # t = time()
        results = []
    
        # print(len(scores))
        # mask_idx, class_idx = torch.where(mask)
        # class_idx[i, i*128]

        for i in range(len(scores)):
        # for s, l, b in zip(scores, labels, boxes):
            score = scores[i][mask[i]]
            label = labels[i][mask[i]]
            box = boxes[i][mask[i]]
            # print(s.shape)
            results.append({"scores": score, "labels": label, "boxes": box})
        # print(time() - t)
        # print()

        return results

In [19]:
# scores = torch.rand(128, 100)
# mask = scores > 0.5

# coord = torch.nonzero(mask)
# torch.where(mask), torch.where(mask)[0].shape

# Run

In [22]:
run = wandb.init(
    # Set the project where this run will be logged
    project="object-detector",
    # Track hyperparameters and run metadata
    config={
    },
)

In [16]:
t = time()

for step, (batch, label) in tqdm(enumerate(loader)):
    # try:
        label_json = JSONField.unpack(label)
        
        batch_urls = [dict_['url'] for dict_ in label_json]
            
        batch_dict = {'pixel_values': batch}
        
        with torch.inference_mode():
            outputs = model(**batch_dict)
        
        results = post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)
        
        save_results_to_parquet(results=results, 
                                    urls=batch_urls, 
                                    model=model, 
                                    step=step)
    #     wandb.log({"n_batch": step})
    # except:
    #     wandb.alert(title=f"Batch Warning!",
    #                 text=f"Problem with batch {step}",
    #                 level=AlertLevel.WARN)

print(time() - t)
# wandb.alert(title=f"Run finished!",
#             text = f"Objects successfully detected in all {step+1} batches in {np.round((time() - t)/3600, 2)} hours !!! :)",
#                    level=AlertLevel.INFO)
# wandb.finish()

0it [00:00, ?it/s]

KeyboardInterrupt: 

In [21]:
step

60

In [42]:
df = pd.read_parquet('/mnt/disks/disk-big2/laion200m-od-labels-1shard/0_batch.parquet')

In [63]:
df.loc[1]['url']

'http://4.bp.blogspot.com/-VB3VQfCFnng/VIu0_zQj6BI/AAAAAAAAasE/uT9KjqByvLQ/s330/%25E8%25A8%2598%25E4%25BA%258B%25E6%259C%25AC_121214_193308_1.jpg'

In [58]:
df

Unnamed: 0,url,label,score,top_left_x,top_left_y,bottom_right_x,bottom_right_y
0,http://t0.gstatic.com/images?q=tbn:ANd9GcQsZJi...,car,1.00,56.656250,137.5000,152.00000,164.250
1,http://4.bp.blogspot.com/-VB3VQfCFnng/VIu0_zQj...,clock,1.00,118.812500,141.8750,140.12500,160.875
2,http://4.bp.blogspot.com/-VB3VQfCFnng/VIu0_zQj...,person,0.90,124.875000,221.1250,137.87500,246.875
3,http://st.depositphotos.com/3336339/4632/i/170...,tv,0.99,18.000000,21.0625,230.50000,205.250
4,http://img.deanscards.com/thumb/1357368b.jpg,book,0.90,0.875000,43.2500,255.37500,219.250
...,...,...,...,...,...,...,...
168,https://cdn11.bigcommerce.com/s-1mxugrbmxo/ima...,laptop,1.00,21.875000,39.6875,236.12500,221.750
169,http://v.fashionov.com/products/small/122421/2...,person,1.00,175.000000,151.7500,230.75000,249.750
170,http://media.rightmove.co.uk/dir/10k/9111/5323...,car,0.99,0.023438,173.3750,31.46875,226.375
171,http://media.rightmove.co.uk/dir/10k/9111/5323...,car,0.99,193.500000,168.2500,256.00000,230.000


10 batches (128 images) - 1 sec (FFCV loader)\
5 batch (128 images) - 1 sec (HF loader)

# HF dataloader

In [12]:
detr_processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")

In [20]:
FOLDER = '/shared_drive/user-files/laion_dataset_200M/laion200m-data/0-12_2M/'

my_shards = glob(path.join(FOLDER, '*'))
tar_my_shards = [file for file in my_shards if file.endswith('.tar')][:1]

seed=42 
buffer_size=100

iterable_dataset = load_dataset("webdataset", data_files={"train": tar_my_shards}, split="train", 
                       streaming=True)

def get_url(example):
        url = example['json']['url']
        example['url'] = url
        return example
    
iterable_dataset = iterable_dataset.map(get_url)
train_dataset = iterable_dataset
# .shuffle(seed=seed, buffer_size=buffer_size)

In [21]:
tar_my_shards

['/shared_drive/user-files/laion_dataset_200M/laion200m-data/0-12_2M/00682.tar']

In [46]:
def collate_fn(train_dataset): 
    images = []
    urls = []
    
    for example in train_dataset:
        images.append(example['jpg'].convert("RGB"))
        urls.append(example['url'])

    processed = detr_processor(images=images, return_tensors="pt", do_resize=False)
    # print(detr_processor)
    return processed, urls

In [47]:
batch_size = 128
num_workers = 1

dataloader = DataLoader(train_dataset, 
                        batch_size=batch_size,
                        num_workers=num_workers,
                        pin_memory=True,
                        collate_fn=collate_fn)

In [48]:
t = time()
for step, batch in tqdm(enumerate(dataloader)):
    # print(batch['pixel_values'].shape)
    batch_urls = batch[1]
    batch_tensors = batch[0]
    pdb.set_trace()
    
print(time() - t)

0it [00:00, ?it/s]

> [0;32m/tmp/ipykernel_1469033/170280622.py[0m(2)[0;36m<module>[0;34m()[0m
[0;32m      1 [0;31m[0mt[0m [0;34m=[0m [0mtime[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m----> 2 [0;31m[0;32mfor[0m [0mstep[0m[0;34m,[0m [0mbatch[0m [0;32min[0m [0mtqdm[0m[0;34m([0m[0menumerate[0m[0;34m([0m[0mdataloader[0m[0;34m)[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      3 [0;31m    [0;31m# print(batch['pixel_values'].shape)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      4 [0;31m    [0mbatch_urls[0m [0;34m=[0m [0mbatch[0m[0;34m[[0m[0;36m1[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      5 [0;31m    [0mbatch_tensors[0m [0;34m=[0m [0mbatch[0m[0;34m[[0m[0;36m0[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  batch_tensors['pixel_mask'].shape


torch.Size([128, 256, 256])


ipdb>  batch_tensors['pixel_mask'][0].shape


torch.Size([256, 256])


ipdb>  batch_tensors['pixel_mask'][0]


tensor([[1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1],
        ...,
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1]])


ipdb>  batch_tensors['pixel_mask'][0].min()


tensor(1)


ipdb>  batch_tensors['pixel_mask'][0].max()


tensor(1)


ipdb>  batch_tensors['pixel_mask'][10].min()


tensor(1)


ipdb>  batch_tensors['pixel_mask'].min()


tensor(1)


ipdb>  batch_tensors['pixel_mask'].max()


tensor(1)


ipdb>  q
