In [None]:
import yaml
import os
# Load the dataset registry. The context manager closes the file handle as
# soon as parsing finishes instead of leaving it open for the kernel's lifetime.
with open("../config/datasets.yml", "r") as config_file:
    config = yaml.safe_load(config_file)

In [None]:
# Key of the dataset entry to look up in the loaded config.
DATASET = "AbbySlidingWindow"
# Pull the value stored under that entry's "path" key and echo it.
dataset_entry = config[DATASET]
DATASET_DIR = dataset_entry["path"]
print(DATASET_DIR)

In [None]:
from fish_benchmark.utils import get_files_of_type
# Print whatever get_files_of_type reports for ".txt" in one recording directory.
clip_label_dir = "/share/j_sun/jth264/abby/test/GX017042_clips_and_labels"
txt_files = get_files_of_type(clip_label_dir, ".txt")
print(txt_files)

In [None]:
from fish_benchmark.data.dataset import get_dataset
# Build the dataset with train=False and the "videomae" model setting.
dataset_kwargs = {"model_name": "videomae", "train": False}
dataset = get_dataset(DATASET, DATASET_DIR, **dataset_kwargs)

In [None]:
# Quick arithmetic check: true division of 13032 by 32 (prints a float).
numerator, denominator = 13032, 32
print(numerator / denominator)

In [None]:
from tqdm import tqdm
# Walk the dataset once from start to finish, discarding every sample;
# tqdm shows the progress of the pass.
progress = tqdm(dataset)
for frame, label in progress:
    continue


In [None]:
# Fetch the first (frame, label) pair and report the shape of each element.
first_sample = next(iter(dataset))
frame, label = first_sample
for element in (frame, label):
    print(element.shape)

In [None]:
from fish_benchmark.utils import get_files_of_type
import numpy as np
# Collect every ".txt" annotation file reported for the dataset directory.
annotation_files = get_files_of_type(DATASET_DIR, ".txt")
if not annotation_files:
    # Fail with a clear message instead of np.concatenate's generic
    # "need at least one array to concatenate".
    raise FileNotFoundError(f"No .txt annotation files found under {DATASET_DIR}")

# ndmin=2 keeps a file with a single row (or single column) two-dimensional,
# so every per-file array has a row axis and stacking along axis 0 is well defined.
per_file_annotations = [
    np.loadtxt(path, delimiter="\t", ndmin=2) for path in annotation_files
]

# One row per annotated line across all files.
annotations = np.concatenate(per_file_annotations, axis=0)
annotations.shape

In [None]:
# Totals along axis 0 of the concatenated annotation array.
np.sum(annotations, axis=0)

In [None]:
from fish_benchmark.data.dataset import AbbyDataset
from fish_benchmark.models import get_input_transform

In [None]:
from fish_benchmark.debug import serialized_size

input_transform = get_input_transform("videomae")

# Windowing settings grouped together so they can be read (and tuned) at a glance.
window_options = {
    "window_size": 16,
    "tolerance_region": 7,
    "samples_per_window": 16,
    "step_size": 1,
}
dataset = AbbyDataset(
    DATASET_DIR,
    train=True,
    transform=input_transform,
    label_type='onehot',
    is_image_dataset=False,
    **window_options,
)

# Report the serialized size of the first sample's frame.
first_sample = next(iter(dataset))
frame, label = first_sample
print(serialized_size(frame))

In [None]:
from tqdm import tqdm
import torch


# Gather the label of every sample, then print the per-position sum divided
# by the number of samples.
label_list = []
for frame, label in tqdm(dataset):
    label_list += [label]

labels = torch.stack(label_list)
num_samples = labels.shape[0]
print(labels.sum(axis=0)/num_samples)

In [None]:
import time
import torch 
from fish_benchmark.debug import serialized_size
# Time how long torch.save takes to write one (16, 3, 224, 224) random tensor
# to the hard-coded path below.
x = torch.randn(16, 3, 224, 224)
# perf_counter() is monotonic and high-resolution, so the elapsed value cannot
# be skewed by wall-clock adjustments the way a time.time() difference can.
start = time.perf_counter()
torch.save(x, "/share/j_sun/jth264/test.pt")
print("Time:", time.perf_counter() - start)

In [None]:
# Stack 100 random clips, then index out the first one; the indexed clip is a
# view into the stacked tensor rather than an independent copy.
clip_shape = (16, 3, 224, 224)
clips = [torch.randn(*clip_shape) for _ in range(100)]
stacked = torch.stack(clips)
clip0 = stacked[0]

# Storage behind the view: very large, it is the whole stacked tensor's storage.
view_storage = clip0.storage()
print(view_storage.size())
# Serialized size of the view: very large too (~1GB, same as the full tensor).
print(serialized_size(clip0))
# Storage behind a clone: just right, only this clip's ~9MB worth.
clone_storage = clip0.clone().storage()
print(clone_storage.size())

In [1]:
import torch
from fish_benchmark.data.preprocessors import TorchVisionPreprocessor
from fish_benchmark.models import get_input_transform
from fish_benchmark.data.dataset import get_dataset
# Random uint8 tensor of shape (3, 480, 640) with values drawn from [0, 256).
img_tensor = torch.randint(
    low=0,
    high=256,
    size=(3, 480, 640),
    dtype=torch.uint8,
)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import yaml
from fish_benchmark.data.dataset import MikeDataset
DATASET = "AbbySlidingWindow"
MODEL = None  # "multipatch_dino"

# Context manager closes the config file right after parsing instead of
# leaking the handle for the lifetime of the kernel.
with open("../config/datasets.yml", "r") as config_file:
    config = yaml.safe_load(config_file)

# Only build a model-specific input transform when a model name is set;
# otherwise no augmentation object is passed to the dataset.
input_transform = get_input_transform(MODEL) if MODEL else None
dataset = get_dataset(
    DATASET,
    path="/share/j_sun/jth264/abby/train/GX017102_annotations",
    augs=input_transform,
    shuffle=True,
)

In [4]:
from fish_benchmark.utils import get_files_of_type
# Print the ".tar" files reported for one training recording directory.
tar_dir = "/share/j_sun/jth264/bites_frame_annotation_splitted/train/AT_070523_GH010367"
tar_files = get_files_of_type(tar_dir, ".tar")
print(tar_files)

['/share/j_sun/jth264/bites_frame_annotation_splitted/train/AT_070523_GH010367/AT_070523_GH010367-00000000..00000999.tar', '/share/j_sun/jth264/bites_frame_annotation_splitted/train/AT_070523_GH010367/AT_070523_GH010367-00001000..00001999.tar', '/share/j_sun/jth264/bites_frame_annotation_splitted/train/AT_070523_GH010367/AT_070523_GH010367-00002000..00002999.tar', '/share/j_sun/jth264/bites_frame_annotation_splitted/train/AT_070523_GH010367/AT_070523_GH010367-00003000..00003999.tar', '/share/j_sun/jth264/bites_frame_annotation_splitted/train/AT_070523_GH010367/AT_070523_GH010367-00004000..00004999.tar', '/share/j_sun/jth264/bites_frame_annotation_splitted/train/AT_070523_GH010367/AT_070523_GH010367-00005000..00005999.tar', '/share/j_sun/jth264/bites_frame_annotation_splitted/train/AT_070523_GH010367/AT_070523_GH010367-00006000..00006999.tar', '/share/j_sun/jth264/bites_frame_annotation_splitted/train/AT_070523_GH010367/AT_070523_GH010367-00007000..00007999.tar', '/share/j_sun/jth264/bi

In [None]:
from tqdm import tqdm
# Walk the dataset once from start to finish, discarding every (clip, label)
# pair; tqdm shows the progress of the pass.
progress = tqdm(dataset)
for clip, label in progress:
    continue

In [5]:
# Take the first (clip, label) pair from a fresh iterator over the dataset.
sample_iter = iter(dataset)
clip, label = next(sample_iter)

In [6]:
# Display the clip tensor's dimensions as the cell output.
clip.size()

torch.Size([16, 3, 220, 220])

In [None]:
# Uniform random tensor of shape (4, 3, 760, 1352).
example_shape = (4, 3, 760, 1352)
random_example = torch.rand(example_shape)

In [7]:
from PIL import Image
from torchvision.transforms import v2
from torchvision.transforms import InterpolationMode
import torch
from fish_benchmark.debug import step_timer

class TorchVisionPreprocessor:
    """Resize, center-crop and normalize image tensors using torchvision v2 ops.

    Each of the three stages is wrapped in ``step_timer`` so its duration is
    reported separately.

    Args:
        crop_size: Size passed to ``v2.CenterCrop``.
        resize_shortest: Size passed to ``v2.Resize``.
        mean: Three per-channel values subtracted from the image.
        std: Three per-channel values the image is divided by.
        interpolation: Interpolation mode handed to ``v2.Resize``.

    Note:
        Pixel values are not rescaled before normalization. The default
        statistics presumably expect inputs already scaled to [0, 1] —
        TODO confirm against callers that pass integer (0-255) tensors.
    """

    def __init__(self, crop_size=(224, 224), resize_shortest=256,
                 mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225),
                 interpolation=InterpolationMode.BICUBIC):

        # Antialiasing is explicitly disabled for the resize step.
        self.resize = v2.Resize(resize_shortest, interpolation=interpolation, antialias=False)
        self.crop = v2.CenterCrop(crop_size)
        # Shaped (3, 1, 1) so the statistics broadcast over the trailing
        # height/width axes of a (..., 3, H, W) input.
        self.mean = torch.tensor(mean).view(3, 1, 1)
        self.std = torch.tensor(std).view(3, 1, 1)

    def __call__(self, image_tensor: torch.Tensor) -> torch.Tensor:
        """Apply resize, center crop and per-channel normalization.

        Args:
            image_tensor: Tensor whose last three axes are (3, H, W).

        Returns:
            The resized, cropped and normalized tensor, on the same device as
            the input.
        """
        with step_timer("resize"):
            image_tensor = self.resize(image_tensor)
        with step_timer("crop"):
            image_tensor = self.crop(image_tensor)

        with step_timer("normalize"):
            # The statistics are created on the CPU; move them to the input's
            # device so inputs living on an accelerator do not raise a
            # device-mismatch error. This is a no-op when the devices match.
            mean = self.mean.to(image_tensor.device)
            std = self.std.to(image_tensor.device)
            image_tensor = (image_tensor - mean) / std

        return image_tensor

In [16]:

# Run the default preprocessor on the sampled clip inside an overall timer
# and print the shape of the result.
preprocessor = TorchVisionPreprocessor()
with step_timer("Preprocess"):
    processed_clip = preprocessor(clip)
    print(processed_clip.shape)

[resize] took 0.021542 seconds
[crop] took 0.000070 seconds
[normalize] took 0.007196 seconds
torch.Size([16, 3, 224, 224])
[Preprocess] took 0.029095 seconds
