In [1]:
%load_ext autoreload
%autoreload 2
import os

import torch
from torch.utils.data import DataLoader
from datasets import TransformedRoboEireanData, RoboEireanDataWithEncoder
import utils
from pytorch_lightning.callbacks import RichProgressBar
import pytorch_lightning as pl
from models import MultiClassJetNet
import torchvision.transforms as T
# Fix the seed of PyTorch's default random number generator for this session.
torch.manual_seed(2)

<torch._C.Generator at 0x7f3a6c025590>

### RoboEireann Real Data

In [2]:
# Default scalings for the default boxes, determined by k-means clustering.
# One row per default box, two values per row
# (presumably width/height as fractions of the image — TODO confirm order).
default_box_scalings = torch.tensor(
    [
        [0.06549374, 0.12928654],
        [0.11965626, 0.26605093],
        [0.20708716, 0.38876095],
        [0.31018215, 0.47485098],
        [0.415882, 0.8048184],
        [0.7293086, 0.8216225],
    ]
)

# `classes` is the single source of truth for the label set: the encoder is
# built from it here and the training cell sizes the model with len(classes),
# so both stay in sync when the list changes.
classes = ["robot"]
encoder = utils.Encoder(default_box_scalings, classes)

# Train/val splits read from data/transformed/<split>.
transformed_train_data = TransformedRoboEireanData(
    os.path.join("data", "transformed", "train"), encoder
)
transformed_val_data = TransformedRoboEireanData(
    os.path.join("data", "transformed", "val"), encoder
)



### RoboEireann Augmented Data


In [2]:
# Default scalings for the default boxes, determined by k-means clustering.
# One row per default box, two values per row.
default_box_scalings = torch.tensor(
    [
        [0.06549374, 0.12928654],
        [0.11965626, 0.26605093],
        [0.20708716, 0.38876095],
        [0.31018215, 0.47485098],
        [0.415882, 0.8048184],
        [0.7293086, 0.8216225],
    ]
)

# `classes` is the single source of truth for the label set: the encoder, both
# datasets and (in the training cell) the model size are all derived from it.
classes = ["robot"]
encoder = utils.Encoder(default_box_scalings, classes)

# Images are converted to single-channel float32 tensors and resized to
# 60x80 (height x width).
image_transforms = T.Compose(
    [
        T.Grayscale(),
        T.PILToTensor(),
        T.ConvertImageDtype(torch.float32),
        T.Resize((60, 80)),
    ]
)
# Empty pipeline: no transform steps are applied to the bounding boxes here.
bounding_box_transforms = T.Compose([])

# Train/val splits read from data/raw/<split>, sharing encoder and transforms.
raw_train_data = RoboEireanDataWithEncoder(
    os.path.join("data", "raw", "train"),
    encoder,
    classes,
    image_transforms=image_transforms,
    bounding_box_transforms=bounding_box_transforms,
)
raw_val_data = RoboEireanDataWithEncoder(
    os.path.join("data", "raw", "val"),
    encoder,
    classes,
    image_transforms=image_transforms,
    bounding_box_transforms=bounding_box_transforms,
)

# Only the training loader shuffles; validation keeps a fixed order.
train_loader = DataLoader(
    raw_train_data, batch_size=32, shuffle=True, num_workers=1
)
val_loader = DataLoader(
    raw_val_data, batch_size=32, shuffle=False, num_workers=1
)

### COCO Synthetic Data


In [4]:
# Disabled alternative data source (COCO synthetic data); kept for reference.
# NOTE(review): this cell redefines default_box_scalings but still passes the
# `encoder` built by an earlier cell, so the encoder would not pick up these
# values unless it is rebuilt — confirm before re-enabling.
# NOTE(review): these scalings look like pixel sizes, whereas the active cells
# use fractions below 1 — check which units utils.Encoder expects.
# default_box_scalings = torch.tensor(
#     [
#        [ 49.858948,  42.32408 ],
#        [ 79.69058 ,  96.98148 ],
#        [162.30188 , 125.641266],
#        [229.88889 , 248.09436 ],
#        [251.65385 , 434.61536 ],
#        [626.4     , 461.2     ]
#     ]
# )

# image_transforms = T.Compose(
#             [
#                 T.Grayscale(),
#                 T.PILToTensor(),
#                 T.ConvertImageDtype(torch.float32),
#                 T.Resize((60, 80)),
#             ]
#         )
# bounding_box_transforms = T.Compose([])

# raw_train_data = RoboEireanDataWithEncoder(os.path.join("data", "coco_ball_nao", "train"),
#                                            encoder,
#                                            ["robot"], image_transforms=image_transforms, bounding_box_transforms=bounding_box_transforms)

# raw_val_data = RoboEireanDataWithEncoder(os.path.join("data", "coco_ball_nao", "val"),
#                                            encoder,
#                                            ["robot"], image_transforms=image_transforms, bounding_box_transforms=bounding_box_transforms)
# train_loader = DataLoader(
#     raw_train_data, batch_size=32, shuffle=True, num_workers=1
# )
# val_loader = DataLoader(
#     raw_val_data, batch_size=32, shuffle=False, num_workers=1
# )

In [3]:
learning_rate = 2e-3

# Loaders are rebuilt here with num_workers=0, i.e. batches are loaded in the
# main process instead of in worker subprocesses.
train_loader = DataLoader(
    raw_train_data, batch_size=32, shuffle=True, num_workers=0
)
# Validation data is not shuffled: evaluation gains nothing from a random
# order, Lightning warns about shuffled val loaders, and a fixed order keeps
# any batch later pulled from this loader repeatable.
val_loader = DataLoader(
    raw_val_data, batch_size=32, shuffle=False, num_workers=0
)

# Arguments passed to the model: number of classes, number of rows in
# default_box_scalings, and the learning rate.
pl_model = MultiClassJetNet(len(classes), default_box_scalings.size(0), learning_rate)
# NOTE(review): limit_predict_batches only concerns trainer.predict(); it is
# kept as-is but has no bearing on the fit/validate calls in this notebook.
trainer = pl.Trainer(
    limit_predict_batches=100, max_epochs=200, callbacks=[RichProgressBar()]
)
trainer.fit(model=pl_model, train_dataloaders=train_loader, val_dataloaders=val_loader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


RecursionError: maximum recursion depth exceeded while calling a Python object

In [None]:
# Put the model in evaluation mode, then pull a single batch from the
# validation loader for a manual forward pass.
pl_model.eval()
batch = next(iter(val_loader))


In [None]:
# Forward pass on the sampled batch with gradient tracking disabled.
# batch[0] is the first element of the batch — presumably the image tensor;
# TODO confirm against the dataset's item layout.
with torch.no_grad():
    predictions_single_batch = pl_model(batch[0])

In [None]:
# Run the trainer's validation loop over the training loader.
trainer.validate(model=pl_model, dataloaders=train_loader)

In [None]:
# Run the trainer's validation loop over the validation loader.
trainer.validate(model=pl_model, dataloaders=val_loader)

In [None]:
# Disabled visualisation of model predictions on training images; kept for reference.
# NOTE(review): with grid_size = 1000 the loop below would visit
# grid_size * grid_size = 1,000,000 dataset items — lower it before re-enabling.
# from visualize import draw_model_output, image_grid
# import utils


# grid_size = 1000
# image_list = []
# for i in range(grid_size * grid_size):
#     image, encoded_bounding_boxes, target_masks, encoded_target_classes = raw_train_data[i]
#     predicted_boxes, predicted_class_logits = pl_model(image.unsqueeze(0))
#     predicted_classes = utils.calculate_predicted_classes(predicted_class_logits).squeeze()
#     print(predicted_classes)
#     decoded_boxes = encoder.decode_model_output(predicted_boxes, predicted_classes)
#     image_list.append(
#         draw_model_output(
#             image,
#             decoded_boxes,
#             predicted_classes,
#             torch.tensor([0, 1]),
#         )
#     )
# image_grid(image_list, grid_size, grid_size)
