In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from card_detector import CardDetector, fit
from dataloader_utils import MTGCardsDataset, get_transform_pipe
from config import *

In [None]:
# Anchor boxes acquired from k-means clustering of the dataset.
# NOTE(review): each row is presumably a (width, height) pair in pixels — confirm
# against the clustering script.
anchor_boxes = torch.tensor([
    [198.27963804, 206.74086672],
    [129.59395666, 161.90171490],
    [161.65437828, 232.34624509],
])

# Every dataset-related setting lives under CONFIG["dataset"]; look it up once.
dataset_cfg = CONFIG["dataset"]
img_dims = (dataset_cfg["img_w"], dataset_cfg["img_h"])

model = CardDetector(
    img_dims=img_dims,
    anchor_boxes=anchor_boxes,
    num_anchors_per_cell=dataset_cfg["num_anchors_per_cell"],
)

# The datasets are given the detector's output grid size as (width, height).
feature_map_dims = (model.features_w, model.features_h)

# NOTE(review): the same transform pipeline is shared by the train and validation
# splits; if it contains random augmentation, validation is affected too — confirm.
transform_pipe = get_transform_pipe(img_w=dataset_cfg["img_w"], img_h=dataset_cfg["img_h"])


def _build_dataset(annotations_file):
    """Create an ``MTGCardsDataset`` for one split.

    The train and validation datasets are configured identically except for
    the annotations file, so the shared arguments are supplied here.

    Args:
        annotations_file: Path to the annotations file of the split.

    Returns:
        The ``MTGCardsDataset`` built for that annotations file.
    """
    return MTGCardsDataset(
        annotations_file=annotations_file,
        img_dir=dataset_cfg["img_dir"],  # TODO: change directory when we have the actual data
        anchor_boxes=model.anchor_boxes,
        feature_map_dims=feature_map_dims,
        img_dims=img_dims,
        num_anchors_per_cell=dataset_cfg["num_anchors_per_cell"],
        num_max_boxes=1,
        transform=transform_pipe,
        limit=dataset_cfg["limit"],
    )


batch_size = CONFIG["dataloader"]["batch_size"]

train_dataset = _build_dataset(dataset_cfg["annotations_file_train"])
# Reshuffle the training samples every epoch; DataLoader keeps the dataset's
# original order unless shuffle=True is passed.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

val_dataset = _build_dataset(dataset_cfg["annotations_file_val"])
# Validation order is left untouched so that batches are the same on every run.
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)

In [None]:
# Train on the GPU when PyTorch can see one, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Learning rate and epoch count both come from the "optimizer" config section.
optimizer_cfg = CONFIG["optimizer"]
opt = torch.optim.Adam(model.parameters(), lr=optimizer_cfg["lr"])

fit(
    model=model,
    num_epochs=optimizer_cfg["num_epochs"],
    optimizer=opt,
    train_dataloader=train_dataloader,
    val_dataloader=val_dataloader,
    device=device,
)

In [None]:
import torchvision.transforms.functional as fn
from torchvision.io import read_image, ImageReadMode
from torchvision.utils import draw_bounding_boxes


# Pull the first batch from the validation loader and pick one sample from it.
val_batch = next(iter(val_dataloader))
images, labels = val_batch
sample_idx = 1
sample_image = images[sample_idx]
sample_label = labels[sample_idx]

# Load the unaltered (not normalized) version of the image to draw on.
# NOTE(review): the path is hard-coded; presumably it is the file behind
# validation sample 1 — confirm.
test_image = read_image("data/aug_val/1060_2.png", mode=ImageReadMode.RGB)

# Inference runs on the CPU from here on.
model = model.to("cpu")

# The second call passes the label as `ground_truth`; presumably `predict`
# then returns the labelled box in the same format as a prediction — confirm.
pred_box = model.predict(sample_image).squeeze(0)
true_box = model.predict(
    sample_image,
    ground_truth=sample_label.unsqueeze(0),
).squeeze(0)

print(pred_box)
print(true_box)

# Overlay the predicted box on the raw image with a 3-pixel outline.
bbox_img_tensor = draw_bounding_boxes(
    image=test_image,
    boxes=pred_box,
    width=3,
    colors=(255, 255, 0),
)

fn.to_pil_image(bbox_img_tensor)

In [None]:
# Overlay `true_box` on the raw image, using the same outline width and
# colour as the prediction overlay.
box_color = (255, 255, 0)
bbox_img_tensor = draw_bounding_boxes(
    image=test_image,
    boxes=true_box,
    width=3,
    colors=box_color,
)

fn.to_pil_image(bbox_img_tensor)

In [None]:
# Show sample 1 of the validation batch exactly as the dataloader yields it,
# i.e. after `transform_pipe` has been applied (presumably normalized, so the
# colours may look different from the raw file — confirm).
fn.to_pil_image(images[1])

In [None]:
# Inspect the dimensions of the raw image tensor loaded with `read_image`
# (torchvision returns it as channels x height x width).
test_image.shape

In [None]:
import numpy as np
import torchvision.transforms as transforms

def _draw_box_outline(pixels, left, top, box_w, box_h, value=255):
    """Paint a one-pixel-wide rectangle outline into ``pixels`` in place.

    Args:
        pixels: Array indexed as ``[row, column, ...]``; it is modified in place.
        left: Column index of the left edge.
        top: Row index of the top edge.
        box_w: Distance in columns between the left and right edges.
        box_h: Distance in rows between the top and bottom edges.
        value: Value written to every outline pixel (all channels).

    Returns:
        The same ``pixels`` array, for convenience.
    """
    right = left + box_w
    bottom = top + box_h

    # Every span ends at "+ 1" so both end points are included. With half-open
    # spans the bottom-right corner pixel (bottom, right) is never painted.
    # Slices (rather than integer indices) are used so that parts of the box
    # lying beyond the array edge are clipped instead of raising IndexError.
    pixels[top:bottom + 1, left:left + 1] = value      # left edge
    pixels[top:bottom + 1, right:right + 1] = value    # right edge
    pixels[top:top + 1, left:right + 1] = value        # top edge
    pixels[bottom:bottom + 1, left:right + 1] = value  # bottom edge
    return pixels


# Copy the raw image into a NumPy array and move the channel axis last
# (axes 1, 2, 0), so it can be indexed as [row, column, channel].
test_array = np.array(test_image).transpose(1, 2, 0)

# Box given as [center_x, center_y, width, height] in pixels.
position = [442.5, 269.0, 95, 128]
width = position[2]
height = position[3]

# Convert the centre to the top-left corner; int() truncates the fraction.
offsetx = int(position[0] - width / 2)
offsety = int(position[1] - height / 2)

_draw_box_outline(test_array, left=offsetx, top=offsety, box_w=width, box_h=height)

test_bg = transforms.ToPILImage()(test_array)
test_bg