In [None]:
import torch

from typing import List

from my_code import BoundingBox

import gradio as gr
import numpy as np

from pathlib import Path
from my_code import WordDetectorNet
import cv2


def run_image_through_network(image_path: Path, model_path: Path=Path('best_model.pth')) -> List[BoundingBox]:
    """Load the trained word-detector weights and prepare the model for inference.

    NOTE(review): this function is still a stub. ``image_path`` is not read
    anywhere in the body and no value is returned, even though the signature
    promises a ``List[BoundingBox]``. Only the model-loading part exists.

    Args:
        image_path: Path of the image to run through the network (currently unused).
        model_path: Checkpoint file holding the model's state dict. Defaults to
            ``best_model.pth`` in the current working directory.
    """
    # Use the GPU when one is available, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Build the network and restore its weights from the checkpoint the caller
    # asked for. map_location lets a GPU-saved checkpoint load on a CPU-only host.
    model = WordDetectorNet()
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    # Switch to evaluation mode for inference.
    model.eval()

In [None]:
# Sample image used by the exploration cells below (relative to the working directory).
example_image_path = Path('cvl.jpg')

In [None]:
import PIL
from PIL import Image

In [None]:
# Open the sample image with PIL as well, to inspect how PIL interprets it.
# The path comes from example_image_path so that PIL and OpenCV are guaranteed
# to look at the same file.
xxx = Image.open(example_image_path)

In [None]:
# Display the PIL mode string of the opened image.
xxx.mode

In [None]:
# cv2.imread does not raise when the file is missing or unreadable; it returns
# None. Fail here with a clear message instead of letting a later cell crash
# with an AttributeError on None.
image = cv2.imread(str(example_image_path))
if image is None:
    raise FileNotFoundError(f'Could not read image: {example_image_path}')

In [None]:
# Display the array shape of the image loaded with OpenCV.
image.shape

In [None]:
# Convert the BGR image to a single-channel grayscale image.
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

In [None]:
import matplotlib.pyplot as plt

# The image is held in BGR channel order while matplotlib draws RGB.
# Convert once and reuse the result for both the picture and its title.
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Side-by-side view: colour original on the left, grayscale on the right.
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
axes[0].imshow(rgb_image)
axes[0].set_title(f'Original Image @ {rgb_image.shape}')
axes[0].axis('off')

axes[1].imshow(gray_image, cmap='gray')
axes[1].set_title(f'Grayscale Image @ {gray_image.shape}')
axes[1].axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Input size as declared on the model class.
# NOTE(review): `input_size` is reassigned to an ImageDimensions instance
# further down in this notebook; re-running cells out of order changes its type.
input_size = WordDetectorNet.input_size

In [None]:
# Resize the grayscale image to the network input size.
# NOTE(review): cv2.resize takes its target size as (width, height); confirm
# that WordDetectorNet.input_size is in that order.
resized_gray_image = cv2.resize(gray_image, input_size)

In [None]:
import matplotlib.pyplot as plt

# Before/after view of the resize step: full-size grayscale on the left,
# network-sized grayscale on the right.
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
ax_before, ax_after = axes

ax_before.imshow(gray_image, cmap='gray')
ax_before.set_title(f'Grayscale Image @ {gray_image.shape}')

ax_after.imshow(resized_gray_image, cmap='gray')
ax_after.set_title(f'Grayscale Image Resized @ {resized_gray_image.shape}')

# Pixel axes add nothing for image panels; hide them on both.
for panel in (ax_before, ax_after):
    panel.axis('off')

plt.tight_layout()
plt.show()

In [None]:
from my_code import normalize_image_transform
from my_code import ImageDimensions
from my_code import IAM_Dataset
from my_code import custom_collate_fn

from torch.utils.data import Subset
from torch.utils.data import DataLoader

In [None]:
# ---- Run settings -----------------------------------------------------------
# Every tunable value lives here and each name is assigned exactly once, so the
# value you read is the value in effect.

# Compute device, kept as a plain string.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

learning_rate = 0.001

val_epoch = 1

summary_writer_path = Path.home() / 'summary_writer_path'

# Short run for notebook experiments; raise to 100 to simulate full training.
epoch_max = 3

patience_max = 50

# Dataset settings
data_path = Path.home() / 'Development/WordDetectorNN/data/train'
percent_train_data = 80  # share of samples assigned to the training split, in percent
input_size = ImageDimensions(width=448, height=448)
output_size = ImageDimensions(width=224, height=224)

# Dataloader settings
shuffle_data_loader = True
batch_size = 32
num_workers = 1

In [None]:
# -- datasets --

# Two dataset objects over the same directory, each with its own transform
# slot. Both use the plain normalisation transform for now.
train_transform = normalize_image_transform
val_transform = normalize_image_transform
# TODO: implement augmentations for training, changing at every batch

# NOTE(review): force_rebuild_cache=True is passed to both datasets over the
# same root_dir; presumably the second rebuild is redundant. Confirm against
# IAM_Dataset before changing it.
train_dataset = IAM_Dataset(
    root_dir=data_path,
    input_size=input_size,
    output_size=output_size,
    force_rebuild_cache=True,
    transform=train_transform,
)
val_dataset = IAM_Dataset(
    root_dir=data_path,
    input_size=input_size,
    output_size=output_size,
    force_rebuild_cache=True,
    transform=val_transform,
)

# The index-based split below is only valid if both datasets have the same length.
assert len(train_dataset) == len(val_dataset)

# Reproducible random split: shuffle the indices with a fixed seed, then cut
# at percent_train_data.
indices = list(range(len(train_dataset)))
np.random.seed(42)
np.random.shuffle(indices)
split = int(percent_train_data / 100 * len(indices))

train_indices = indices[:split]
val_indices = indices[split:]

train_subset = Subset(train_dataset, train_indices)
val_subset = Subset(val_dataset, val_indices)

# Iterating the subsets fetches every sample just to read its filename, so
# this check costs a full pass over the data.
train_filenames = [sample['filename'] for sample in train_subset]
val_filenames = [sample['filename'] for sample in val_subset]
# Check that no train samples are in val
assert len(set(train_filenames + val_filenames)) == len(train_filenames) + len(val_filenames)

# The two subsets together must cover the whole dataset.
assert len(train_dataset) == len(train_subset) + len(val_subset)

# -- dataloaders --

# Pinned host memory only helps host-to-GPU copies; requesting it without CUDA
# just produces a warning, so enable it only when a GPU is present.
use_pinned_memory = torch.cuda.is_available()

dataloader_train = DataLoader(
    train_subset,
    batch_size=batch_size,
    shuffle=shuffle_data_loader,
    num_workers=num_workers,
    collate_fn=custom_collate_fn,  # or custom_collate_fn_with_padding
    pin_memory=use_pinned_memory,
)

dataloader_val = DataLoader(
    val_subset,
    batch_size=batch_size,
    shuffle=False, # no need to shuffle validation data and otherwise images break
    num_workers=num_workers,
    collate_fn=custom_collate_fn,  # or custom_collate_fn_with_padding
    pin_memory=use_pinned_memory,
)

In [None]:
# Display the shape of the 'image' entry of the first dataset sample.
train_dataset[0]['image'].shape

In [None]:
# Pull a single batch from the training loader.
# The loader is built with shuffle=shuffle_data_loader, so which samples end up
# in this batch depends on that setting and on the RNG state.
batch_sample = next(iter(dataloader_train))

In [None]:
# Take the 'images' entry of the batch and move it to the compute device.
images = batch_sample['images'].to(device)

In [None]:
# Display type, shape and dtype of the batched images.
type(images), images.shape, images.dtype

In [None]:
# `model` has to exist at notebook scope for this and the later inference cell.
# Elsewhere in this notebook it is only created as a local variable inside
# run_image_through_network, so a fresh kernel would hit a NameError here.
# Build and load it explicitly.
model = WordDetectorNet()
model.load_state_dict(torch.load(Path('best_model.pth'), map_location=device))
model.to(device)
model.eval()

# Forward pass for inspection only; no gradients are needed.
with torch.no_grad():
    output = model(images)

In [None]:
# Display the shape of the network output for the batch.
output.shape

In [None]:
# Prepend two singleton axes to the resized grayscale array, wrap it as a
# tensor and move it to the compute device. torch.from_numpy keeps the array's dtype.
input_image = torch.from_numpy(resized_gray_image[None, None, :, :]).to(device)

In [None]:
# Display type, shape and dtype of the single-image input tensor.
type(input_image), input_image.shape, input_image.dtype

In [None]:
# Apply the same transform the datasets use. It is called with two arguments
# and returns a pair; None is passed as the second argument and the second
# return value is discarded (presumably the target/label slot; TODO confirm).
input_image_transformed, _ = normalize_image_transform(input_image, None)

In [None]:
# Display type, shape and dtype of the transformed input tensor.
type(input_image_transformed), input_image_transformed.shape, input_image_transformed.dtype

In [None]:
# Inference on the single prepared image, with gradient tracking disabled.
# NOTE(review): `model` is only assigned inside run_image_through_network in
# the visible cells; confirm it is defined at notebook scope before this runs.
with torch.no_grad():
    # apply_softmax=True presumably turns the segmentation channels into
    # probabilities (TODO confirm against WordDetectorNet.forward).
    output_image = model(input_image_transformed, apply_softmax=True)

In [None]:
from my_code import MapOrdering

In [None]:
# The model was called with apply_softmax=True, so the channels from SEG_WORD
# through SEG_BACKGROUND are expected to lie in [0, 1]. Slice them once and
# check both bounds.
seg_maps = output_image[:, MapOrdering.SEG_WORD:MapOrdering.SEG_BACKGROUND+1, :, :]
assert seg_maps.min() >= 0.0, 'segmentation maps contain values below 0'
assert seg_maps.max() <= 1.0, 'segmentation maps contain values above 1'

# The shape is the last expression so the notebook actually displays it.
output_image.shape

In [None]:
# Hand the network output to the NumPy-based post-processing.
# The conversion is guarded so this cell can be re-run: after the first run
# `output_image` is already an ndarray, which has no `.to()` method.
if isinstance(output_image, torch.Tensor):
    output_image = output_image.detach().to('cpu').numpy()

In [None]:
from my_code import decode, fg_by_cc, BoundingBox, cluster_aabbs, draw_bboxes_on_image

In [None]:
# Re-declare the network input size; the decode cell below divides its width
# by output_size.width to get the scale factor.
input_size = ImageDimensions(width=448, height=448)
# Display the output size currently in effect.
output_size

In [None]:
# Post-process one element of the batch: decode boxes from the output maps,
# clip them to the image, cluster them and draw the result.
i_element_in_batch = 0
y_element = output_image[i_element_in_batch, :, :, :]

# Ratio between network input width and output-map width, passed to decode as `scale`.
upscale = input_size.width / output_size.width
foreground_fn = fg_by_cc(thres=0.5, max_num=1000)
decoded_aabbs = decode(y_element, scale=upscale, comp_fg=foreground_fn)

# The normalised network input doubles as the canvas for the visualisation.
img_np = input_image_transformed[i_element_in_batch, 0, :, :].to('cpu').numpy()
h, w = img_np.shape

# bounding box must be inside img
image_bounds = BoundingBox(0, 0, w - 1, h - 1)
aabbs = [aabb.clip(image_bounds) for aabb in decoded_aabbs]

clustered_aabbs = cluster_aabbs(aabbs)
print(len(clustered_aabbs))
vis = draw_bboxes_on_image(img_np, clustered_aabbs)

In [None]:
import matplotlib.pyplot as plt

# Network input on the left, the same image with predicted boxes on the right.
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
ax_input, ax_prediction = axes

inp = input_image_transformed.to('cpu')[0, 0, :, :]
ax_input.imshow(inp, cmap='gray')
ax_input.set_title(f'NN input @ {inp.shape}')

# Swap the visualisation from BGR to RGB channel order for matplotlib.
pred = cv2.cvtColor(vis, cv2.COLOR_BGR2RGB)
ax_prediction.imshow(pred, cmap='gray')
ax_prediction.set_title(f'NN prediction @ {pred.shape}')

for panel in (ax_input, ax_prediction):
    panel.axis('off')

plt.tight_layout()
plt.show()