In [None]:
import json
import math
import os
from pathlib import Path
import matplotlib.pyplot as plt
from torch.nn import MSELoss, L1Loss
from torchvision import transforms
import torch
import onnx
from onnx2torch import convert
import random
import torch.nn as nn

from app.utils.dataset import DepthDataset, create_data_loader
from app.utils.env import Env
from models.train_depth_model import SILogLoss, BerHuLoss, evaluate
import numpy as np

from train_classification import ClassificationDataset
from torchmetrics.regression import RelativeSquaredError, MeanAbsolutePercentageError


In [None]:
# Directory holding one `<model>.json` training history per network; later
# cells load `<model>.onnx` from the same directory.
output_path = Path("output_remote")

# Path.glob yields entries in arbitrary, filesystem-dependent order. Sorting
# keeps the model order (and everything derived from it, such as subplot
# positions and per-network numbering) stable between runs and machines.
files = sorted(output_path.glob("*.json"))

# Training histories keyed by model name (file name without the .json suffix).
model_data = {}

for file in files:
    with open(file) as f:
        model_data[file.stem] = json.load(f)

n_models = len(model_data)
# Show only which models were found; the histories themselves are long
# per-epoch lists and are plotted in the next cell.
print(list(model_data))

In [None]:
# One panel per model on a two-column grid, each showing the recorded
# training and validation loss histories.
grid_rows = math.ceil(n_models / 2)
fig = plt.figure(figsize=(25, 15))

for panel, (model_name, history) in enumerate(model_data.items(), start=1):
    ax = fig.add_subplot(grid_rows, 2, panel)

    for curve in ("train_loss", "val_loss"):
        ax.plot(history[curve], label=curve)

    ax.set_title(model_name)
    ax.legend()


In [None]:
env = Env()

image_size = 256

# Network input pipeline: resize, convert to tensor, then normalise each
# channel with a fixed mean / std.
transform = transforms.Compose(
    [
        transforms.Resize((256, 848)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

# Depth targets go through PIL so they can be resized to the same size as the inputs.
target_transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.Resize((256, 848)),
        transforms.ToTensor(),
    ]
)

# Same resize as `transform` but without normalisation.
raw_transform = transforms.Compose(
    [
        transforms.Resize((256, 848)),
        transforms.ToTensor(),
    ]
)


def _make_depth_dataset(split, image_transform):
    """Build a DepthDataset for the `split` sub-directory of the dataset path."""
    return DepthDataset(
        root_dir=env.dataset_path / split,
        transform=image_transform,
        target_transform=target_transform,
        limit_cameras=not env.both_cameras,
    )


train_dataset = _make_depth_dataset("train", transform)
val_dataset = _make_depth_dataset("val", transform)
val_dataset_raw = _make_depth_dataset("val", raw_transform)

val_loader = create_data_loader(val_dataset, batch_size=16, shuffle=False)


In [None]:
# Validation split for the models whose name contains "Classification".
# The image pipeline is resize -> tensor -> per-channel normalisation.
class_transform = transforms.Compose(
    [
        transforms.Resize((256, 848)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)

val_dataset_class = ClassificationDataset(
    root_dir=env.dataset_path / "val",
    transform=class_transform,
    limit_cameras=not env.both_cameras,
    num_classes=80,
    max_depth=80,
)

In [None]:
def _rescale_valid(values, invalid):
    """Min-max scale the valid entries of `values` to [0, 1], in place.

    `invalid` is a boolean array of the same shape as `values`; flagged
    entries are set to 0 and do not take part in the min / max. If no entry is
    valid the array is just zeroed, and a constant valid region is divided by
    a small epsilon instead of by zero (so it maps to 0).

    Returns the same (mutated) array for convenience.
    """
    valid = ~invalid
    if valid.any():
        lowest = values[valid].min()
        span = max(values[valid].max() - lowest, 1e-8)
        values[valid] = (values[valid] - lowest) / span
    values[invalid] = 0
    return values


loss_si = SILogLoss()
loss_mse = MSELoss()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Number of validation samples drawn per model; each sample fills one row of
# three panels (raw image, prediction, ground truth).
samples_per_model = 3
n_rows = n_models * samples_per_model

fig = plt.figure(figsize=(25, n_rows * 4))

i = 1

for name in model_data.keys():
    # Re-seed for every model so each one draws the same sequence of sample
    # indices. NOTE: `^` is XOR in Python, so this seed is 20, not 23 cubed.
    random.seed(23 ^ 3)
    onnx_model = onnx.load(output_path / f"{name}.onnx")
    model = convert(onnx_model)
    model.to(device)
    # Inference mode for layers such as dropout / batch norm, matching the
    # metrics cell, which also switches the converted model to eval().
    model.eval()

    for _ in range(samples_per_model):
        ax_raw = fig.add_subplot(n_rows, 3, i)
        ax_pred = fig.add_subplot(n_rows, 3, i + 1)
        ax_target = fig.add_subplot(n_rows, 3, i + 2)
        i += 3

        idx = random.randint(0, len(val_dataset) - 1)

        if "Classification" in name:
            model_input, depth_image = val_dataset_class[idx]
            with torch.no_grad():
                logits = model(model_input.unsqueeze(0).to(device)).cpu()  # input is [1, 3, H, W]
                logits = nn.functional.interpolate(logits, size=depth_image.shape[-2:], mode='bilinear',
                                                   align_corners=True)

                # Predicted class per pixel = index of the largest score along dim 1.
                _, prediction = torch.max(logits, 1)

            # Pixels labelled 0 are excluded from the accuracy.
            mask = depth_image != 0
            correct_pixels = ((prediction == depth_image) * mask).sum().item()
            total_pixels = mask.sum().item()

            depth_np = depth_image.cpu().numpy()
            prediction = prediction.squeeze().cpu().numpy()

            # Guard against a sample without any labelled pixel.
            accuracy = correct_pixels / total_pixels if total_pixels else float("nan")
            print(accuracy, total_pixels)
        else:
            model_input, depth_image = val_dataset[idx]
            with torch.no_grad():
                prediction = model(model_input.unsqueeze(0).to(device))  # input is [1, 3, H, W]

                if prediction.shape[-2:] != depth_image.shape[-2:]:
                    prediction = nn.functional.interpolate(prediction, size=depth_image.shape[-2:], mode="bilinear",
                                                           align_corners=True)

                prediction = prediction.squeeze().cpu().numpy()

            # Negative target values are treated as invalid. Prediction and
            # target are each rescaled independently over the valid pixels,
            # so the panels compare relative structure, not absolute depth.
            mask = depth_image.squeeze().numpy() < 0
            prediction = _rescale_valid(prediction, mask)
            depth_np = _rescale_valid(depth_image.squeeze().cpu().numpy(), mask)

        # Always display the un-normalised image: the tensors fed to the
        # models are mean/std normalised and render with clipped colours.
        # Assumes val_dataset_raw shares its indexing with the dataset used
        # above (idx is already drawn from len(val_dataset) for both branches)
        # - TODO confirm for ClassificationDataset.
        raw_image, _ = val_dataset_raw[idx]

        ax_raw.imshow(raw_image.squeeze().permute(1, 2, 0).cpu(), cmap='plasma')
        ax_raw.set_title("Raw Image")
        ax_raw.axis('off')

        ax_pred.imshow(prediction, cmap='plasma')
        ax_pred.set_title(f"Predicted Depth: {name}")
        ax_pred.axis('off')

        ax_target.imshow(depth_np, cmap='plasma')
        ax_target.set_title("Ground Truth Depth")
        ax_target.axis('off')

print("done")

In [None]:
from torchmetrics import MeanAbsoluteError
import pandas as pd
import seaborn as sns


def _mean_masked_losses(model, loader, loss_fns, device):
    """Average every loss in `loss_fns` over the batches of `loader`.

    The model is run once per batch and all losses are evaluated on that
    single output, instead of repeating the whole inference pass per loss.
    Outputs are bilinearly resized to the target's spatial size when they
    differ, and only positions where the target is > 0 are compared.

    Batches without any such position are skipped (a mean over zero pixels is
    undefined) and do not count towards the average.

    Returns a list of floats aligned with `loss_fns`; every entry is NaN when
    no batch contributed.
    """
    totals = [0.0] * len(loss_fns)
    used_batches = 0

    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)

            if output.shape[-2:] != target.shape[-2:]:
                output = nn.functional.interpolate(output, size=target.shape[-2:], mode="bilinear", align_corners=True)

            mask = target > 0
            if not mask.any():
                continue

            target = target[mask]
            output = output[mask]

            for k, loss_fn in enumerate(loss_fns):
                totals[k] += loss_fn(output, target).item()
            used_batches += 1

    if used_batches == 0:
        return [float("nan")] * len(loss_fns)
    return [total / used_batches for total in totals]


loss_si = SILogLoss()
loss_mse = MSELoss()
loss_mape = MeanAbsolutePercentageError().to(device)
loss_mae = MeanAbsoluteError().to(device)
# loss_rse = RelativeSquaredError().to(device)

losses = [loss_si, loss_mse, loss_mape, loss_mae]

# Long-format records: one row per (network, loss function, split).
loss_data = {
    "loss": [],
    "loss_type": [],
    "network": [],
    "loss_fn": []
}

val_loader = create_data_loader(val_dataset, shuffle=False, batch_size=16)
train_loader = create_data_loader(train_dataset, shuffle=False, batch_size=16)

i = 0

for name in model_data.keys():
    i += 1
    # NOTE: `^` is XOR in Python, so this seed is 20, not 23 cubed.
    random.seed(23 ^ 3)
    onnx_model = onnx.load(output_path / f"{name}.onnx")
    model = convert(onnx_model)
    model.to(device)
    model.eval()

    train_means = _mean_masked_losses(model, train_loader, losses, device)
    val_means = _mean_masked_losses(model, val_loader, losses, device)

    # Rows are appended per loss as Train then Validation.
    for loss_fn, avg_train_loss, avg_val_loss in zip(losses, train_means, val_means):
        loss_name = type(loss_fn).__name__

        for loss_type, value in (("Train", avg_train_loss), ("Validation", avg_val_loss)):
            loss_data["loss"].append(value)
            loss_data['loss_type'].append(loss_type)
            loss_data["network"].append(f"Network: {i}")
            loss_data["loss_fn"].append(loss_name)


df = pd.DataFrame(loss_data)
df.head()

In [None]:
# Grouped bar chart: one facet per loss function (two facets per row),
# networks along x, train vs. validation as hue. sharey=False gives every
# facet its own y-axis range.
sns.catplot(
    data=df,
    kind="bar",
    x="network",
    y="loss",
    hue="loss_type",
    col="loss_fn",
    col_wrap=2,
    sharey=False,
)

In [None]:
# Wide summary table: one row per (network, split), one column per loss
# function. `loss_type` has to be part of the index: `df` holds a Train and a
# Validation row for every network / loss pair, and pivoting on `network`
# alone would merge the two into a single number per metric.
# Each (network, loss_type, loss_fn) combination occurs once, so the
# aggregation just passes the value through; "mean" keeps the scale sensible
# should duplicates ever appear.
df_wide = df.pivot_table(
    index=['network', 'loss_type'],
    columns='loss_fn',
    values='loss',
    aggfunc='mean'
).reset_index()

print(df_wide.to_markdown())

|    | network            |   MSELoss |   MeanAbsoluteError |   MeanAbsolutePercentageError |   SILogLoss |
|---:|:-------------------|----------:|--------------------:|------------------------------:|------------:|
|  0 | DepthEstimationNet |   107.348 |             7.01437 |                       1.03218 |     10.9741 |
|  1 | SmallCNN           |   107.558 |             7.02589 |                       1.03388 |     11.4882 |
|  2 | UNetAlikeCNN       |   107.338 |             7.01323 |                       1.05625 |     12.1175 |
|  3 | UNetAlikeDeeperCNN |   107.402 |             7.01647 |                       1.04116 |     11.7885 |