In [38]:
import torch

if not torch.cuda.is_available():
    print("CUDA is not available. Exiting...")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device_name = torch.cuda.get_device_name(0)
print(f"Using {device_name} for training.")

Using NVIDIA GeForce GTX 1660 Ti for training.


In [39]:
# !pip install segmentation-models-pytorch

In [40]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from torch.optim import Adam
from torch.optim.lr_scheduler import LambdaLR
import os
import csv
import random
import cv2
import numpy as np
from skimage.transform import resize
import PIL
from torchvision.models import resnet34
from torchvision.models.segmentation import fcn_resnet50
import segmentation_models_pytorch as smp
# import albumentations as A  # Alternative for augmentations like `imutils`


In [41]:
HEIGHT = 128
WIDTH = 128
INIT_LR = 0.0001
EPOCHS = 15
TRAIN_PATH = "../nyu_data/data/nyu2_train.csv"
TEST_PATH = "../nyu_data/data/nyu2_test.csv"

In [42]:
# use torchvision.model.segmentation.fcn_resnet50
model = smp.Unet("resnet34", encoder_weights="imagenet", in_channels=3, classes=1).to(device)

In [43]:
# !pip install torchsummary

In [44]:
from torchsummary import summary
summary(model, (3, HEIGHT, WIDTH))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 64, 64]           9,408
       BatchNorm2d-2           [-1, 64, 64, 64]             128
              ReLU-3           [-1, 64, 64, 64]               0
         MaxPool2d-4           [-1, 64, 32, 32]               0
            Conv2d-5           [-1, 64, 32, 32]          36,864
       BatchNorm2d-6           [-1, 64, 32, 32]             128
              ReLU-7           [-1, 64, 32, 32]               0
            Conv2d-8           [-1, 64, 32, 32]          36,864
       BatchNorm2d-9           [-1, 64, 32, 32]             128
             ReLU-10           [-1, 64, 32, 32]               0
       BasicBlock-11           [-1, 64, 32, 32]               0
           Conv2d-12           [-1, 64, 32, 32]          36,864
      BatchNorm2d-13           [-1, 64, 32, 32]             128
             ReLU-14           [-1, 64,

In [45]:
# Function to read CSV file and load image-depth map pairs
def read_csv(csv_file_path):
    """
    Reads a CSV file and returns a list of tuples (image_path, depth_map_path).
    """
    with open(csv_file_path, 'r') as f:
        csv_reader = csv.reader(f, delimiter=',')
        return [(f'./{row[0]}', f'./{row[1]}') for row in csv_reader if len(row) > 0]

# Function to split the dataset into training and validation sets
def train_val_split(train_paths, val_size):
    """
    Splits the paths into training and validation datasets.
    """
    random.shuffle(train_paths)  # Shuffle paths
    len_train_paths = len(train_paths)
    i = int(len_train_paths * (1.0 - val_size))
    train = train_paths[:i]
    val = train_paths[i:len(train_paths)]
    return train, val

# Function to load training paths and labels
def load_train_paths(train_path):
    """
    Loads the training paths and labels from the CSV file.
    """
    train_paths = read_csv(train_path)
    labels = {img_path: dm_path for img_path, dm_path in train_paths}  # Map image to depth map
    x_paths = [img_path for img_path, _ in train_paths]  # Get list of image paths
    x_train_paths, x_val_paths = train_val_split(x_paths, 0.3)  # Split into training and validation sets

    partition = {
        'train': x_train_paths,
        'validation': x_val_paths
    }
    return partition, labels

In [46]:
import torch
import cv2
import numpy as np
from torchvision import transforms
from skimage.transform import resize
from skimage import io

# Normalize image
def normalize_img(img):
    """
    Normalizes an image to the range [0, 1].
    """
    norm_img = (img - img.min()) / (img.max() - img.min())
    return norm_img

# Preprocess input image
# def preprocess_image(img_path, horizontal_flip=False, height=128):
#     """
#     Reads and preprocesses an RGB image.
#     Args:
#         img_path (str): Path to the image.
#         horizontal_flip (bool): Whether to apply horizontal flip.
#         height (int): Target height for resizing.
#     Returns:
#         torch.Tensor: Preprocessed image tensor.
#     """
#     # Read and resize image
#     image = cv2.imread(img_path, cv2.IMREAD_COLOR)
#     image = resize(image, (height, int(height * 4 / 3)), mode='reflect', preserve_range=True)
#     image = image[:, 21:149].astype(np.float32)  # Crop and convert to float32
#     image = normalize_img(image)  # Normalize to [0, 1]

#     # Apply horizontal flip if needed
#     if horizontal_flip:
#         image = cv2.flip(image, 1)

#     # Convert to PyTorch tensor (C, H, W)
#     image = torch.tensor(image.transpose(2, 0, 1), dtype=torch.float32)

#     # Debug statement to check if the image path is valid and the image is loaded
#     print(f"Image path: {img_path}")
#     return image
def preprocess_image(image_path, res):
    # Correct the path
    base_path = "/home/river2000/monocular_depth_estimation/nyu_data/data/"
    full_image_path = os.path.join(base_path, image_path)
    
    image = io.imread(full_image_path)
    if image is None:
        raise ValueError(f"Image at path {full_image_path} could not be loaded.")
    height, width = res
    image = resize(image, (height, int(height * 4 / 3)), mode='reflect', preserve_range=True)
    return image

# Preprocess depth map
def preprocess_depth_map(depth_map_path, horizontal_flip=False, height=128):
    """
    Reads and preprocesses a depth map.
    Args:
        depth_map_path (str): Path to the depth map.
        horizontal_flip (bool): Whether to apply horizontal flip.
        height (int): Target height for resizing.
    Returns:
        torch.Tensor: Preprocessed depth map tensor.
    """
    # Read and resize depth map
    depth_map = cv2.imread(depth_map_path, cv2.IMREAD_GRAYSCALE)
    depth_map = resize(depth_map, (height, int(height * 4 / 3)), mode='reflect', preserve_range=True)
    depth_map = depth_map[:, 21:149].astype(np.float32)  # Crop and convert to float32
    depth_map = normalize_img(depth_map)  # Normalize to [0, 1]

    # Apply horizontal flip if needed
    if horizontal_flip:
        depth_map = cv2.flip(depth_map, 1)

    # Add channel dimension and convert to PyTorch tensor
    depth_map = torch.tensor(depth_map[np.newaxis, :, :], dtype=torch.float32)
    # Debug statement to check if the depth map path is valid and the depth map is loaded
    print(f"Depth map path: {depth_map_path}")
    return depth_map


In [47]:
# # Test the functions
# img_path = "../nyu_data/data/nyu2_train/basement_0001a_out/1.jpg"
# depth_map_path = "../nyu_data/data/nyu2_train/basement_0001a_out/1.png"
# image = preprocess_image(img_path)
# depth_map = preprocess_depth_map(depth_map_path)
# print(f"Image shape: {image.shape}, Depth map shape: {depth_map.shape}")



In [48]:
import torch
from torch.utils.data import Dataset
import random
import numpy as np

class DataGenerator(Dataset):
    def __init__(self, list_IDs, labels, dim=(128, 128), n_channels=3, batch_size=16, shuffle=True, pred=False):
        """
        Args:
            list_IDs (list): List of image file paths.
            labels (dict): Dictionary mapping image paths to depth map paths.
            dim (tuple): Dimensions of the input images (H, W).
            n_channels (int): Number of input channels.
            batch_size (int): Batch size.
            shuffle (bool): Whether to shuffle the data at the end of each epoch.
            pred (bool): If True, only generates inputs (no labels).
        """
        self.dim = dim
        self.batch_size = batch_size
        self.labels = labels
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.shuffle = shuffle
        self.pred = pred
        self.on_epoch_end()

    def __len__(self):
        """
        Returns the number of batches per epoch.
        """
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """
        Generates one batch of data.
        """
        # Generate indexes of the batch
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]

        # Find list of IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        if self.pred:
            X = self.__data_generation(list_IDs_temp)
            return X
        # Generate data
        X, y = self.__data_generation(list_IDs_temp)

        return X, y
    
    def on_epoch_end(self):
        """
        Updates indexes after each epoch.
        """
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """
        Generates data containing batch_size samples.
        """
        # Initialization
        X = np.empty((self.batch_size, self.dim[0], self.dim[1], self.n_channels))

        if not self.pred:
            y = np.empty((self.batch_size, self.dim[0], self.dim[1], 1))

            for i, ID in enumerate(list_IDs_temp):
                res = random.choice([True, False])
                X[i,] = preprocess_image(ID, res)
                y[i,] = preprocess_depth_map(self.labels[ID], res)

            return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)
        else:
            for i, ID in enumerate(list_IDs_temp):
                res = random.choice([True, False])
                X[i,] = preprocess_image(ID, res)
            return torch.tensor(X, dtype=torch.float32)


In [49]:
partition, labels = load_train_paths(TRAIN_PATH)

In [50]:
print(len(partition['train']), len(partition['validation']))

35481 15207


In [51]:
training_generator = DataGenerator(partition['train'], labels = labels, batch_size=16, dim=(128, 128), n_channels=3, shuffle=True, pred=False)
validation_generator = DataGenerator(partition['validation'], labels = labels, batch_size=16, dim=(128, 128), n_channels=3, shuffle=True, pred=False)

In [52]:
from torch.optim.lr_scheduler import LambdaLR

# def poly_decay(epoch, max_epochs, base_lr, power=1.0):
#     """
#     Implements polynomial learning rate decay.
#     Args:
#         epoch (int): Current epoch.
#         max_epochs (int): Total number of epochs.
#         base_lr (float): Initial learning rate.
#         power (float): Decay power.
#     Returns:
#         float: Adjusted learning rate.
#     """
#     return base_lr * (1 - (epoch / float(max_epochs))) ** power

from torch.optim import Adam

# Define optimizer
optimizer = Adam(model.parameters(), lr=INIT_LR, amsgrad=True)

# Define learning rate scheduler
# max_epochs = EPOCHS
# scheduler = LambdaLR(optimizer, lr_lambda=lambda epoch: poly_decay(epoch, max_epochs, INIT_LR))

import torch
import torch.nn.functional as F

def depth_loss(y_true, y_pred, w1=1.0, w2=3.0, w3=0.1):
    """
    Custom depth loss combining SSIM, edge loss, and depth loss.
    Args:
        y_true (torch.Tensor): Ground truth depth map.
        y_pred (torch.Tensor): Predicted depth map.
        w1, w2, w3 (float): Weights for SSIM, edge loss, and depth loss, respectively.
    Returns:
        torch.Tensor: Combined loss.
    """
    # Depth loss (L1 loss)
    l_depth = torch.mean(torch.abs(y_pred - y_true))

    # Edge loss
    dy_true, dx_true = torch.gradient(y_true, dim=(2, 3))
    dy_pred, dx_pred = torch.gradient(y_pred, dim=(2, 3))
    l_edges = torch.mean(torch.abs(dy_pred - dy_true) + torch.abs(dx_pred - dx_true))

    # SSIM loss
    ssim_loss = 1 - ssim(y_true, y_pred)
    l_ssim = torch.clamp(ssim_loss * 0.5, 0, 1)

    # Combined loss
    return (w1 * l_ssim) + (w2 * l_edges) + (w3 * l_depth)

def ssim(y_true, y_pred, max_val=1.0):
    """
    Computes Structural Similarity Index (SSIM).
    Args:
        y_true (torch.Tensor): Ground truth tensor.
        y_pred (torch.Tensor): Predicted tensor.
        max_val (float): Maximum value in the tensors.
    Returns:
        torch.Tensor: SSIM value.
    """
    c1 = (0.01 * max_val) ** 2
    c2 = (0.03 * max_val) ** 2

    mu_true = F.avg_pool2d(y_true, kernel_size=3, stride=1, padding=1)
    mu_pred = F.avg_pool2d(y_pred, kernel_size=3, stride=1, padding=1)

    sigma_true_sq = F.avg_pool2d(y_true * y_true, kernel_size=3, stride=1, padding=1) - mu_true ** 2
    sigma_pred_sq = F.avg_pool2d(y_pred * y_pred, kernel_size=3, stride=1, padding=1) - mu_pred ** 2
    sigma_true_pred = F.avg_pool2d(y_true * y_pred, kernel_size=3, stride=1, padding=1) - mu_true * mu_pred

    ssim_map = ((2 * mu_true * mu_pred + c1) * (2 * sigma_true_pred + c2)) / \
               ((mu_true ** 2 + mu_pred ** 2 + c1) * (sigma_true_sq + sigma_pred_sq + c2))
    return ssim_map.mean()

def depth_acc(y_true, y_pred):
    """
    Computes soft accuracy for depth maps.
    Args:
        y_true (torch.Tensor): Ground truth tensor.
        y_pred (torch.Tensor): Predicted tensor.
    Returns:
        torch.Tensor: Soft accuracy.
    """
    return torch.mean((torch.round(y_true) == torch.round(y_pred)).float())


In [53]:
# Example tensors
y_true = torch.rand((4, 1, 128, 128))  # Ground truth
y_pred = torch.rand((4, 1, 128, 128))  # Predictions

# Calculate loss and accuracy
loss = depth_loss(y_true, y_pred)
accuracy = depth_acc(y_true, y_pred)

print("Loss:", loss.item())
print("Accuracy:", accuracy.item())


Loss: 1.9428634643554688
Accuracy: 0.50103759765625


In [54]:
# Training Loop
def train_model(model, train_loader, val_loader, optimizer, epochs, device):
    model.train()
    for epoch in range(epochs):
        train_loss = 0.0
        val_loss = 0.0

        # Training Phase
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            # Forward Pass
            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = depth_loss(y_batch, y_pred)

            # Backward Pass
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Validation Phase
        model.eval()
        with torch.no_grad():
            for X_val, y_val in val_loader:
                X_val, y_val = X_val.to(device), y_val.to(device)
                y_pred = model(X_val)
                val_loss += depth_loss(y_val, y_pred).item()

        print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss/len(train_loader):.4f}, "
              f"Val Loss: {val_loss/len(val_loader):.4f}")


In [55]:
from torch.utils.data import DataLoader

# Create DataLoaders
train_loader = DataLoader(training_generator, batch_size=16, shuffle=True, num_workers=4)
val_loader = DataLoader(validation_generator, batch_size=16, shuffle=False, num_workers=4)

# Train the Model
train_model(model, train_loader, val_loader, optimizer, EPOCHS, device)


FileNotFoundError: Caught FileNotFoundError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/river2000/.local/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 351, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
  File "/home/river2000/.local/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 52, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/river2000/.local/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 52, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/tmp/ipykernel_90435/2027826872.py", line 46, in __getitem__
    X, y = self.__data_generation(list_IDs_temp)
  File "/tmp/ipykernel_90435/2027826872.py", line 70, in __data_generation
    X[i,] = preprocess_image(ID, res)
  File "/tmp/ipykernel_90435/1418762535.py", line 48, in preprocess_image
    image = io.imread(full_image_path)
  File "/home/river2000/.local/lib/python3.10/site-packages/skimage/io/_io.py", line 60, in imread
    img = call_plugin('imread', fname, plugin=plugin, **plugin_args)
  File "/home/river2000/.local/lib/python3.10/site-packages/skimage/io/manage_plugins.py", line 217, in call_plugin
    return func(*args, **kwargs)
  File "/home/river2000/.local/lib/python3.10/site-packages/skimage/io/_plugins/imageio_plugin.py", line 11, in imread
    out = np.asarray(imageio_imread(*args, **kwargs))
  File "/home/river2000/.local/lib/python3.10/site-packages/imageio/v3.py", line 53, in imread
    with imopen(uri, "r", **plugin_kwargs) as img_file:
  File "/home/river2000/.local/lib/python3.10/site-packages/imageio/core/imopen.py", line 113, in imopen
    request = Request(uri, io_mode, format_hint=format_hint, extension=extension)
  File "/home/river2000/.local/lib/python3.10/site-packages/imageio/core/request.py", line 248, in __init__
    self._parse_uri(uri)
  File "/home/river2000/.local/lib/python3.10/site-packages/imageio/core/request.py", line 408, in _parse_uri
    raise FileNotFoundError("No such file: '%s'" % fn)
FileNotFoundError: No such file: '/home/river2000/monocular_depth_estimation/nyu_data/data/data/nyu2_train/living_room_0019_out/149.jpg'


In [76]:
def evaluate_model(model, test_loader, device):
    model.eval()
    total_loss = 0.0
    total_accuracy = 0.0

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            y_pred = model(X_batch)

            loss = depth_loss(y_batch, y_pred)
            accuracy = depth_acc(y_batch, y_pred)

            total_loss += loss.item()
            total_accuracy += accuracy.item()

    print(f"Test Loss: {total_loss/len(test_loader):.4f}, Test Accuracy: {total_accuracy/len(test_loader):.4f}")


In [None]:
# Prepare test dataset and DataLoader
test_paths = read_csv(TEST_PATH)
test_labels = {i: j for i, j in test_paths}

test_dataset = DataGenerator(test_paths, labels=test_labels, dim=(128, 128), n_channels=3, shuffle=False, pred=False)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=4)

# Evaluate the Model
evaluate_model(model, test_loader, device)


In [None]:
# Save the model
torch.save(model.state_dict(), "model.pth")

# Load the model
model.load_state_dict(torch.load("model.pth"))
model.eval()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Visualize Predictions
model.eval()
for i in range(5):  # Show 5 random samples
    X, y_true = test_dataset[i]
    X = X.unsqueeze(0).to(device)  # Add batch dimension
    y_pred = model(X).squeeze().cpu().detach().numpy()

    plt.figure(figsize=(15, 5))
    plt.subplot(1, 3, 1)
    plt.title("Predicted Depth Map")
    plt.imshow(y_pred, cmap="plasma")

    plt.subplot(1, 3, 2)
    plt.title("Ground Truth Depth Map")
    plt.imshow(y_true.squeeze(), cmap="plasma")

    plt.subplot(1, 3, 3)
    plt.title("Input Image")
    plt.imshow(X.squeeze().permute(1, 2, 0).cpu().numpy())

    plt.show()
