## Import libraries

In [None]:
!pip install timm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
# Mount Google Drive at /content/drive so files stored there can be read
# through the Colab file system.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from __future__ import print_function, division
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
import torchvision
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms, utils
from PIL import Image
import torch.utils.data as data
import torch.nn as nn
import torchvision.models as models
import torch.nn.functional as F
import torch.optim as optim
import cv2


# Ignore warnings
import warnings
warnings.filterwarnings("ignore")


## Data loading


This part of the code defines a PyTorch `Dataset` subclass called `NYUDataset` for loading the NYU Depth v2 dataset.
<br>
<br>
The `__init__` method defines the instance attributes that are used later when loading the data.
<br>

*   `self.root_dir`: The directory that is scanned (recursively) for the NYU Depth v2 images; this notebook uses the current working directory.
*   `self.transform`: A transformation applied to both the RGB and depth images (e.g., resizing, normalizing).
*   `self.rgb_files`: A list of file paths to the RGB image files (files whose name contains `rgb`).
*   `self.depth_files`: A list of file paths to the depth image files (files whose name contains `depth`).

Note that the class has no `split` attribute: the train/validation split is made later with `random_split`.


The `__len__` method returns the length of the `self.rgb_files` list, which is the number of RGB images found.
<br>
<br>
The `__getitem__` method takes an index `idx` as input and returns the RGB image and the depth image at that index as a pair, after applying the transformation if one was given. With the transform used in this notebook, both are PyTorch tensors.





In [None]:
class NYUDataset(torch.utils.data.Dataset):
    """Paired RGB / depth image dataset discovered by walking a directory tree.

    Every file whose name contains ``'rgb'`` is recorded as an RGB image and
    every other file whose name contains ``'depth'`` as a depth map. Both path
    lists are sorted so that the i-th RGB image is always paired with the same
    i-th depth map, regardless of the (arbitrary) order in which the file
    system lists directory entries.

    NOTE(review): index-wise pairing assumes that matching RGB and depth files
    use a naming scheme that sorts in the same order - confirm against the
    dataset layout.

    Args:
        transform: Optional callable applied to the RGB image and to the depth
            map (the same callable for both) before they are returned.
        root_dir: Directory to scan recursively. Defaults to the current
            working directory.

    Raises:
        ValueError: If the number of RGB files differs from the number of
            depth files, since the two lists could then not be paired by index.
    """

    def __init__(self, transform=None, root_dir=None):
        self.root_dir = os.getcwd() if root_dir is None else root_dir

        self.rgb_files = []
        self.depth_files = []

        for subdir, _dirs, files in os.walk(self.root_dir):
            for file in files:
                if 'rgb' in file:
                    self.rgb_files.append(os.path.join(subdir, file))
                elif 'depth' in file:
                    self.depth_files.append(os.path.join(subdir, file))

        # os.walk makes no ordering guarantee; sort so that pairing by index
        # is deterministic and reproducible across runs and machines.
        self.rgb_files.sort()
        self.depth_files.sort()

        if len(self.rgb_files) != len(self.depth_files):
            raise ValueError(
                "Found {} RGB files but {} depth files under {!r}; "
                "they cannot be paired by index.".format(
                    len(self.rgb_files), len(self.depth_files), self.root_dir
                )
            )

        self.transform = transform

    def __len__(self):
        """Return the number of RGB / depth pairs."""
        return len(self.rgb_files)

    def __getitem__(self, idx):
        """Return the ``(rgb, depth)`` pair at ``idx``, transformed if a transform is set."""
        rgb = Image.open(self.rgb_files[idx])
        depth = Image.open(self.depth_files[idx])

        if self.transform:
            rgb = self.transform(rgb)
            depth = self.transform(depth)
        return rgb, depth

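## Data Pre-processing

A set of transformations is applied to the RGB and depth images.
Each image is first resized to 240×320. The `ToTensor()` function then converts it to a PyTorch tensor, scaling 8-bit pixel values to the range 0 to 1, and a final `Lambda` step divides the result by 255. Note that for 8-bit images this last step divides values that are already between 0 and 1 by 255 a second time.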
<br>
<br>
The dataset is then split into a training set and a validation set using an 80-20 split, and dataloader objects are created for each set. The training dataloader randomly shuffles the data during each epoch, while the validation dataloader does not shuffle the data. Both dataloaders use a batch size of 16 and leave `num_workers` at its default of 0, so batches are loaded in the main process.


In [None]:
%cd "drive/MyDrive/Final project/dataset"

# Define the transforms; the same pipeline is passed to NYUDataset, which
# applies it to both the RGB image and the depth map.
transform = transforms.Compose([
    transforms.Resize((240, 320)),
    transforms.ToTensor(),
    # NOTE(review): ToTensor() already rescales 8-bit images to [0, 1], so for
    # the RGB input this division appears to scale a second time (to roughly
    # [0, 1/255]). Depth maps stored with more than 8 bits are not rescaled by
    # ToTensor(), so this step is their only scaling - confirm the intended
    # ranges before changing it.
    transforms.Lambda(lambda x: x.float() / 255) # divide every value by 255
])


# Initialize the dataset (no data loader is created in this cell)
nyu_dataset = NYUDataset(transform = transform)

# Access a sample RGB image and print its shape and data type
rgb_image = nyu_dataset[0][0]
print(rgb_image.shape)
print(rgb_image.dtype)


# Access a sample depth image and print its shape and data type

depth_image = nyu_dataset[0][1]
print(depth_image.shape)
print(depth_image.dtype)

# Number of samples in the dataset
print(len(nyu_dataset))

/content/drive/MyDrive/Final project/dataset
torch.Size([3, 240, 320])
torch.float32
torch.Size([1, 240, 320])
torch.float32
36396


In [None]:
# Split the dataset into train and validation sets (80% / 20%).
train_size = int(0.8 * len(nyu_dataset))
val_size = len(nyu_dataset) - train_size

# Seed the split so the same indices land in each subset on every run;
# an unseeded split changes after each kernel restart.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    nyu_dataset, [train_size, val_size], generator=split_generator
)


# Create the train and validation dataloader objects.
# Only the training data is shuffled; validation order stays fixed.
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=16, shuffle=False)


print(val_size)
print(train_size)

7280
29116


## Model definition

In this code, we build an encoder-decoder network. The encoder is the convolutional part of a ResNet-18 from `torchvision.models`, followed by four extra convolution layers that reduce the 512 feature channels to 64. The decoder uses four transposed convolutions and a final upsampling step to produce a one-channel 240×320 depth map. Note that `models.resnet18()` is called without pre-trained weights and no parameters have `requires_grad` set to False, so the whole network is trained from scratch.

In [None]:
class DepthEstimationModel(nn.Module):
    """Encoder-decoder network that maps an RGB image to a one-channel depth map.

    The encoder is the convolutional trunk of a torchvision ResNet-18 (its
    average-pool and fully connected layers are never called), followed by a
    small convolutional tail that reduces 512 channels to 64. The decoder
    doubles the spatial resolution four times with transposed convolutions and
    then resizes the result to ``output_size``.

    Args:
        pretrained: If True, load torchvision's default pre-trained ResNet-18
            weights (requires a torchvision version that accepts the
            ``weights`` argument, and downloads the weights on first use).
            Defaults to False, i.e. a randomly initialised encoder.
        output_size: ``(height, width)`` of the predicted depth map.
            Defaults to ``(240, 320)``.
    """

    def __init__(self, pretrained=False, output_size=(240, 320)):
        super(DepthEstimationModel, self).__init__()

        # ResNet-18 backbone. Without `pretrained` no weights are loaded, so
        # the encoder starts from random initialisation.
        if pretrained:
            self.encoder = models.resnet18(weights="DEFAULT")
        else:
            self.encoder = models.resnet18()

        # Channel-reducing tail applied to the layer4 features (512 -> 64).
        self.encoder_tail = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True)
        )

        # Four stride-2 transposed convolutions (x16 upsampling in total),
        # then a resize to the exact requested output size.
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(64, 64, kernel_size=4, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(64, 32, kernel_size=4, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(32, 16, kernel_size=4, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(16, 1, kernel_size=4, stride=2, padding=1),
            nn.Upsample(size=tuple(output_size))
        )

    def forward(self, x):
        """Return the predicted depth map for a batch of images ``x``."""
        # ResNet stem
        x = self.encoder.conv1(x)
        x = self.encoder.bn1(x)
        x = self.encoder.relu(x)
        x = self.encoder.maxpool(x)

        # ResNet residual stages
        x = self.encoder.layer1(x)
        x = self.encoder.layer2(x)
        x = self.encoder.layer3(x)
        x = self.encoder.layer4(x)

        # Channel reduction followed by the upsampling decoder
        x = self.encoder_tail(x)
        x = self.decoder(x)

        return x

## Define the loss functions

Define the loss function between the predicted depth map and the ground truth depth map. The loss currently used is the root-mean-square error (RMSE).

In [None]:
def depth_loss(depth_pred, depth_true):
    """Return the root-mean-square error between predicted and true depth.

    RMSE is the only term used. Two further metrics were drafted next to it
    but are disabled:
        abs_rel   = mean(|depth_pred - depth_true| / depth_true)
        threshold = mean(max(depth_pred / depth_true,
                             depth_true / depth_pred) < 1.25)

    Args:
        depth_pred: Tensor of predicted depth values.
        depth_true: Tensor of ground-truth depth values.

    Returns:
        A scalar tensor holding the RMSE over all elements.
    """
    mean_squared_error = F.mse_loss(depth_pred, depth_true)
    return mean_squared_error.sqrt()

## Model Training and Evaluation

In this code, we first define the optimizer as Adam with a learning rate of 0.001. Then we define the number of epochs to train, and loop over the epochs and batches in the train dataloader. For each batch, we get the inputs and labels, zero the gradients, forward pass the inputs through the model, compute the depth loss, backpropagate the gradients, and update the weights using the optimizer.


In [None]:
# Define the model and move it to the GPU when one is available (CPU otherwise)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DepthEstimationModel().to(device)

# Define the optimizer (the loss is computed by depth_loss)
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 20
log_every = 100  # report the batch loss every `log_every` batches, not every batch

# Train the model
model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    # The loop variable is `batch`, not `data`, so that the
    # `torch.utils.data as data` import is not shadowed.
    for i, batch in enumerate(train_dataloader, 0):
        # Get the inputs and labels and move them to the model's device
        inputs, labels = batch
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward + backward + optimize
        outputs = model(inputs)
        loss = depth_loss(outputs, labels)
        loss.backward()
        optimizer.step()

        # Add current batch loss to running loss
        running_loss += loss.item()
        if i % log_every == 0:
            print(f"epoch {epoch + 1}/{num_epochs} batch {i}: loss {loss.item():.4f}")

    # Mean training loss over the epoch (guard against an empty dataloader)
    epoch_loss = running_loss / max(len(train_dataloader), 1)
    print(f"epoch {epoch + 1}/{num_epochs} mean loss: {epoch_loss:.4f}")


tensor(9.3580, grad_fn=<SqrtBackward0>)
tensor(9.4467, grad_fn=<SqrtBackward0>)
tensor(8.9384, grad_fn=<SqrtBackward0>)
tensor(6.9952, grad_fn=<SqrtBackward0>)
tensor(8.0713, grad_fn=<SqrtBackward0>)
tensor(8.5141, grad_fn=<SqrtBackward0>)
tensor(6.1407, grad_fn=<SqrtBackward0>)
tensor(9.8652, grad_fn=<SqrtBackward0>)
tensor(5.4434, grad_fn=<SqrtBackward0>)
tensor(9.1061, grad_fn=<SqrtBackward0>)
tensor(9.0448, grad_fn=<SqrtBackward0>)
tensor(8.9028, grad_fn=<SqrtBackward0>)
tensor(7.9157, grad_fn=<SqrtBackward0>)
tensor(8.7376, grad_fn=<SqrtBackward0>)
tensor(8.4040, grad_fn=<SqrtBackward0>)
tensor(6.9600, grad_fn=<SqrtBackward0>)
tensor(6.3930, grad_fn=<SqrtBackward0>)
tensor(8.7424, grad_fn=<SqrtBackward0>)
tensor(6.9994, grad_fn=<SqrtBackward0>)
tensor(6.2590, grad_fn=<SqrtBackward0>)
tensor(7.7568, grad_fn=<SqrtBackward0>)
tensor(7.5251, grad_fn=<SqrtBackward0>)
tensor(7.9911, grad_fn=<SqrtBackward0>)
tensor(6.0791, grad_fn=<SqrtBackward0>)
tensor(6.6056, grad_fn=<SqrtBackward0>)
