<a href="https://colab.research.google.com/github/GoldenSunFire/Depth-Estimation-Project/blob/main/DepthMapEstimation_ResNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [60]:
import torch
import torch.nn as nn
import torchvision.models as models
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import numpy as np



In [61]:
class DepthEstimationModel(nn.Module):
    def __init__(self, pretrained=True):
        super(DepthEstimationModel, self).__init__()
        self.resnet = models.resnet50(pretrained=pretrained)
        self.resnet.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.resnet.fc = nn.Conv2d(2048, 1, kernel_size=1, stride=1)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

    def forward(self, x):
        x = self.resnet.conv1(x)
        x = self.resnet.bn1(x)
        x = self.resnet.relu(x)
        x = self.resnet.maxpool(x)

        x = self.resnet.layer1(x)
        x = self.resnet.layer2(x)
        x = self.resnet.layer3(x)
        x = self.resnet.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)

        x = self.resnet.fc(x)
        return x.squeeze()


In [66]:
class DepthEstimationDataset(Dataset):
    def __init__(self, data_dir):
        self.data_dir = data_dir
        self.image_files = sorted([f for f in os.listdir(data_dir) if f.endswith("_rgb.png")])
        self.depth_files = sorted([f for f in os.listdir(data_dir) if f.endswith("_gt.png")])
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),  # Resize images to the input size expected by the model
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize RGB images
        ])
        self.depth_transform = transforms.Compose([
            transforms.Resize((224, 224)),  # Resize depths to match RGB image size
            transforms.ToTensor()
        ])

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_name = self.image_files[idx]
        depth_name = self.depth_files[idx]
        img_path = os.path.join(self.data_dir, img_name)
        depth_path = os.path.join(self.data_dir, depth_name)

        image = Image.open(img_path)
        
        # Convert image to RGB if it has more than 3 channels
        if image.mode != 'RGB':
            image = image.convert('RGB')

        depth = Image.open(depth_path).convert('L')

        image = self.transform(image)
        #depth = depth.permute(0, 1, 4, 5)
        #depth = torch.from_numpy(np.array(depth) / 255.0).unsqueeze(0).float()
        #depth = transforms.Resize((image.shape[1], image.shape[2]))(depth)  # Resize depth image to match RGB image size
        #depth = transforms.ToTensor()(depth)
        depth = self.depth_transform(depth)

        return image, depth


# Set the path to your data directory
data_dir = '/content/sample_data/dataset'

# Define the desired height and width
desired_height = 224
desired_width = 224

# Create an instance of the dataset
dataset = DepthEstimationDataset(data_dir)#, desired_height, desired_width)
images_shape = images.shape
depths_shape = depths.shape
print(images_shape)
print(depths_shape)

torch.Size([8, 3, 224, 224])
torch.Size([8, 1, 1, 480, 640])


In [None]:
# Split the dataset into train, validation, and test sets
train_ratio = 0.8
val_ratio = 0.1
test_ratio = 0.1
train_size = int(train_ratio * len(dataset))
val_size = int(val_ratio * len(dataset))
test_size = len(dataset) - train_size - val_size

train_set, val_set, test_set = torch.utils.data.random_split(dataset, [train_size, val_size, test_size])

# Create data loaders
batch_size = 16
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False, num_workers=4)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=4)

# Create an instance of the Depth Estimation Model
model = DepthEstimationModel()

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

num_epochs = 20
# Training loop
for epoch in range(num_epochs):
    running_loss = 0.0
    for images, depths in train_loader:
        optimizer.zero_grad()

        # Reshape the input tensor
        images = images.permute(0, 1, 3, 2)  # Change dimensions from [batch_size, height, width, channels] to [batch_size, channels, height, width]
        depths = depths.unsqueeze(1)  # Add a singleton dimension to depths [batch_size, 1, height, width]

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, depths)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Print training loss for each epoch
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}")


RuntimeError: ignored

In [None]:


# Evaluation
with torch.no_grad():
    total_loss = 0.0
    for images, depths in val_loader:
        outputs = model(images)
        loss = criterion(outputs, depths)
        total_loss += loss.item()

    # Print validation loss
    print(f"Validation Loss: {total_loss/len(val_loader)}")

# Testing
with torch.no_grad():
    total_loss = 0.0
    for images, depths in test_loader:
        outputs = model(images)
        loss = criterion(outputs, depths)
        total_loss += loss.item()

    # Print test loss
    print(f"Test Loss: {total_loss/len(test_loader)}")