In [1]:

# =============== [1] Install Required Packages ==============
!pip install scipy pillow matplotlib pandas --quiet

# =============== [2] Imports ==============
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
from torchvision.utils import make_grid

class NYUDepthDatasetCSV(Dataset):
    """Paired RGB / depth dataset indexed by a header-less CSV file.

    Column 0 of the CSV holds the RGB image path and column 1 the depth-map
    path; both are joined onto ``base_folder`` when a sample is loaded.

    Args:
        base_folder: Directory that the CSV's paths are relative to.
        csv_file: Path to the CSV listing (image, depth) path pairs.
        transform_img: Optional callable applied to the RGB PIL image.
        transform_depth: Optional callable applied to the depth PIL image.
    """

    def __init__(self, base_folder, csv_file, transform_img=None, transform_depth=None):
        self.base_folder = base_folder
        self.transform_img = transform_img
        self.transform_depth = transform_depth
        # header=None: every row of the file, including the first, is a sample.
        self.data = pd.read_csv(csv_file, header=None)

    def __len__(self):
        """Return the number of (image, depth) pairs listed in the CSV."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Load pair ``idx`` and return ``(image, depth)`` after the transforms."""
        img_path, depth_path = (
            os.path.join(self.base_folder, self.data.iloc[idx, column])
            for column in (0, 1)
        )
        image = Image.open(img_path).convert('RGB')
        # NOTE(review): 'L' forces the depth map to 8-bit grayscale. If any
        # split stores depth at a higher bit depth, range/precision is lost
        # here -- confirm against the dataset files.
        depth = Image.open(depth_path).convert('L')
        image = self.transform_img(image) if self.transform_img else image
        depth = self.transform_depth(depth) if self.transform_depth else depth
        return image, depth

# ---- Dataset location (Kaggle input mount) ----
base_folder = "/kaggle/input/nyu-depth-v2/nyu_data"
train_csv = os.path.join(base_folder, "data/nyu2_train.csv")
test_csv = os.path.join(base_folder, "data/nyu2_test.csv")

# ---- Tunable pipeline constants ----
IMAGE_SIZE = (256, 256)   # (height, width) every image and depth map is resized to
BATCH_SIZE = 8
NUM_WORKERS = 2
# Per-channel RGB statistics used for input normalisation; these are the
# values commonly quoted for ImageNet-pretrained torchvision models.
RGB_MEAN = [0.485, 0.456, 0.406]
RGB_STD = [0.229, 0.224, 0.225]

# RGB inputs: resize -> float tensor -> channel-wise normalisation.
img_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(RGB_MEAN, RGB_STD),
])
# Depth targets: resize -> float tensor only (no normalisation).
depth_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
])

train_dataset = NYUDepthDatasetCSV(base_folder, train_csv, img_transform, depth_transform)
test_dataset = NYUDepthDatasetCSV(base_folder, test_csv, img_transform, depth_transform)

# Note: the validation loader wraps the *test* split; only training data is shuffled.
loader_options = dict(batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

In [2]:
import pandas as pd
# Peek at the training CSV using pandas' default header handling. The file has
# no header row, so the first sample's paths show up as the column names below;
# this is why the dataset class reads the CSV with header=None.
train_csv_preview = pd.read_csv("/kaggle/input/nyu-depth-v2/nyu_data/data/nyu2_train.csv")
print(train_csv_preview.columns)
print(train_csv_preview.head())


Index(['data/nyu2_train/living_room_0038_out/37.jpg', 'data/nyu2_train/living_room_0038_out/37.png'], dtype='object')
    data/nyu2_train/living_room_0038_out/37.jpg  \
0  data/nyu2_train/living_room_0038_out/115.jpg   
1    data/nyu2_train/living_room_0038_out/6.jpg   
2   data/nyu2_train/living_room_0038_out/49.jpg   
3  data/nyu2_train/living_room_0038_out/152.jpg   
4  data/nyu2_train/living_room_0038_out/142.jpg   

    data/nyu2_train/living_room_0038_out/37.png  
0  data/nyu2_train/living_room_0038_out/115.png  
1    data/nyu2_train/living_room_0038_out/6.png  
2   data/nyu2_train/living_room_0038_out/49.png  
3  data/nyu2_train/living_room_0038_out/152.png  
4  data/nyu2_train/living_room_0038_out/142.png  


In [3]:
# =============== [5] Model Definition ==============
class DepthEstimationCNN(nn.Module):
    """Encoder/decoder CNN with skip connections for single-image depth maps.

    Four encoder stages (each followed by 2x2 max-pooling) feed a bottleneck;
    four decoder stages upsample with stride-2 transposed convolutions and
    concatenate the matching encoder output before refining it. A 1x1
    convolution followed by a sigmoid yields one channel with values in (0, 1).

    Because the input is halved four times and the skip tensors are
    concatenated unmodified, input height and width should be multiples of 16.
    """

    def __init__(self):
        super().__init__()
        # --- Encoder ---
        self.enc1 = self.conv_block(in_channels=3, out_channels=64)
        self.enc2 = self.conv_block(in_channels=64, out_channels=128)
        self.enc3 = self.conv_block(in_channels=128, out_channels=256)
        self.enc4 = self.conv_block(in_channels=256, out_channels=512)
        self.bottleneck = self.conv_block(in_channels=512, out_channels=1024)
        # --- Decoder: each dec* block sees upsampled + skip channels (2x width) ---
        self.upconv4 = nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2)
        self.dec4 = self.conv_block(in_channels=1024, out_channels=512)
        self.upconv3 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.dec3 = self.conv_block(in_channels=512, out_channels=256)
        self.upconv2 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.dec2 = self.conv_block(in_channels=256, out_channels=128)
        self.upconv1 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.dec1 = self.conv_block(in_channels=128, out_channels=64)
        # --- Head and shared pooling layer ---
        self.out = nn.Conv2d(64, 1, kernel_size=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

    def conv_block(self, in_channels, out_channels):
        """Build two (3x3 conv -> batch norm -> ReLU) stages that keep H and W."""
        layers = []
        for stage_in in (in_channels, out_channels):
            layers.extend([
                nn.Conv2d(stage_in, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ])
        return nn.Sequential(*layers)

    def forward(self, x):
        """Map a batch of 3-channel images to 1-channel maps of the same H x W."""
        # Contracting path; keep each stage's output for its skip connection.
        skip1 = self.enc1(x)
        skip2 = self.enc2(self.pool(skip1))
        skip3 = self.enc3(self.pool(skip2))
        skip4 = self.enc4(self.pool(skip3))
        features = self.bottleneck(self.pool(skip4))

        # Expanding path: upsample, concatenate the skip on channels, refine.
        decoder_path = (
            (self.upconv4, self.dec4, skip4),
            (self.upconv3, self.dec3, skip3),
            (self.upconv2, self.dec2, skip2),
            (self.upconv1, self.dec1, skip1),
        )
        for upsample, refine, skip in decoder_path:
            features = refine(torch.cat([upsample(features), skip], dim=1))

        return torch.sigmoid(self.out(features))

In [4]:
# =============== [6] Multi-GPU Setup ==============
# Pick CUDA when available; with more than one visible GPU the model is wrapped
# in DataParallel before being moved to the device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_gpus = torch.cuda.device_count()
model = DepthEstimationCNN()
if num_gpus > 1:
    print(f"Using {num_gpus} GPUs!")
    model = nn.DataParallel(model)
model = model.to(device)

import time

def train_depth_model(
    model, dataloader, criterion, optimizer, device, num_epochs=10, val_loader=None
):
    """Train ``model`` and print loss, MAE, RMSE and threshold accuracy per epoch.

    Args:
        model: Network mapping an image batch to a depth batch.
        dataloader: Iterable yielding ``(images, depths)`` tensor pairs.
        criterion: Loss callable ``criterion(preds, depths)`` returning a scalar tensor.
        optimizer: Optimizer stepping ``model``'s parameters.
        device: Device the batches are moved to.
        num_epochs: Number of passes over ``dataloader``.
        val_loader: Optional iterable of validation batches; when given (not
            ``None``), ``evaluate_depth_model`` is run after every epoch.

    Returns:
        None. Progress is reported via ``print``.

    Raises:
        ValueError: If ``dataloader`` yields no samples during an epoch.
    """
    epoch_times = []
    train_start = time.time()

    for epoch in range(1, num_epochs + 1):
        model.train()
        epoch_start = time.time()
        total_loss, total_mae, total_mse, total_acc, total_samples = 0.0, 0.0, 0.0, 0.0, 0
        for images, depths in dataloader:
            images, depths = images.to(device), depths.to(device)
            optimizer.zero_grad()
            preds = model(images)
            if preds.shape != depths.shape:
                # Resize the target to the prediction's spatial size.
                depths = nn.functional.interpolate(depths, size=preds.shape[2:], mode='bilinear', align_corners=True)
            loss = criterion(preds, depths)
            loss.backward()
            optimizer.step()

            batch_size = images.size(0)
            total_loss += loss.item() * batch_size
            # Metrics are for reporting only, so keep them out of autograd.
            with torch.no_grad():
                abs_err = torch.abs(preds - depths)
                total_mae += abs_err.mean().item() * batch_size
                # Accumulate the *squared* error and take the root once per
                # epoch: averaging per-batch RMSE values does not give the
                # epoch RMSE.
                total_mse += (abs_err ** 2).mean().item() * batch_size
                total_acc += (abs_err < 0.1).float().mean().item() * batch_size
            total_samples += batch_size

        if total_samples == 0:
            raise ValueError("dataloader yielded no samples; cannot compute epoch metrics")

        epoch_time = time.time() - epoch_start
        epoch_times.append(epoch_time)
        avg_epoch_time = sum(epoch_times) / len(epoch_times)
        elapsed = time.time() - train_start
        remaining = avg_epoch_time * (num_epochs - epoch)
        epoch_loss = total_loss / total_samples
        epoch_mae = total_mae / total_samples
        epoch_rmse = (total_mse / total_samples) ** 0.5
        epoch_acc = total_acc / total_samples
        print(
            f"Epoch {epoch:02d} | Time: {epoch_time:.1f}s | "
            f"Loss: {epoch_loss:.4f} | MAE: {epoch_mae:.4f} | "
            f"RMSE: {epoch_rmse:.4f} | Acc(<0.1): {epoch_acc*100:.2f}% | "
            f"Elapsed: {elapsed/60:.1f}m | ETA: {remaining/60:.1f}m"
        )

        # Explicit None check: truthiness of a loader goes through __len__,
        # which is undefined for some loaders and is 0 for an empty one.
        if val_loader is not None:
            val_mae, val_rmse, val_acc = evaluate_depth_model(model, val_loader, device)
            print(
                f"  [VAL] MAE: {val_mae:.4f} | RMSE: {val_rmse:.4f} | Acc(<0.1): {val_acc*100:.2f}%"
            )


def evaluate_depth_model(model, dataloader, device):
    """Compute MAE, RMSE and threshold accuracy of ``model`` over ``dataloader``.

    The model is switched to eval mode and left in that mode afterwards.

    Args:
        model: Network mapping an image batch to a depth batch.
        dataloader: Iterable yielding ``(images, depths)`` tensor pairs.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(mae, rmse, acc)`` of floats, where ``acc`` is the fraction of
        elements whose absolute error is below 0.1.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    model.eval()
    total_mae, total_mse, total_acc, total = 0., 0., 0., 0
    with torch.no_grad():
        for images, depths in dataloader:
            images, depths = images.to(device), depths.to(device)
            preds = model(images)
            if preds.shape != depths.shape:
                # Resize the target to the prediction's spatial size.
                depths = nn.functional.interpolate(depths, size=preds.shape[2:], mode='bilinear', align_corners=True)
            batch_size = images.size(0)
            abs_err = torch.abs(preds - depths)
            total_mae += abs_err.mean().item() * batch_size
            # Accumulate the squared error; the root is taken once at the end
            # because the mean of per-batch RMSE values is not the overall RMSE.
            total_mse += (abs_err ** 2).mean().item() * batch_size
            total_acc += (abs_err < 0.1).float().mean().item() * batch_size
            total += batch_size
    if total == 0:
        raise ValueError("dataloader yielded no samples; cannot compute metrics")
    return total_mae / total, (total_mse / total) ** 0.5, total_acc / total


def visualize_batch(model, dataloader, device, num_images=4):
    """Plot inputs, predictions and ground truth for the first batch.

    Takes the first batch from ``dataloader``, runs ``model`` on it in eval
    mode without gradients, and draws three figures (inputs, predicted depths,
    ground-truth depths), each a single-row grid of the first ``num_images``
    samples. Each image in a grid is normalised independently for display.

    Args:
        model: Network mapping an image batch to a depth batch.
        dataloader: Iterable yielding ``(images, depths)`` tensor pairs.
        device: Device the batch is moved to for inference.
        num_images: How many samples of the batch to show per figure.
    """
    model.eval()
    batch_images, batch_depths = next(iter(dataloader))
    batch_images = batch_images.to(device)
    batch_depths = batch_depths.to(device)
    with torch.no_grad():
        batch_preds = model(batch_images)

    def _plot_row(batch, heading):
        # make_grid returns (C, H, W); matplotlib expects (H, W, C).
        grid = make_grid(batch, nrow=num_images, normalize=True, scale_each=True)
        canvas = grid.permute(1, 2, 0).numpy()
        plt.figure(figsize=(12, 3))
        plt.title(heading)
        plt.axis('off')
        plt.imshow(canvas)
        plt.show()

    panels = (
        ("Input Images", batch_images),
        ("Predicted Depths", batch_preds),
        ("Ground Truth Depths", batch_depths),
    )
    for heading, batch in panels:
        _plot_row(batch[:num_images].cpu(), heading)


In [5]:
# ======================== [8] Training setup ========================
# L1 (mean absolute error) objective, optimised with Adam over all model
# parameters. The training run itself is launched in the next cell.
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [None]:
# Train for 10 epochs, validating on val_loader after each one.
train_depth_model(
    model=model,
    dataloader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    num_epochs=10,
    val_loader=val_loader,
)

In [None]:
# Show inputs, predictions and ground truth for the first validation batch.
visualize_batch(model=model, dataloader=val_loader, device=device, num_images=4)

In [None]:
import torch
import zipfile

# ============ [1] Save the model AND optimizer checkpoint =============
# When the model is wrapped in DataParallel, save the inner module's weights so
# the checkpoint can be loaded into an unwrapped DepthEstimationCNN.
save_dict = {
    'model': model.module.state_dict() if isinstance(model, torch.nn.DataParallel) else model.state_dict(),
    'optimizer': optimizer.state_dict()
}
model_path = "nyu_depth_model_with_optimizer.pth"
torch.save(save_dict, model_path)
print(f"Model+optimizer checkpoint saved as: {model_path}")

# ============ [2] Zip the checkpoint file =============
zip_path = "nyu_depth_model_with_optimizer.zip"
# ZipFile defaults to ZIP_STORED (no compression); request DEFLATE explicitly
# so the archive is actually compressed.
with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
    zipf.write(model_path)

print(f"Checkpoint zipped as: {zip_path}")

# You can now download 'nyu_depth_model_with_optimizer.zip' from your notebook
