## Environment Setup

In [7]:
import torch
from PIL import Image
from torch.utils.data import Dataset, DataLoader, random_split
import os
import pandas as pd
from pathlib import Path
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torchvision import transforms



## DataLoader

### Load Image

In [8]:
class MyDataset(Dataset):
    """Paired RGB / depth image dataset with an optional scalar target per dish.

    Directory layout read by this class::

        <root>/<csv_name>                                   (only when if_train)
        <root>/<data_dir>/<color_dir>/dish_*/<rgb_name>
        <root>/<data_dir>/<depth_dir>/dish_*/<depth_name>

    Args:
        root: Base directory (``str`` or ``pathlib.Path``).
        if_train: When True, targets are loaded from the CSV and
            ``__getitem__`` returns ``(rgb, depth, target)``; otherwise it
            returns ``(rgb, depth)``.
        data_dir: Sub-directory of ``root`` holding the image folders.
        color_dir: Sub-directory of ``data_dir`` holding the per-dish RGB folders.
        depth_dir: Sub-directory of ``data_dir`` holding the per-dish depth folders.
        rgb_name: File name of the RGB image inside each dish folder.
        depth_name: File name of the depth image inside each dish folder.
        csv_name: CSV under ``root`` with an ``ID`` and a ``Value`` column.
        transform: Stored on the instance; not applied anywhere in this class.

    Raises:
        RuntimeError: If no ``dish_*/<rgb_name>`` file exists under the colour
            directory.
    """

    def __init__(self,
                 root,
                 if_train = True,
                 data_dir = "train",
                 color_dir = "color",
                 depth_dir = "depth_raw",
                 rgb_name = "rgb.png",
                 depth_name="depth_raw.png",
                 csv_name = "nutrition5k_train.csv",
                 transform = False,
                 ):
        # Path() makes plain-string roots work with the `/` joins below.
        self.root = Path(root)
        self.data = self.root / data_dir
        self.color_dir = self.data / color_dir
        self.depth_dir = self.data / depth_dir
        self.rgb_name = rgb_name
        self.depth_name = depth_name
        self.if_train = if_train
        self.transform = transform
        self.to_tensor = transforms.ToTensor()

        if self.if_train:
            df = pd.read_csv(self.root / csv_name)
            # Map dish id -> target value for O(1) lookup while indexing samples.
            self.id2cal = {str(dish): float(value)
                           for dish, value in zip(df["ID"], df["Value"])}

        rgb_paths = sorted(
            candidate
            for candidate in (folder / rgb_name for folder in self.color_dir.glob("dish_*"))
            if candidate.exists()
        )
        if not rgb_paths:
            raise RuntimeError(f"Found 0 images in {self.color_dir}.")

        # Each sample is (rgb_path, depth_path, target); target is None at test time.
        self.samples = []
        for rgb_path in rgb_paths:
            dish_id = rgb_path.parent.name
            target = None
            if self.if_train:
                if dish_id not in self.id2cal:
                    # A dish without a label cannot be trained on; skip it
                    # instead of failing with a KeyError right after the warning.
                    print(f"Warning: {dish_id} not found in CSV; skipping.")
                    continue
                target = self.id2cal[dish_id]

            depth_path = self.depth_dir / dish_id / self.depth_name
            self.samples.append((rgb_path, depth_path, target))

    def __len__(self):
        """Return the number of indexed samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load one sample and convert both images to tensors.

        Returns:
            ``(rgb, depth, target)`` when ``if_train`` is True, where ``target``
            is a float32 scalar tensor; otherwise ``(rgb, depth)``.
        """
        rgb_path, depth_path, cal = self.samples[idx]

        # Context managers release the file handles as soon as the pixel data is converted.
        with Image.open(rgb_path) as rgb_image:
            rgb = self.to_tensor(rgb_image.convert("RGB"))
        # NOTE(review): mode "L" is 8-bit grayscale; if the depth PNGs are
        # stored with 16 bits per pixel this conversion loses precision — confirm.
        with Image.open(depth_path) as depth_image:
            depth = self.to_tensor(depth_image.convert("L"))

        if self.if_train:
            return rgb, depth, torch.tensor(cal, dtype=torch.float32)
        return rgb, depth
            
        

In [9]:
# Show the directory the notebook is running from.
print(str(Path.cwd()))

/mnt/c/Users/lhg45/Desktop/COMP90086/PJ/Nutrition5K


In [10]:
# Labelled training data and unlabelled test data, both rooted at the working directory.
train_set = MyDataset(root=Path.cwd(), if_train=True, transform=False)
test_set = MyDataset(root=Path.cwd(), data_dir="test", if_train=False, transform=False)

# Hold out a fixed fraction of the labelled data for validation.
dataset_size = len(train_set)
val_ratio = 0.1
val_size = int(dataset_size * val_ratio)
train_size = dataset_size - val_size

# Seed the split with its own generator so the same samples land in the
# validation subset on every run (the split was previously unseeded).
split_generator = torch.Generator().manual_seed(42)
train_subset, val_subset = random_split(
    train_set, [train_size, val_size], generator=split_generator
)

# Settings shared by all three loaders; only the training loader shuffles.
loader_kwargs = dict(batch_size=16, num_workers=4, pin_memory=True)
train_loader = DataLoader(train_subset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_subset, shuffle=False, **loader_kwargs)
test_loader = DataLoader(test_set, shuffle=False, **loader_kwargs)


# Model Definition

In [11]:
def conv_block(c_in, c_out, k=5, s=1, p=1):
    """Build a Conv2d -> BatchNorm2d -> ReLU unit.

    Args:
        c_in: Number of input channels.
        c_out: Number of output channels.
        k: Convolution kernel size.
        s: Convolution stride.
        p: Convolution padding.

    Returns:
        An ``nn.Sequential`` holding the three layers in that order.
    """
    # The convolution carries no bias term; BatchNorm2d follows it directly.
    convolution = nn.Conv2d(c_in, c_out, kernel_size=k, stride=s, padding=p, bias=False)
    normalisation = nn.BatchNorm2d(c_out)
    activation = nn.ReLU(inplace=True)
    return nn.Sequential(convolution, normalisation, activation)

class RGBBranch(nn.Module):
    """Convolutional feature extractor for the colour image.

    Five conv blocks widen the channels from ``in_ch`` to 256, with four 2x
    max-pool stages in between, followed by global average pooling. For a
    batched input the output is one 256-element vector per sample.
    """

    def __init__(self, in_ch=3):
        super().__init__()
        # A (c_in, c_out) tuple becomes a conv block, "pool" a 2x max-pool.
        # The order fixes the sub-module indices inside ``stem``.
        plan = [
            (in_ch, 32),
            (32, 32),
            "pool",
            (32, 64),
            (64, 64),
            "pool",
            (64, 128),
            "pool",
            (128, 256),
            "pool",
        ]
        layers = [
            nn.MaxPool2d(2) if step == "pool" else conv_block(*step)
            for step in plan
        ]
        self.stem = nn.Sequential(*layers)
        self.gap = nn.AdaptiveAvgPool2d(1)

    def forward(self, x):
        """Run the stem, average over the spatial dims and flatten from dim 1."""
        pooled = self.gap(self.stem(x))
        return pooled.flatten(1)

class DepthBranch(nn.Module):
    """Convolutional feature extractor for the depth image.

    A lighter counterpart of the colour branch: five conv blocks widen the
    channels from ``in_ch`` to 128, with four 2x max-pool stages in between,
    followed by global average pooling. For a batched input the output is one
    128-element vector per sample.
    """

    def __init__(self, in_ch=1):
        super().__init__()
        # A (c_in, c_out) tuple becomes a conv block, "pool" a 2x max-pool.
        # The order fixes the sub-module indices inside ``stem``.
        plan = [
            (in_ch, 16),
            (16, 16),
            "pool",
            (16, 32),
            "pool",
            (32, 64),
            "pool",
            (64, 128),
            "pool",
        ]
        layers = [
            nn.MaxPool2d(2) if step == "pool" else conv_block(*step)
            for step in plan
        ]
        self.stem = nn.Sequential(*layers)
        self.gap = nn.AdaptiveAvgPool2d(1)

    def forward(self, x):
        """Run the stem, average over the spatial dims and flatten from dim 1."""
        pooled = self.gap(self.stem(x))
        return pooled.flatten(1)

class RGBDNet(nn.Module):
    """Two-branch regressor that fuses colour and depth features.

    The 256-d colour features and 128-d depth features are concatenated and
    passed through an MLP (384 -> 128 -> 64 -> 32 -> 1). The trailing singleton
    dimension is squeezed away, so a batched input yields one scalar per sample.

    Args:
        use_dropout: When True, a ``Dropout(0.1)`` layer is inserted after the
            first hidden activation of the MLP head.
    """

    def __init__(self, use_dropout=True):
        super().__init__()
        self.rgb = RGBBranch(3)
        self.depth = DepthBranch(1)

        # Width of the concatenated colour + depth feature vector.
        fusion_dim = 256 + 128

        head_layers = [nn.Linear(fusion_dim, 128), nn.ReLU(inplace=True)]
        if use_dropout:
            head_layers.append(nn.Dropout(0.1))
        head_layers.extend((
            nn.Linear(128, 64),
            nn.ReLU(inplace=True),
            nn.Linear(64, 32),
            nn.ReLU(inplace=True),
            nn.Linear(32, 1),
        ))
        self.head = nn.Sequential(*head_layers)

    def forward(self, rgb, depth):
        """Encode both inputs, concatenate along dim 1 and regress one value."""
        fused = torch.cat([self.rgb(rgb), self.depth(depth)], dim=1)
        return self.head(fused).squeeze(1)



# Training

In [None]:
def _smape_sum(pred, target, eps=1e-8):
    """Return the per-sample symmetric absolute percentage errors summed over the batch.

    Each term is ``2 * |pred - target| / (|pred| + |target| + eps)``. Returning
    the sum (not the mean) lets the caller weight every sample equally even
    when the last batch is smaller than the others.
    """
    errors = 2 * torch.abs(pred - target) / (torch.abs(pred) + torch.abs(target) + eps)
    return errors.sum().item()


def _safe_mean(total, count):
    """Return ``total / count``, or NaN when nothing was accumulated."""
    return total / count if count else float("nan")


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = RGBDNet()
model.to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
criterion = nn.SmoothL1Loss()

# Per-epoch history; SMAPE values are stored as fractions (multiply by 100 for percent).
train_losses, val_losses = [], []
train_smapes, val_smapes = [], []

num_epochs = 10

for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    running_loss, smape_total, image_count = 0.0, 0.0, 0
    print("Start epoch: " + str(epoch+1))

    for rgb, depth, cal in train_loader:
        rgb, depth, cal = rgb.to(device), depth.to(device), cal.to(device)
        pred = model(rgb, depth)
        loss = criterion(pred, cal)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_size = rgb.size(0)
        # criterion returns a batch mean; re-weight by batch size for a per-sample average.
        running_loss += loss.item() * batch_size
        # detach(): the metric does not need the autograd graph.
        smape_total += _smape_sum(pred.detach(), cal)

        image_count += batch_size
        print(f"Processed {image_count} images", end='\r')

    train_loss = _safe_mean(running_loss, image_count)
    train_smape = _safe_mean(smape_total, image_count)
    train_losses.append(train_loss)
    train_smapes.append(train_smape)

    # ---- validation pass ----
    model.eval()
    val_loss_total, val_smape_total, val_count = 0.0, 0.0, 0
    with torch.no_grad():
        for rgb, depth, cal in val_loader:
            rgb, depth, cal = rgb.to(device), depth.to(device), cal.to(device)
            pred = model(rgb, depth)
            loss = criterion(pred, cal)

            batch_size = rgb.size(0)
            val_loss_total += loss.item() * batch_size
            val_smape_total += _smape_sum(pred, cal)
            val_count += batch_size

    # An empty validation split yields NaN instead of a ZeroDivisionError.
    val_loss = _safe_mean(val_loss_total, val_count)
    val_smape = _safe_mean(val_smape_total, val_count)
    val_losses.append(val_loss)
    val_smapes.append(val_smape)

    torch.cuda.empty_cache()

    print(f"Epoch [{epoch+1}/{num_epochs}] | "
          f"Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, "
          f"Train SMAPE: {train_smape*100:.3f}%, Val SMAPE: {val_smape*100:.3f}%")




Start epoch: 1


## Plot

In [None]:
# Left panel: loss per epoch. Right panel: SMAPE per epoch, in percent.
fig, (loss_ax, smape_ax) = plt.subplots(1, 2, figsize=(10, 4))

loss_ax.plot(train_losses, label='Train Loss')
loss_ax.plot(val_losses, label='Val Loss')
# The training criterion is SmoothL1Loss, so label the curve accordingly.
loss_ax.set(title='SmoothL1 Loss Curve', xlabel='Epoch', ylabel='Loss')
loss_ax.legend()

# The training cell stores the metric history in train_smapes / val_smapes
# as fractions; scale to percent for display.
smape_ax.plot([m*100 for m in train_smapes], label='Train SMAPE')
smape_ax.plot([m*100 for m in val_smapes], label='Val SMAPE')
smape_ax.set(title='SMAPE Curve', xlabel='Epoch', ylabel='SMAPE (%)')
smape_ax.legend()

fig.tight_layout()
plt.show()

# Testing