<a href="https://www.kaggle.com/code/furqanbhatkaggle/gym-notebook34736?scriptVersionId=249388776" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import timm
import numpy as np
from PIL import Image
import pandas as pd


In [2]:
# import zipfile

# with zipfile.ZipFile('./amazon-bodym.zip', 'r') as zip_ref:
#     zip_ref.extractall('./')

In [3]:
# !mv ./amazon-bodym ./data

In [4]:
# 1) Dataset Definition
class BodyMeasurementDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.mask_dir = os.path.join(root_dir, "mask")
        self.mask_left_dir = os.path.join(root_dir, "mask_left")

        # Load CSVs
        self.hwg_df = pd.read_csv(os.path.join(root_dir, "hwg_metadata.csv"))  # subject_id, height, weight, gender
        self.msr_df = pd.read_csv(os.path.join(root_dir, "measurements.csv"))  # subject_id, <14 body parts>
        self.map_df = pd.read_csv(os.path.join(root_dir, "subject_to_photo_map.csv"))  # photo_id, subject_id

        # Merge everything into one DataFrame
        self.data = self.map_df.merge(self.hwg_df, on="subject_id").merge(self.msr_df, on="subject_id")

        self.transform = transform or transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.ToTensor()
        ])

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        photo_id = row["photo_id"]

        # Load silhouettes
        front_path = os.path.join(self.mask_dir, f"{photo_id}.png")
        side_path = os.path.join(self.mask_left_dir, f"{photo_id}.png")
        front_img = Image.open(front_path).convert("L")
        side_img = Image.open(side_path).convert("L")
        front_img = self.transform(front_img)  # [1, H, W]
        side_img = self.transform(side_img)    # [1, H, W]

        # Normalize height and weight 
        height = row["height_cm"] / 200.0
        weight = row["weight_kg"] / 200.0
        H = torch.full_like(front_img, height)
        W = torch.full_like(front_img, weight)

        x = torch.cat([front_img, side_img, H, W], dim=0) #4 channels

        # Get measurement labels
        measurement_cols = self.msr_df.columns.tolist()[1:]  # skip subject_id
        y = torch.tensor(row[measurement_cols].values.astype("float32"))

        return x, y


In [5]:
from torch.utils.data import DataLoader

# Transforms 
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor()
])

# Create datasets
train_ds = BodyMeasurementDataset('/kaggle/input/amazon-bodym-zip/amazon-bodym/train', transform)
testA_ds = BodyMeasurementDataset('/kaggle/input/amazon-bodym-zip/amazon-bodym/testA', transform)
testB_ds = BodyMeasurementDataset('/kaggle/input/amazon-bodym-zip/amazon-bodym/testB', transform)

# DataLoaders
train_loader = DataLoader(train_ds, batch_size=16, shuffle=True)
testA_loader = DataLoader(testA_ds, batch_size=16, shuffle=False)
testB_loader = DataLoader(testB_ds, batch_size=16, shuffle=False)



In [6]:
#Model
class BMnet(nn.Module):
    def __init__(self, backbone_name='tf_efficientnetv2_s.in21k', num_measures=14):
        super().__init__()
        # pretrained 4-channel MNASNet-B1 feature extractor
        self.backbone = timm.create_model(backbone_name, pretrained=True,
                                          in_chans=4, num_classes=0)
        F = self.backbone.num_features

        # --- optimized head ---
        hidden1 = 256
        hidden2 = 128

        # projection trunk
        self.proj = nn.Sequential(
            nn.Linear(F, hidden1, bias=False),
            nn.LayerNorm(hidden1),
            nn.ReLU(inplace=True),
            nn.Dropout(0.2),

            nn.Linear(hidden1, hidden2, bias=False),
            nn.LayerNorm(hidden2),
            nn.ReLU(inplace=True),
            nn.Dropout(0.1),
        )

        # final read-out
        self.readout = nn.Linear(hidden2, num_measures)

        # residual from first non-linear layer
        self.skip_proj = nn.Linear(F, hidden2, bias=False)

    def forward(self, x):
        feat = self.backbone(x)                # [B, F]
        h = self.proj(feat)                    # [B, hidden2]
        # add residual skip (projected from feat)
        h = h + self.skip_proj(feat)
        out = self.readout(h)                  # [B, 14]
        return out


# usage
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = BMnet().to(device)


model.safetensors:   0%|          | 0.00/193M [00:00<?, ?B/s]

In [7]:
# 4) Loss, Optimizer, Scheduler
criterion = nn.L1Loss()
optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30, 60], gamma=0.1)


In [8]:
# 5) Training Loop
def train_epoch():
    model.train()
    running_loss = 0.0
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        pred = model(x)

        loss = criterion(pred, y)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * x.size(0)
    return running_loss / len(train_loader.dataset)

def eval_epoch(loader):
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            pred = model(x)
            total_loss += criterion(pred, y).item() * x.size(0)
    return total_loss / len(loader.dataset)

best_val = float('inf')
for epoch in range(20):
    tr_loss = train_epoch()
    val_loss = eval_epoch(testA_loader)
    scheduler.step()
    if val_loss < best_val:
        best_val = val_loss
        torch.save(model.state_dict(), 'best_bmnet.pth')
    print(f"Epoch {epoch:02d}  Train L1: {tr_loss:.3f}  Val L1: {val_loss:.3f}")


Epoch 00  Train L1: 5.384  Val L1: 1.953
Epoch 01  Train L1: 2.393  Val L1: 1.935
Epoch 02  Train L1: 2.415  Val L1: 2.362
Epoch 03  Train L1: 2.583  Val L1: 1.885
Epoch 04  Train L1: 2.267  Val L1: 2.530
Epoch 05  Train L1: 2.158  Val L1: 1.869
Epoch 06  Train L1: 2.072  Val L1: 1.564
Epoch 07  Train L1: 2.104  Val L1: 2.739
Epoch 08  Train L1: 2.039  Val L1: 1.603
Epoch 09  Train L1: 2.040  Val L1: 1.867
Epoch 10  Train L1: 1.948  Val L1: 3.755
Epoch 11  Train L1: 1.874  Val L1: 1.709
Epoch 12  Train L1: 1.865  Val L1: 2.369
Epoch 13  Train L1: 1.760  Val L1: 2.527
Epoch 14  Train L1: 1.799  Val L1: 2.129
Epoch 15  Train L1: 1.732  Val L1: 1.761
Epoch 16  Train L1: 1.670  Val L1: 1.578
Epoch 17  Train L1: 1.784  Val L1: 1.903
Epoch 18  Train L1: 1.679  Val L1: 3.117
Epoch 19  Train L1: 1.650  Val L1: 1.427


In [9]:
def test_epoch(loader):
    model.eval()
    all_preds = []
    all_targets = []

    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            pred = model(x)
            all_preds.append(pred.cpu())
            all_targets.append(y.cpu())

    preds = torch.cat(all_preds, dim=0).numpy()      # [N, 14]
    targets = torch.cat(all_targets, dim=0).numpy()  # [N, 14]
    errors = np.abs(preds - targets)                 # [N, 14]

    # Mean Absolute Error (per-measurement and overall)
    mae_per_measurement = errors.mean(axis=0)
    overall_mae = mae_per_measurement.mean()

    # TP metrics
    tp90 = np.percentile(errors, 90, axis=0).mean()
    tp75 = np.percentile(errors, 75, axis=0).mean()
    tp50 = np.percentile(errors, 50, axis=0).mean()

    print("\n📊 TestB Evaluation:")
    print(f"  MAE (overall): {overall_mae:.2f} cm")
    print(f"  TP90: {tp90:.2f} cm   TP75: {tp75:.2f} cm   TP50: {tp50:.2f} cm")
    print(f"  MAE per measurement: {np.round(mae_per_measurement, 2)}")

    return overall_mae


In [10]:
from IPython.display import FileLink

FileLink('best_bmnet.pth')

In [11]:
test_epoch(testB_loader)


📊 TestB Evaluation:
  MAE (overall): 1.69 cm
  TP90: 3.47 cm   TP75: 2.30 cm   TP50: 1.34 cm
  MAE per measurement: [0.94 1.27 1.15 1.36 3.47 0.79 1.67 2.83 1.6  0.92 1.73 2.15 3.09 0.64]


1.6859846

In [12]:
def predict_single(front_path, side_path, height_cm, weight_kg, model, transform):
    model.eval()

    # Load and preprocess images
    front_img = Image.open(front_path).convert("L")
    side_img  = Image.open(side_path).convert("L")

    front_tensor = transform(front_img)  # [1, H, W]
    side_tensor  = transform(side_img)   # [1, H, W]

    # Normalize height & weight and broadcast into images
    height_tensor = torch.full_like(front_tensor, height_cm / 200.0)
    weight_tensor = torch.full_like(front_tensor, weight_kg / 200.0)

    # Stack into [4, H, W] input
    x = torch.cat([front_tensor, side_tensor, height_tensor, weight_tensor], dim=0).unsqueeze(0).to(device)

    # Predict
    with torch.no_grad():
        pred = model(x).cpu().numpy().squeeze()  # shape [14,]

    return pred

In [13]:
predict_single('/kaggle/input/testmybody/mefront.png', '/kaggle/input/testmybody/meside.png', 180, 75.7, model, transform)

array([ 25.114126,  52.66688 ,  31.41469 ,  37.85356 , 100.55965 ,
        28.09609 , 179.76729 ,  99.05545 ,  81.84191 ,  37.45124 ,
        69.61589 ,  52.35584 ,  86.75419 ,  17.342896], dtype=float32)