In [40]:
import pandas as pd
from torchvision import transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from helpers.utils import CustomImageDataset
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split

In [None]:
# Fixed seed so the train/test partition (and every number computed from it)
# is the same after Restart Kernel -> Run All.
SPLIT_SEED = 42

# Load the label table and hold out 5% of the rows for testing.
labels_df = pd.read_csv('data/boneage-training-dataset.csv')
training_labels, testing_labels = train_test_split(
    labels_df, train_size=0.95, test_size=0.05, random_state=SPLIT_SEED
)


In [48]:
# Display the held-out rows produced by train_test_split.
testing_labels

Unnamed: 0,id,boneage,male
8926,11495,156,True
9710,12373,108,True
269,1678,132,False
12281,15242,150,True
9521,12168,150,True
...,...,...,...
8971,11551,120,False
2447,4100,162,False
11523,14404,204,True
8188,10665,204,True


In [45]:
# Per-image preprocessing: convert to a tensor, then normalise with a
# one-element mean of 0.5 and a one-element std of 0.5.
preprocessing_steps = [
  transforms.ToTensor(),
  transforms.Normalize((0.5,), (0.5,)),
]
transformer = transforms.Compose(preprocessing_steps)

# Both splits read their images from the same folder; only the label rows differ.
image_root = 'data/processed/training-set'
training_dataset = CustomImageDataset(root_dir=image_root, labels=training_labels, transform=transformer)
testing_dataset = CustomImageDataset(root_dir=image_root, labels=testing_labels, transform=transformer)

# Report the size of each split, one value per line.
print(len(training_dataset), len(testing_dataset), sep='\n')

# Mini-batch loaders: the training loader reshuffles on every pass, the
# testing loader keeps the dataset order.
batch_size = 32
training_dataloader = DataLoader(dataset=training_dataset, batch_size=batch_size, shuffle=True)
testing_dataloader = DataLoader(dataset=testing_dataset, batch_size=batch_size, shuffle=False)

11980
631


In [46]:
class BoneAgeModel(nn.Module):
  """Small CNN that regresses one value from a single-channel image.

  Two Conv2d -> ReLU -> MaxPool2d stages feed a two-layer fully connected
  head. The first Linear layer is sized from the expected input resolution.

  Args:
    image_height: Height in pixels of the input images (default 256).
    image_width: Width in pixels of the input images (default 344).

  Raises:
    ValueError: If either dimension is not a positive integer.

  Input shape is (batch, 1, image_height, image_width); output shape is
  (batch, 1).
  """

  def __init__(self, image_height=256, image_width=344):
    super().__init__()

    flat_features = self._flattened_features(image_height, image_width)

    # Layer order is unchanged so state-dict keys (cnn.0, cnn.3, cnn.7, cnn.9)
    # stay compatible with previously saved weights.
    self.cnn = nn.Sequential(
      nn.Conv2d(1, 32, kernel_size=2, padding=1),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=2, stride=2),

      nn.Conv2d(32, 64, kernel_size=2, padding=1),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=2, stride=2),

      nn.Flatten(),

      nn.Linear(flat_features, 128),
      nn.ReLU(),
      nn.Linear(128, 1)
    )

  @staticmethod
  def _stage_output_size(size):
    """Spatial size after one Conv2d(k=2, p=1) + MaxPool2d(2, 2) stage."""
    # The convolution grows the side by one pixel (size + 2*1 - 2 + 1); the
    # pooling layer then halves it, rounding down.
    return (size + 1) // 2

  @classmethod
  def _flattened_features(cls, image_height, image_width):
    """Number of features reaching the first Linear layer."""
    for label, value in (("image_height", image_height), ("image_width", image_width)):
      if not isinstance(value, int) or value < 1:
        raise ValueError(f"{label} must be a positive integer, got {value!r}")

    out_height = cls._stage_output_size(cls._stage_output_size(image_height))
    out_width = cls._stage_output_size(cls._stage_output_size(image_width))
    # 64 is the channel count produced by the second convolution.
    return 64 * out_height * out_width

  def forward(self, x):
    """Run a batch of images through the network and return the predictions."""
    return self.cnn(x)

In [47]:
import torch.optim as optim

# Seed torch's global RNG before the model is built so weight initialisation
# and the training loader's shuffle order repeat across kernel restarts.
torch.manual_seed(0)

# Initialize model, loss function, and optimizer
model = BoneAgeModel()
criterion = nn.L1Loss()  # mean absolute error (L1), not mean squared error
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0

    # Each batch yields four items; only the images and target ages are used
    # here. The batch size is whatever the DataLoader was configured with.
    for images, ages, _, _ in training_dataloader:
        ages = ages.float()
        optimizer.zero_grad()
        # Drop only the trailing feature dimension: (B, 1) -> (B,). A bare
        # squeeze() would also remove the batch dimension when B == 1.
        predictions = model(images).squeeze(-1)
        loss = criterion(predictions, ages)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # Average of the per-batch mean losses for this epoch.
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss / len(training_dataloader):.2f}")


Epoch 1/10, Loss: 31.45
Epoch 2/10, Loss: 27.07
Epoch 3/10, Loss: 25.00
Epoch 4/10, Loss: 23.41
Epoch 5/10, Loss: 22.05
Epoch 6/10, Loss: 20.97
Epoch 7/10, Loss: 19.21
Epoch 8/10, Loss: 17.81
Epoch 9/10, Loss: 16.32
Epoch 10/10, Loss: 15.20


In [50]:
# Evaluate on the held-out split: collect one prediction per image, report the
# mean per-sample loss, and write the predictions to submission.csv.

# Ensure model is in evaluation mode
model.eval()
test_predictions = []
epoch_val_loss = 0.0  # running SUM of per-sample losses

with torch.no_grad():
    for images, ages, gender, img_ids in testing_dataloader:  # `img_ids` is a batch
        # Drop only the trailing feature dimension so the result is always
        # indexable per sample, even when the final batch holds one image.
        predictions = model(images).squeeze(-1)

        # Loop through each item in the batch
        for i in range(len(img_ids)):
            test_predictions.append([img_ids[i].item(), predictions[i].item()])

            loss = criterion(predictions[i], ages[i].float())
            epoch_val_loss += loss.item()

            print(f"{img_ids[i].item():>6} {ages[i].item():>8} {predictions[i].item():>8.2f}")

# The accumulator holds one loss per SAMPLE, so divide by the sample count.
# Dividing by the number of batches overstated the loss by about the batch size.
num_samples = len(test_predictions)
val_loss = epoch_val_loss / num_samples if num_samples else float("nan")
print(f"\n✅ Validation Loss: {val_loss:.4f}")  # Final validation loss

# Save predictions to CSV
submission_df = pd.DataFrame(test_predictions, columns=["id", "bone_age"])
submission_df.to_csv("submission.csv", index=False)
print("📁 Submission file saved!")


 11495    156.0   164.60
 12373    108.0   145.53
  1678    132.0   129.55
 15242    150.0   145.10
 12168    150.0   135.48
  7225    168.0   130.31
  5744    168.0   160.27
  2509    162.0   160.39
 11814    150.0   138.19
  3043    156.0   151.47
 11005    156.0   143.16
  9928     42.0    58.59
  1567     69.0    83.46
 12028    106.0    95.73
  4563    132.0   173.60
 11795     96.0    69.36
 15095    150.0   141.82
 13930    138.0   114.61
  7551    132.0   142.41
 12357    150.0   125.21
 14955    120.0   144.11
  7270     82.0   100.05
  6430    162.0   124.64
  4215     94.0   145.24
  3892    162.0   155.28
 14141    132.0   125.67
 15260    132.0   131.01
  8884    106.0    80.17
  1460     96.0    79.31
 12994    132.0   155.35
 12821     72.0    76.19
  3065     82.0   122.00
  2697     42.0   115.92
 11498     42.0    12.32
 11596    132.0   130.01
 10454    138.0   125.64
 13889    138.0   160.32
  5342    120.0   116.19
  7687    106.0   120.28
 15064    204.0   161.25


In [25]:
# `validation_labels` is not assigned by any live cell in this notebook, so
# this cell raised NameError on a fresh Restart & Run All. Show the last 24
# rows of the label table instead.
labels_df.tail(24)

Unnamed: 0,id,boneage,male
12587,15583,132,True
12588,15584,54,True
12589,15585,162,True
12590,15586,192,True
12591,15587,50,False
12592,15588,113,False
12593,15589,84,True
12594,15591,162,True
12595,15593,180,True
12596,15594,108,True
