In [1]:
import os
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
import numpy as np
from tqdm import tqdm
import copy

# Load the data
train_df = pd.read_csv('/kaggle/input/applications-of-deep-learning-wustl-spring-2024/faces-age/train.csv')
test_df = pd.read_csv('/kaggle/input/applications-of-deep-learning-wustl-spring-2024/faces-age/test.csv')

# Define dataset class
class FacesAgeDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None):
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.dataframe.iloc[idx, 1])
        image = Image.open(img_name)
        age = self.dataframe.iloc[idx, 2]
        if self.transform:
            image = self.transform(image)
        return image, age

# Set root directory and transformations
root_dir = '/kaggle/input/applications-of-deep-learning-wustl-spring-2024/faces-age/'
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Split the dataset
train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)

# Create datasets
train_dataset = FacesAgeDataset(dataframe=train_df, root_dir=root_dir, transform=transform)
val_dataset = FacesAgeDataset(dataframe=val_df, root_dir=root_dir, transform=transform)

# Define the model
model = models.resnet18(pretrained=True)
for param in model.parameters():
    param.requires_grad = False
model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 1024),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(1024, 512),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(512, 1)
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training setup
criterion = nn.MSELoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.001, weight_decay=1e-4)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Training loop
for epoch in range(100):  # Number of epochs
    model.train()
    running_loss = 0.0
    for inputs, ages in train_loader:
        inputs, ages = inputs.to(device), ages.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, ages.view(-1, 1).float())
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
    epoch_loss = running_loss / len(train_loader.dataset)
    print(f'Epoch {epoch+1}, Training Loss: {epoch_loss:.4f}')

# Prepare test dataset and loader
class FacesAgeTestDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None):
        self.dataframe = dataframe
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.dataframe.iloc[idx, 1])
        image = Image.open(img_name)
        if self.transform:
            image = self.transform(image)
        return image

test_dataset = FacesAgeTestDataset(dataframe=test_df, root_dir=root_dir, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Making predictions
model.eval()
predictions = []
with torch.no_grad():
    for inputs in tqdm(test_loader, desc="Processing"):
        inputs = inputs.to(device)
        outputs = model(inputs)
        predictions.extend(outputs.view(-1).cpu().numpy())

# Prepare submission
submit_df = pd.DataFrame({
    "id": test_df["id"],
    "age": predictions
})
submit_df.to_csv('/kaggle/working/submission.csv', index=False)
print("Submission DataFrame generated and saved.")


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 126MB/s]


Epoch 1, Training Loss: 177.0127
Epoch 2, Training Loss: 143.7188
Epoch 3, Training Loss: 135.4669
Epoch 4, Training Loss: 127.6265
Epoch 5, Training Loss: 125.8316
Epoch 6, Training Loss: 129.9745
Epoch 7, Training Loss: 121.6276
Epoch 8, Training Loss: 125.4716
Epoch 9, Training Loss: 121.5880
Epoch 10, Training Loss: 117.3416
Epoch 11, Training Loss: 119.4099
Epoch 12, Training Loss: 119.8968
Epoch 13, Training Loss: 118.9803
Epoch 14, Training Loss: 113.5232
Epoch 15, Training Loss: 119.7649
Epoch 16, Training Loss: 114.6546
Epoch 17, Training Loss: 115.3414
Epoch 18, Training Loss: 113.1462
Epoch 19, Training Loss: 111.9768
Epoch 20, Training Loss: 111.9959
Epoch 21, Training Loss: 110.4659
Epoch 22, Training Loss: 110.3604
Epoch 23, Training Loss: 108.8899
Epoch 24, Training Loss: 106.2986
Epoch 25, Training Loss: 110.4386
Epoch 26, Training Loss: 107.5438
Epoch 27, Training Loss: 107.0767
Epoch 28, Training Loss: 106.5855
Epoch 29, Training Loss: 104.2270
Epoch 30, Training Loss

Processing: 100%|██████████| 49/49 [00:28<00:00,  1.71it/s]

Submission DataFrame generated and saved.



