In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [5]:
!pip install efficientnet_pytorch
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.model_selection import KFold
import torch
from torch import nn, optim
import torchvision
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
from efficientnet_pytorch import EfficientNet



In [6]:
class AgeDataset(Dataset):
    def __init__(self, df, data_path, transform=None):
        self.df = df
        self.data_path = data_path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        img_name = os.path.join(self.data_path, self.df.iloc[index, 0])
        image = Image.open(img_name).convert('RGB')
        age = self.df.iloc[index, 1]
        if self.transform:
            image = self.transform(image)
        return image, age

# Define data paths and annotation loading
data_path = '/kaggle/input/smai-24-age-prediction/content/faces_dataset/train'
annotations_path = '/kaggle/input/smai-24-age-prediction/content/faces_dataset/train.csv'
annotations = pd.read_csv(annotations_path)

# Enhanced data augmentation
transform = transforms.Compose([
    transforms.Resize((260, 260)),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(),
    transforms.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.9, 1.1)),
    transforms.RandomPerspective(distortion_scale=0.05),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [7]:
# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Set up K-Fold cross-validation
num_epochs = 15
kf = KFold(n_splits=5, shuffle=True, random_state=42)
fold_performance = {}

for fold, (train_index, val_index) in enumerate(kf.split(annotations)):
    print(f'Fold {fold + 1}')

    train_df = annotations.iloc[train_index]
    val_df = annotations.iloc[val_index]
    
    train_dataset = AgeDataset(train_df, data_path, transform=transform)
    val_dataset = AgeDataset(val_df, data_path, transform=transform)

    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=2)

    # Initialize the model with EfficientNet-B0
    model = EfficientNet.from_pretrained('efficientnet-b0')
    num_ftrs = model._fc.in_features
    model._fc = nn.Linear(num_ftrs, 1)
    model = model.to(device)

    # Loss function and optimizer
    criterion = nn.L1Loss()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)  # Learning rate adjusted to 1e-3
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5, eta_min=1e-6)

    # Training loop with early stopping
    best_val_loss = float('inf')
    best_model_state = None
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for images, ages in tqdm(train_loader, desc=f'Training Epoch {epoch+1}', leave=False):
            images, ages = images.to(device), ages.to(device).float().view(-1, 1)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, ages)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        # Validation step
        model.eval()
        total_val_loss = 0
        with torch.no_grad():
            for val_images, val_ages in tqdm(val_loader, desc=f'Validating Epoch {epoch+1}', leave=False):
                val_images, val_ages = val_images.to(device), val_ages.to(device).float().view(-1, 1)
                val_outputs = model(val_images)
                val_loss = criterion(val_outputs, val_ages)
                total_val_loss += val_loss.item()

        avg_val_loss = total_val_loss / len(val_loader)
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            best_model_state = model.state_dict().copy()
            torch.save(best_model_state, f'best_model_fold_{fold}.pth')
            print(f"Fold {fold+1}, Epoch {epoch+1}, Validation Loss Improved to {best_val_loss:.4f}")

        scheduler.step(avg_val_loss)

    fold_performance[fold] = best_val_loss

# Find and load the best fold
best_fold = min(fold_performance, key=fold_performance.get)
model.load_state_dict(torch.load(f'best_model_fold_{best_fold}.pth'))

Fold 1


Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b0-355c32eb.pth
100%|██████████| 20.4M/20.4M [00:00<00:00, 118MB/s] 


Loaded pretrained weights for efficientnet-b0




Fold 1, Epoch 1, Validation Loss Improved to 9.9044


                                                                   

Fold 1, Epoch 2, Validation Loss Improved to 5.6636


                                                                   

Fold 1, Epoch 3, Validation Loss Improved to 5.2401


                                                                   

Fold 1, Epoch 4, Validation Loss Improved to 5.1903


                                                                   

Fold 1, Epoch 5, Validation Loss Improved to 5.1559


                                                                    

Fold 1, Epoch 10, Validation Loss Improved to 5.1364


                                                                    

Fold 1, Epoch 12, Validation Loss Improved to 5.1097


                                                                    

Fold 2
Loaded pretrained weights for efficientnet-b0


                                                                   

Fold 2, Epoch 1, Validation Loss Improved to 9.0401


                                                                   

Fold 2, Epoch 2, Validation Loss Improved to 6.0591


                                                                   

Fold 2, Epoch 3, Validation Loss Improved to 5.3851


                                                                   

Fold 2, Epoch 4, Validation Loss Improved to 5.0311


                                                                   

Fold 2, Epoch 8, Validation Loss Improved to 5.0264


                                                                   

Fold 2, Epoch 9, Validation Loss Improved to 4.9980


                                                                    

Fold 2, Epoch 11, Validation Loss Improved to 4.9914


                                                                    

Fold 3
Loaded pretrained weights for efficientnet-b0


                                                                   

Fold 3, Epoch 1, Validation Loss Improved to 20.1289


                                                                   

Fold 3, Epoch 2, Validation Loss Improved to 6.5693


                                                                   

Fold 3, Epoch 3, Validation Loss Improved to 5.3173


                                                                   

Fold 3, Epoch 4, Validation Loss Improved to 4.9963


                                                                   

Fold 3, Epoch 5, Validation Loss Improved to 4.9911


                                                                   

Fold 3, Epoch 7, Validation Loss Improved to 4.9587


                                                                    

Fold 3, Epoch 12, Validation Loss Improved to 4.9519


                                                                    

Fold 4
Loaded pretrained weights for efficientnet-b0


                                                                   

Fold 4, Epoch 1, Validation Loss Improved to 8.8817


                                                                   

Fold 4, Epoch 2, Validation Loss Improved to 6.4115


                                                                   

Fold 4, Epoch 3, Validation Loss Improved to 5.1999


                                                                   

Fold 4, Epoch 4, Validation Loss Improved to 5.0455


                                                                   

Fold 4, Epoch 5, Validation Loss Improved to 5.0362


                                                                   

Fold 4, Epoch 7, Validation Loss Improved to 4.9938


                                                                    

Fold 4, Epoch 13, Validation Loss Improved to 4.9678


                                                                    

Fold 5
Loaded pretrained weights for efficientnet-b0


                                                                   

Fold 5, Epoch 1, Validation Loss Improved to 12.9594


                                                                   

Fold 5, Epoch 2, Validation Loss Improved to 5.7791


                                                                   

Fold 5, Epoch 3, Validation Loss Improved to 5.2457


                                                                   

Fold 5, Epoch 4, Validation Loss Improved to 5.1648


                                                                   

Fold 5, Epoch 6, Validation Loss Improved to 5.1466


                                                                    

Fold 5, Epoch 12, Validation Loss Improved to 5.1372


                                                                    

Fold 5, Epoch 14, Validation Loss Improved to 5.1207


                                                                    

<All keys matched successfully>

In [8]:
# Prepare the test dataset
test_data_path = '/kaggle/input/smai-24-age-prediction/content/faces_dataset/test'
test_annotations_path = '/kaggle/input/smai-24-age-prediction/content/faces_dataset/submission.csv'
test_dataset = AgeDataset(pd.read_csv(test_annotations_path), test_data_path, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=2)

# Define the prediction function
def predict(loader, model):
    model.eval()
    predictions = []
    with torch.no_grad():
        for images, _ in tqdm(loader, desc='Predicting', leave=False):
            images = images.to(device)
            outputs = model(images)
            predictions.extend(outputs.view(-1).cpu().numpy())
    return predictions

# Get predictions
predictions = predict(test_loader, model)

# Create the submission file
submission = pd.read_csv(test_annotations_path)
submission['age'] = predictions
submission.to_csv('/kaggle/working/submission.csv', index=False)

                                                           