In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn as nn
import torch.optim as optim
from PIL import Image
import os
import re

class ColumbiaGazeDataset(Dataset):
    """Image dataset whose regression targets are parsed from the file names.

    Every ``.jpg`` file located directly inside ``data_path`` (sub-directories
    are not searched) becomes one sample. Two naming schemes are understood:

    * ``<subject>_<distance>_<pose>P_<vertical>V_<horizontal>H.jpg``, e.g.
      ``0001_2m_-15P_10V_-5H.jpg`` -- the scheme used by the published
      Columbia Gaze Data Set;
    * ``<subject>_<head_pose>_<gaze_h>H<gaze_v>.jpg``, e.g. ``0001_15_10H5.jpg``
      -- the simplified scheme this class originally expected.

    Args:
        data_path: Directory that contains the image files.
        transform: Optional callable applied to each PIL image before it is
            returned from ``__getitem__``.

    Raises:
        ValueError: If a ``.jpg`` file name matches neither naming scheme.
    """

    # Columbia naming: subject, camera distance, head pose (P), vertical gaze (V),
    # horizontal gaze (H). Angles may be negative.
    _COLUMBIA_NAME = re.compile(
        r'[^_]+_[^_]+_(?P<pose>-?\d+)P_(?P<vertical>-?\d+)V_(?P<horizontal>-?\d+)H\.jpg'
    )

    def __init__(self, data_path, transform=None):
        self.data_path = data_path
        self.transform = transform
        self.images = []  # full path of every sample image
        self.labels = []  # [head_pose, gaze_h, gaze_v] per image, same order as self.images

        # os.listdir returns entries in arbitrary order; sorting keeps sample
        # indices (and therefore any index-based split) stable across runs.
        for file_name in sorted(os.listdir(data_path)):
            if not file_name.endswith('.jpg'):
                continue
            label = self._parse_labels(file_name)
            self.images.append(os.path.join(data_path, file_name))
            self.labels.append(label)

    @classmethod
    def _parse_labels(cls, file_name):
        """Return ``[head_pose, gaze_h, gaze_v]`` (ints) parsed from ``file_name``."""
        match = cls._COLUMBIA_NAME.fullmatch(file_name)
        if match:
            return [
                int(match.group('pose')),
                int(match.group('horizontal')),
                int(match.group('vertical')),
            ]

        # Fall back to the simplified scheme: <subject>_<head_pose>_<gaze_h>H<gaze_v>.jpg
        try:
            parts = file_name.split('_')
            head_pose = int(parts[1])
            gaze_h, gaze_v = map(int, parts[2].split('.')[0].split('H'))
        except (IndexError, ValueError) as exc:
            raise ValueError(
                f"Cannot parse gaze labels from file name {file_name!r}"
            ) from exc
        return [head_pose, gaze_h, gaze_v]

    def __len__(self):
        """Number of images found in ``data_path``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        ``image`` is the RGB image after ``transform`` (if any); ``label`` is a
        float32 tensor holding ``[head_pose, gaze_h, gaze_v]``.
        """
        # The context manager closes the underlying file as soon as the pixel
        # data has been copied by convert(), instead of waiting for GC.
        with Image.open(self.images[idx]) as raw_image:
            image = raw_image.convert('RGB')
        label = torch.tensor(self.labels[idx], dtype=torch.float32)

        if self.transform is not None:
            image = self.transform(image)

        return image, label

# Define image transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to a standard size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet normalization
])

# Load the data once and split it into DISJOINT train / validation index sets.
# Drawing the validation subset from the same images the training loader
# iterates over would leak every validation sample into training.
DATA_DIR = 'path/to/columbia_gaze_data'
full_dataset = ColumbiaGazeDataset(DATA_DIR, transform=transform)

# A dedicated, seeded generator makes the split reproducible without touching
# the global RNG state used for weight init and batch shuffling.
split_generator = torch.Generator().manual_seed(42)
shuffled_indices = torch.randperm(len(full_dataset), generator=split_generator).tolist()

num_val = len(full_dataset) // 5  # Use 20% of data for validation
val_indices = shuffled_indices[:num_val]
train_indices = shuffled_indices[num_val:]
# NOTE(review): this split is per image; if images of the same subject must not
# appear on both sides, split by subject id instead.

# Both datasets are index views (Subset) over the single loaded dataset
train_dataset = torch.utils.data.Subset(full_dataset, train_indices)
val_dataset = torch.utils.data.Subset(full_dataset, val_indices)

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

In [None]:

class GazeEstimationModel(nn.Module):
    """AlexNet-style CNN that regresses three values per input image.

    The network is a five-convolution feature extractor, an adaptive average
    pool to a fixed 6x6 grid, and a three-layer fully connected head whose
    last layer emits (head pose, horizontal gaze, vertical gaze).
    """

    def __init__(self):
        super().__init__()

        # Convolutional trunk: 3-channel image in, 256 feature maps out
        conv_layers = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_layers)

        # Fixed 6x6 spatial output regardless of the input resolution
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        # Fully connected regression head
        flattened_size = 256 * 6 * 6
        hidden_size = 4096
        head_layers = [
            nn.Dropout(),
            nn.Linear(flattened_size, hidden_size),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(inplace=True),
            nn.Linear(hidden_size, 3),  # Output: head pose, horizontal gaze, vertical gaze
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Map an image batch of shape (N, 3, H, W) to predictions of shape (N, 3)."""
        pooled = self.avgpool(self.features(x))
        # Keep the batch dimension, collapse channels and spatial dims
        flat = pooled.flatten(start_dim=1)
        return self.classifier(flat)

# Initialize the model (weights are freshly initialised; nothing is loaded from disk here).
# Later cells refer to this instance through the module-level name `model`.
model = GazeEstimationModel()

In [None]:
import torch.optim as optim

# Training configuration
num_epochs = 50
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model to its target device BEFORE building the optimizer, as the
# torch.optim documentation recommends, so the optimizer is constructed over
# parameters that already live where they will be used.
model.to(device)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0   # sum of per-sample losses seen this epoch
    seen_samples = 0     # number of samples seen this epoch
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so a
        # short final batch does not count as much as a full one.
        samples_in_batch = labels.size(0)
        running_loss += loss.item() * samples_in_batch
        seen_samples += samples_in_batch

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/seen_samples:.4f}")

In [None]:
def evaluate_model(model, data_loader, criterion, device=None):
    """Return the mean per-sample loss of ``model`` over ``data_loader``.

    The model is switched to eval mode (and left in it) and gradients are
    disabled while iterating.

    Args:
        model: The ``nn.Module`` to evaluate.
        data_loader: Sized-or-not iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable; expected to return the mean loss of a batch
            (the default reduction of ``nn.MSELoss``).
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU for a parameter-free model), so the
            function does not depend on a global ``device`` variable.

    Returns:
        float: Sum of per-batch mean losses weighted by batch size, divided by
        the total number of samples.

    Raises:
        ValueError: If ``data_loader`` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    total_loss = 0.0
    total_samples = 0
    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            # Weight the batch mean by its size so a short final batch is not
            # over-represented in the overall average.
            samples_in_batch = labels.size(0)
            total_loss += loss.item() * samples_in_batch
            total_samples += samples_in_batch

    if total_samples == 0:
        raise ValueError("data_loader yielded no samples; cannot compute a mean loss")
    return total_loss / total_samples

# Score the trained model on the held-out validation loader and report it
val_loss = evaluate_model(
    model=model,
    data_loader=val_loader,
    criterion=criterion,
)
print(f"Validation Loss: {val_loss:.4f}")

In [None]:
# Persist the learned parameters only (the state_dict), not the module object;
# loading requires constructing a GazeEstimationModel and calling load_state_dict.
trained_weights = model.state_dict()
torch.save(trained_weights, 'gaze_estimation_model.pth')
print("Model saved successfully.")