In [1]:
import os  # Import os for path management
from data_utils_timm import get_loader
import torch


def train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs, device, save_dir):
    model.to(device)  # Move model to the appropriate device (CPU or GPU)
    
    best_val_loss = float('inf')  # Initialize best validation loss

    for epoch in range(num_epochs):
        model.train()  # Set the model to training mode
        running_loss = 0.0

        # Get the total number of batches
        total_batches = len(train_loader)

        for step, (images, labels) in enumerate(train_loader):
            print("starting training")
            labels = labels.view(20, -1) #TODO change 20 so batch size can be changed dynamically
            images, labels = images.to(device), labels.to(device)  # Move to device
            
            optimizer.zero_grad()  # Clear previous gradients
            outputs = model(images)  # Forward pass
            # Inside your training loop, before the loss calculation
            print("Outputs shape: ", outputs.shape)  # Should be [batch_size, 24]
            print("Labels shape: ", labels.shape)  # Should match [batch_size, 24]
            loss = criterion(outputs, labels)  # Calculate loss
            loss.backward()  # Backward pass
            optimizer.step()  # Update weights


            running_loss += loss.item() * images.size(0)  # Accumulate loss

            # Print step progress
            if step % 10 == 0 or step == total_batches - 1:  # Print every 10 steps and the last step
                print(f"Epoch [{epoch + 1}/{num_epochs}], Step [{step + 1}/{total_batches}], Loss: {loss.item():.4f}")

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f"Epoch [{epoch + 1}/{num_epochs}], Average Loss: {epoch_loss:.4f}")

        # Validation step
        if test_loader is not None:
            model.eval()  # Set the model to evaluation mode
            val_loss = 0.0
            with torch.no_grad():
                for images, labels in test_loader:
                    images, labels = images.to(device), labels.to(device)
                    outputs = model(images)
                    # Inside your training loop, before the loss calculation
                    print("Outputs shape: ", outputs.shape)  # Should be [batch_size, 24]
                    print("Labels shape: ", labels.shape)  # Should match [batch_size, 24]  
                    loss = criterion(outputs, labels)
                    val_loss += loss.item() * images.size(0)  # Accumulate loss
            
            val_loss /= len(test_loader.dataset)
            print(f"Validation Loss: {val_loss:.4f}")

            # Save the model if the validation loss has improved
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                # Define the path for saving the model
                save_path = os.path.join(save_dir, f"best_model_epoch_{epoch + 1}.pth")
                torch.save(model.state_dict(), save_path)
                print(f"Model saved at: {save_path}")

    print("Training complete.")


In [2]:
import os
from timm import create_model
import torch.nn as nn
import torch.optim as optim
import torch
# Set your paths and parameters directly
current_dir = os.getcwd()
train_csv_file = os.path.join(current_dir,"projects/Facemap/data/train/augmented_data/augmented_labels.csv")
train_data_dir = os.path.join(current_dir, "projects/Facemap/data/train/augmented_data")
test_csv_file = os.path.join(current_dir,"projects/Facemap/data/test/augmented_data/augmented_labels.csv")
test_data_dir = os.path.join(current_dir, "projects/Facemap/data/test/augmented_data")
save_dir = os.path.join(current_dir, "projects/Facemap/data/output")
train_batch_size = 20
eval_batch_size = 20
num_epochs = 50  # Set your desired number of epochs

# Initialize the DataLoader
train_loader, test_loader = get_loader(train_csv_file, train_data_dir, test_csv_file, test_data_dir, train_batch_size, eval_batch_size)

# Create the model
model = create_model('vit_base_patch16_224', pretrained=True)

# Modify the last layer for regression
num_keypoints = 12
num_coordinates = num_keypoints * 2
model.head = nn.Linear(model.head.in_features, num_coordinates)

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.MSELoss()


In [3]:

# Train the model
train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs, device, save_dir=save_dir)

starting training
Outputs shape:  torch.Size([20, 24])
Labels shape:  torch.Size([20, 24])
Epoch [1/50], Step [11/45], Loss: 8212.0137
starting training
Outputs shape:  torch.Size([20, 24])
Labels shape:  torch.Size([20, 24])
starting training
Outputs shape:  torch.Size([20, 24])
Labels shape:  torch.Size([20, 24])
