In [11]:
# Replace the existing ViT model loading with timm
from timm import create_model
import torch

# Create a model
model = create_model('vit_base_patch16_224', pretrained=True)



In [12]:
#print(model)

# Modify the model for regression
num_keypoints = 12  # Number of keypoints
num_coordinates = num_keypoints * 2  # Each keypoint has an x and y coordinate

# Replace the classification head with a regression head
model.head = torch.nn.Linear(model.head.in_features, num_coordinates)

print(model)

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (patch_drop): Identity()
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=768, out_features=2304, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=768, out_features=768, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): Identity()
      (drop_path1): Identity()
      (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False)
        (norm): Identity(

In [13]:
import torch.nn as nn
import torch.optim as optim

# Move the model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Define your optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Define the Mean Squared Error loss function
criterion = nn.MSELoss()

In [19]:
from torchvision import transforms
from torch.utils.data import Dataset

class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, sample):
        image, landmarks = sample["image"], sample["landmarks"]
        # image = image.transpose((2, 0, 1))
        image = torch.from_numpy(image.transpose((2, 0, 1))).float()
        return {"image": image, "landmarks": torch.from_numpy(landmarks)}
    
class Normalize(object):
    """Normalize the image in a sample."""

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, sample):
        image, landmarks = sample["image"], sample["landmarks"]
        image = transforms.functional.normalize(image, mean=self.mean, std=self.std)
        return {"image": image, "landmarks": landmarks}


class FaceLandmarksDataset(Dataset):
    """Face Landmarks dataset."""

    def __init__(self, root_dir, csv_file, transform=None):
        """
        Arguments:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.landmarks_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.landmarks_frame)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        img_name = os.path.join(self.root_dir, self.landmarks_frame.iloc[idx, 0])
        image = io.imread(img_name)
        landmarks = self.landmarks_frame.iloc[idx, 1:]
        landmarks = np.array([landmarks], dtype=float).reshape(-1, 2)
        sample = {"image": image, "landmarks": landmarks}

        if self.transform:
            sample = self.transform(sample)

        return sample


In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
from timm import create_model
from torchvision import transforms
from torch.utils.data import DataLoader

# Assuming you have already defined your FaceLandmarksDataset
# from your_dataset_file import FaceLandmarksDataset

def get_loader(train_csv_file, train_data_dir, test_csv_file, test_data_dir, train_batch_size, eval_batch_size):
    # Dynamically load paths for training and testing data
    trainset = FaceLandmarksDataset(
        csv_file=train_csv_file,
        root_dir=train_data_dir,
        transform=transforms.Compose(
            [
                ToTensor(),
                Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # ImageNet values
            ]
        ),
    )

    testset = FaceLandmarksDataset(
        csv_file=test_csv_file,
        root_dir=test_data_dir,
        transform=transforms.Compose(
            [
                ToTensor(),
                Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ]
        ),
    )

    train_loader = DataLoader(
        trainset,
        batch_size=train_batch_size,
        num_workers=4,
        shuffle=True,
        pin_memory=True,
    )
    
    test_loader = (
        DataLoader(
            testset,
            batch_size=eval_batch_size,
            num_workers=4,
            pin_memory=True,
        )
        if testset is not None
        else None
    )

    return train_loader, test_loader


In [29]:
import os  # Import os for path management

def train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs, device, save_dir):
    model.to(device)  # Move model to the appropriate device (CPU or GPU)
    
    best_val_loss = float('inf')  # Initialize best validation loss

    for epoch in range(num_epochs):
        model.train()  # Set the model to training mode
        running_loss = 0.0

        # Get the total number of batches
        total_batches = len(train_loader)

        for step, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)  # Move to device
            
            optimizer.zero_grad()  # Clear previous gradients
            outputs = model(images)  # Forward pass
            
            loss = criterion(outputs, labels)  # Calculate loss
            loss.backward()  # Backward pass
            optimizer.step()  # Update weights

            running_loss += loss.item() * images.size(0)  # Accumulate loss

            # Print step progress
            if step % 10 == 0 or step == total_batches - 1:  # Print every 10 steps and the last step
                print(f"Epoch [{epoch + 1}/{num_epochs}], Step [{step + 1}/{total_batches}], Loss: {loss.item():.4f}")

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f"Epoch [{epoch + 1}/{num_epochs}], Average Loss: {epoch_loss:.4f}")

        # Validation step
        if test_loader is not None:
            model.eval()  # Set the model to evaluation mode
            val_loss = 0.0
            with torch.no_grad():
                for images, labels in test_loader:
                    images, labels = images.to(device), labels.to(device)
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                    val_loss += loss.item() * images.size(0)  # Accumulate loss
            
            val_loss /= len(test_loader.dataset)
            print(f"Validation Loss: {val_loss:.4f}")

            # Save the model if the validation loss has improved
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                # Define the path for saving the model
                save_path = os.path.join(save_dir, f"best_model_epoch_{epoch + 1}.pth")
                torch.save(model.state_dict(), save_path)
                print(f"Model saved at: {save_path}")

    print("Training complete.")


In [26]:
import os
import pandas as pd
# Set your paths and parameters directly
current_dir = os.getcwd()
train_csv_file = os.path.join(current_dir,"projects/Facemap/data/train/augmented_data/augmented_labels.csv")
train_data_dir = os.path.join(current_dir, "projects/Facemap/data/train/augmented_data")
test_csv_file = os.path.join(current_dir,"projects/Facemap/data/test/augmented_data/augmented_labels.csv")
test_data_dir = os.path.join(current_dir, "projects/Facemap/data/test/augmented_data")
save_dir = os.path.join(current_dir, "projects/Facemap/data/output")
train_batch_size = 20
eval_batch_size = 20
num_epochs = 50  # Set your desired number of epochs

# Initialize the DataLoader
train_loader, test_loader = get_loader(train_csv_file, train_data_dir, test_csv_file, test_data_dir, train_batch_size, eval_batch_size)

# Create the model
model = create_model('vit_base_patch16_224', pretrained=True)

# Modify the last layer for regression
num_keypoints = 12
num_coordinates = num_keypoints * 2
model.head = nn.Linear(model.head.in_features, num_coordinates)

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.MSELoss()


In [28]:

# Train the model
train_model(model, train_loader, test_loader, criterion, optimizer, num_epochs, device, save_dir=save_dir)

Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/Users/annastuckert/anaconda3/lib/python3.11/multiprocessing/spawn.py", line 122, in spawn_main
    exitcode = _main(fd, parent_sentinel)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/annastuckert/anaconda3/lib/python3.11/multiprocessing/spawn.py", line 132, in _main
    self = reduction.pickle.load(from_parent)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: Can't get attribute 'FaceLandmarksDataset' on <module '__main__' (built-in)>


KeyboardInterrupt: 