In [1]:
import torch
from torchvision.models import convnext_base
import torch.nn as nn
from PIL import Image

In [None]:
# Restore the pickled, not-yet-trained scorer from disk, then move it to the GPU.
# NOTE(review): torch.load unpickles a whole object here, so the class that
# defines the model must be importable in this kernel -- confirm.
model = torch.load('untrained_aesthetic_scorer.pth')
model = model.to('cuda')

# Switch to training mode. Module.train() returns the module itself, so as the
# last expression of the cell it also echoes the architecture.
model.train()

In [10]:
import os
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_image
from torchvision import transforms
import torch

class ScoredImageDataset(Dataset):
    """Image-regression dataset laid out as ``root_dir/<score>/<image file>``.

    Every sub-directory of ``root_dir`` whose name parses as a float is treated
    as a score bucket; each image file inside it becomes one sample labelled
    with that float.
    """

    def __init__(self, root_dir, transform=None, extensions=('.jpg', '.png')):
        """
        Args:
            root_dir (string): Directory with all the score folders.
            transform (callable, optional): Optional transform to be applied on a sample.
            extensions (tuple of str, optional): File suffixes (case-insensitive)
                accepted as images.
        """
        self.root_dir = root_dir
        self.transform = transform
        self.images = []
        self.labels = []
        suffixes = tuple(ext.lower() for ext in extensions)

        # os.listdir() order is arbitrary; sorting keeps the sample order (and
        # therefore any seeded split of this dataset) stable between runs.
        for score in sorted(os.listdir(root_dir)):
            score_dir = os.path.join(root_dir, score)
            if not os.path.isdir(score_dir):
                continue  # Skip non-directories
            try:
                label = float(score)
            except ValueError:
                # e.g. the ``.ipynb_checkpoints`` folder Jupyter may create
                print(f"Skipping non-score directory: {score_dir}")
                continue
            for img_name in sorted(os.listdir(score_dir)):
                if not img_name.lower().endswith(suffixes):
                    continue
                img_path = os.path.join(score_dir, img_name)
                if os.path.isfile(img_path):
                    self.images.append(img_path)
                    self.labels.append(label)

    def __len__(self):
        """Return the number of image files discovered under ``root_dir``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``.

        If the image at ``idx`` cannot be decoded, the next readable image
        (wrapping around) and *its* label are returned instead, so one corrupt
        file does not abort an epoch.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
            RuntimeError: if no image in the dataset can be read.
        """
        total = len(self.images)
        if not -total <= idx < total:
            raise IndexError(f"index {idx} out of range for dataset of size {total}")

        # Try each position at most once so a fully unreadable dataset
        # terminates instead of looping forever.
        for offset in range(total):
            position = (idx + offset) % total
            img_path = self.images[position]
            try:
                # The context manager closes the file handle deterministically.
                with Image.open(img_path) as handle:
                    image = handle.convert("RGB")
            except Exception as exc:  # decoding errors only; Ctrl-C still propagates
                print(f"Skipping unreadable image: {img_path} ({exc})")
                continue

            if self.transform is not None:
                image = self.transform(image)
            label = torch.tensor(self.labels[position], dtype=torch.float32)
            return image, label

        raise RuntimeError(f"No readable images found under {self.root_dir}")

In [11]:
from torch.utils.data import DataLoader, random_split
# Preprocessing applied to every sample of both splits.
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Assuming ConvNeXt base model input size
    transforms.ToTensor(),  # PIL image -> float32 CHW tensor scaled to [0, 1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

dataset = ScoredImageDataset(root_dir="data", transform=transform)
if len(dataset) == 0:
    raise RuntimeError("No images found under 'data'; expected data/<score>/<image> folders")

# Determine sizes for splitting (80 % train / 20 % test)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

# Split the dataset. A fixed seed keeps the partition identical across kernel
# restarts, so a resumed run never trains on images it was evaluated on before.
SPLIT_SEED = 42
train_dataset, test_dataset = random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Create DataLoaders. Only the training data is shuffled; the evaluation
# metrics are averages over the whole test split, so its order is irrelevant.
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True, pin_memory=True)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False, pin_memory=True)

In [13]:
def evaluate_model(test_dataloader, model, criterion, tolerance=1.0, device=None):
    """Evaluate ``model`` on ``test_dataloader`` and print summary metrics.

    Args:
        test_dataloader: Iterable yielding ``(inputs, labels)`` batches.
        model: The ``torch.nn.Module`` to evaluate.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
            The per-batch value is weighted by batch size, which assumes a
            mean-reduced loss -- confirm if another reduction is used.
        tolerance (float): Maximum absolute error for a prediction to count
            as a "close guess".
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters).

    Returns:
        dict: ``{"loss", "close_guess_accuracy", "rmse"}`` as Python floats.
        ``rmse`` is the square root of the average loss, so it is a true RMSE
        only when ``criterion`` is a mean-squared error.

    Raises:
        ValueError: if ``test_dataloader`` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    # Remember the current mode so a training loop that calls this between
    # epochs is not silently left in eval mode afterwards.
    was_training = model.training
    model.eval()  # Set the model to evaluation mode

    total_loss = 0.0
    total_close_guesses = 0  # To track the number of close guesses
    total_count = 0

    try:
        with torch.no_grad():  # No need to track gradients for evaluation
            for inputs, labels in test_dataloader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                batch_size = inputs.size(0)

                # reshape(-1) keeps a 1-D result even for a batch of one,
                # where a bare squeeze() would collapse it to a 0-d tensor.
                outputs = model(inputs).reshape(-1)
                loss = criterion(outputs, labels)

                total_loss += loss.item() * batch_size  # Multiply by batch size
                total_close_guesses += ((outputs - labels).abs() <= tolerance).sum().item()
                total_count += batch_size
    finally:
        model.train(was_training)

    if total_count == 0:
        raise ValueError("evaluate_model received a dataloader with no samples")

    avg_loss = total_loss / total_count
    close_guess_accuracy = total_close_guesses / total_count  # Fraction of close guesses
    rmse = avg_loss ** 0.5

    print(f"Test Loss: {avg_loss:.4f}")
    print(f"Close Guess Accuracy: {close_guess_accuracy:.4f}")
    print(f"RMSE: {rmse:.4f}")

    return {
        "loss": avg_loss,
        "close_guess_accuracy": close_guess_accuracy,
        "rmse": rmse,
    }

In [None]:
import torch.optim as optim
import time

criterion = torch.nn.MSELoss()

# Run history: 3rd epoch increased lr by 10x (the earlier setting was lr=0.000001).
lr = 0.00001
optimizer = optim.Adam(model.parameters(), lr=lr)

NUM_EPOCHS = 30
# Follow whatever device the model already lives on.
device = next(model.parameters()).device

print(f"Starting training run: lr={lr}")

for epoch in range(NUM_EPOCHS):
    start_time = time.time()

    # evaluate_model() switches the model to eval mode, so training mode has
    # to be re-enabled at the start of every epoch, not just once up front.
    model.train()

    running_loss = 0.0  # sum of per-sample losses over the epoch
    samples_seen = 0

    for inputs, labels in train_dataloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        # reshape(-1) keeps the batch dimension even when the last batch holds
        # a single sample (a bare squeeze() would yield a 0-d tensor).
        outputs = model(inputs).reshape(-1)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = inputs.size(0)
        # Accumulate on-device; converting once per epoch avoids a host sync per step.
        running_loss = running_loss + loss.detach() * batch_size
        samples_seen += batch_size

    epoch_duration = time.time() - start_time  # Calculate the duration of the epoch
    # Average over the whole epoch rather than reporting only the final batch.
    train_loss = float(running_loss) / samples_seen if samples_seen else float("nan")
    print(f"Epoch {epoch+1}, Train Loss: {train_loss}, Duration: {epoch_duration:.2f} seconds")
    evaluate_model(test_dataloader, model, criterion)
    print("\n\n")

In [15]:
# Tag embedded in both checkpoint file names.
# NOTE(review): presumably the number of epochs trained so far -- it is set by
# hand here, so confirm it matches the run being saved.
nepochs = 12

# First the weights alone (state dict), then the whole pickled module.
state_dict_path = f'trained_aesthetic_scorer_{nepochs}_statedict.pth'
full_model_path = f'trained_aesthetic_scorer_{nepochs}.pth'
torch.save(model.state_dict(), state_dict_path)
torch.save(model, full_model_path)

In [27]:
def predict_image(model, img_path, transform=None):
    """Score a single image file with ``model``.

    Args:
        model: ``torch.nn.Module`` that returns one value for a batch
            containing one image.
        img_path: Path of the image to score.
        transform (callable, optional): Preprocessing applied to the RGB PIL
            image. Defaults to a 224x224 resize, tensor conversion and the
            mean/std normalisation written out below.

    Returns:
        float: The model output for the image.
    """
    if transform is None:
        transform = transforms.Compose([
            transforms.Resize((224, 224)),  # Resize the image to 224x224 pixels
            transforms.ToTensor(),  # Convert the image to a tensor
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalize
        ])

    # The context manager closes the file handle as soon as the pixels are converted.
    with Image.open(img_path) as handle:
        image = handle.convert('RGB')

    # Run on whatever device the model lives on instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    # Unsqueeze to add a batch dimension
    image_tensor = transform(image).unsqueeze(0).to(device)

    # Evaluate in eval mode, then restore the caller's mode so a prediction
    # made in the middle of training does not leave the model in eval mode.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            output = model(image_tensor)
    finally:
        model.train(was_training)

    return output.item()

# Score one sample image and show the prediction rounded to the nearest integer.
sample_path = "/home/blackroot/Downloads/a.jpg"
pred = predict_image(model, sample_path)
print(round(pred))

7
