In [20]:
import torch
from torchvision.models import convnext_base
import torch.nn as nn

In [15]:
# Build the ConvNeXt-Base architecture and initialise it with torchvision's
# published checkpoint (fetched into the torch hub cache on first use).
# NOTE(review): newer torchvision releases reportedly prefer a `weights=` argument
# over `pretrained=` — confirm against the installed version before changing.
model = convnext_base(pretrained=True)  # pretrained=True downloads the published weights; pass pretrained=False instead if you will load your own checkpoint


Downloading: "https://download.pytorch.org/models/convnext_base-6075fbad.pth" to /home/blackroot/.cache/torch/hub/checkpoints/convnext_base-6075fbad.pth
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 338M/338M [00:05<00:00, 66.5MB/s]


In [None]:
# Dump the module tree so the layer layout (in particular the existing
# `classifier` head that a later cell replaces) can be inspected.
print(model)

In [26]:
# Replace the classification head with a single-output head:
# flatten -> LayerNorm over the 1024 features -> Linear(1024 -> 1).
# The new LayerNorm and Linear layers are freshly initialised.
# No activation is applied, so the head emits one raw, unbounded value per sample.
model.classifier = nn.Sequential(
    nn.Flatten(1),                 # collapse everything after the batch dimension
    nn.LayerNorm(1024, eps=1e-6),  # learnable affine parameters are enabled by default
    nn.Linear(1024, 1),            # bias is enabled by default
)

In [None]:
# Switch the network to evaluation mode before running inference.
model.eval()

In [None]:
# Smoke-test the modified network with one random 3x224x224 input.
dummy_input = torch.randn(1, 3, 224, 224)

# If you have a GPU available and want to use it, move the model and input to GPU
if torch.cuda.is_available():
    model = model.cuda()
    dummy_input = dummy_input.cuda()

# Perform inference (no gradients are needed for a forward-only check)
with torch.no_grad():
    output = model(dummy_input)

# Interpret the output
if output.shape[1] == 1:  # Regression task
    # Directly use the output as your predicted score.
    # .item() is valid here because the batch holds exactly one sample.
    predicted_score = output.item()
    print(f"Predicted Score: {predicted_score}")
else:  # Classification task
    # Convert logits to probabilities and find the predicted class.
    # torch.softmax is used rather than F.softmax: the visible import cell
    # defines no `F` alias, so the old call would raise NameError if reached.
    probabilities = torch.softmax(output, dim=1)
    predicted_class = torch.argmax(probabilities, dim=1).item()
    print(f"Predicted Class: {predicted_class}")

In [54]:
from PIL import Image
from torchvision import transforms

# Load your image
image_path = 'testing/109882.png'
# Open inside a context manager so the file handle is released promptly;
# convert() returns an independent, fully loaded RGB copy that outlives it.
with Image.open(image_path) as img:
    image = img.convert('RGB')

# Define the transforms
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize the image to 224x224 pixels
    transforms.ToTensor(),  # Convert the image to a tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalize
])

# Apply the transforms to the image
image_tensor = transform(image)

# Put the input on whatever device the model currently lives on. The model is
# only moved to CUDA when a GPU is available, so a hard-coded "cuda" target
# would fail on CPU-only machines or mismatch a CPU-resident model.
device = next(model.parameters()).device

# Unsqueeze to add a batch dimension
image_tensor = image_tensor.unsqueeze(0).to(device)

# Make sure the model is in evaluation mode
model.eval()

# Perform inference
with torch.no_grad():
    output = model(image_tensor)

# Process the output
# For regression
if output.shape[1] == 1:  # Assuming the model is for regression
    # NOTE(review): the x10 factor presumes the model emits scores on a 0-1
    # scale that should be reported on a 0-10 scale — confirm against training.
    predicted_score = output.item() * 10  # Assuming you scale the output as mentioned
    print(f'Predicted score: {predicted_score}')
else:  # For classification
    probabilities = torch.softmax(output, dim=1)
    # argmax along the class dimension (dim=1) instead of over the flattened tensor
    predicted_class = probabilities.argmax(dim=1).item()
    print(f'Predicted class: {predicted_class}')

Predicted score: 8.01877737045288


In [56]:
# Persist only the parameters/buffers (the state dict); to reload them, rebuild
# the same architecture, including the replaced classifier, and call load_state_dict().
torch.save(model.state_dict(), 'untrained_aesthetic_scorer.pth')
# Also persist the whole module object.
# NOTE(review): saving a full module relies on pickle, so loading it presumably
# needs compatible torch/torchvision class definitions and should only be done
# with trusted files — confirm before distributing this file.
torch.save(model, 'full_model.pth')