Средний скор нужен Вам как основной таргет для регрессии.
Датасет AVA устроен так, что каждая фотография имеет распределение оценок (сколько раз её оценили на 1, 2, 3 … 10 баллов). Чтобы обучить регрессионную нейросеть, нужно получить одно число, которое отражает “эстетическую привлекательность” снимка. Это число — средняя оценка (mean score).

Прочитаем необходимые колонки:

In [33]:
from pathlib import Path
import pandas as pd

# 1. Пути
PROJECT_ROOT = Path(r"C:\Users\main\Desktop\Aesthetics")  # при необходимости поправьте
DATA_ROOT = PROJECT_ROOT / "data" / "AVA"

ava_txt = DATA_ROOT / "AVA.txt"

train_ls_list = DATA_ROOT / "aesthetics_image_lists" / "generic_ls_train.jpgl"
val_ss_list   = DATA_ROOT / "aesthetics_image_lists" / "generic_ss_train.jpgl"
test_list     = DATA_ROOT / "aesthetics_image_lists" / "generic_test.jpgl"

images_dir = DATA_ROOT / "images"

print("AVA.txt exists:", ava_txt.exists())
print("LS train list exists:", train_ls_list.exists())
print("SS val list exists:", val_ss_list.exists())
print("Test list exists:", test_list.exists())
print("Images dir exists:", images_dir.exists())



AVA.txt exists: True
LS train list exists: True
SS val list exists: True
Test list exists: True
Images dir exists: True


In [34]:
# 2. Читаем AVA.txt и считаем mean_score

cols = ["image_id"] + [f"n_{i}" for i in range(1, 11)]
ava = pd.read_csv(
    ava_txt,
    sep=" ",
    header=None,
    usecols=range(1, 12),  # со 2-го по 11-й столбец: image_id + n1..n10
    names=cols,
)

vote_cols = [f"n_{i}" for i in range(1, 11)]
ava["n_total"] = ava[vote_cols].sum(axis=1)

# можно (по желанию) отфильтровать изображения с малым числом голосов
# ava = ava[ava["n_total"] >= 50].reset_index(drop=True)

weights = pd.Series(range(1, 11), index=vote_cols)
ava["mean_score"] = (ava[vote_cols] * weights).sum(axis=1) / ava["n_total"]

print(ava.head())
print("Всего строк в AVA:", len(ava))


   image_id  n_1  n_2  n_3  n_4  n_5  n_6  n_7  n_8  n_9  n_10  n_total  \
0    953619    0    1    5   17   38   36   15    6    5     1      124   
1    953958   10    7   15   26   26   21   10    8    1     2      126   
2    954184    0    0    4    8   41   56   10    3    4     0      126   
3    954113    0    1    4    6   48   37   23    5    2     2      128   
4    953980    0    3    6   15   57   39    6    1    1     1      129   

   mean_score  
0    5.637097  
1    4.698413  
2    5.674603  
3    5.773438  
4    5.209302  
Всего строк в AVA: 255530


In [35]:
# 3. Читаем списки id для train / val / test

train_ls_ids = pd.read_csv(
    train_ls_list,
    sep=" ",
    header=None,
    usecols=[0],
    names=["image_id"],
)

val_ss_ids = pd.read_csv(
    val_ss_list,
    sep=" ",
    header=None,
    usecols=[0],
    names=["image_id"],
)

test_ids = pd.read_csv(
    test_list,
    sep=" ",
    header=None,
    usecols=[0],
    names=["image_id"],
)

print("LS train ids:", train_ls_ids.shape)
print("SS val ids:", val_ss_ids.shape)
print("Test ids:", test_ids.shape)


LS train ids: (20000, 1)
SS val ids: (2500, 1)
Test ids: (20000, 1)


In [36]:
# 4. Формируем три датафрейма по академичному разбиению

train_df = ava[ava["image_id"].isin(train_ls_ids["image_id"])].copy()
val_df   = ava[ava["image_id"].isin(val_ss_ids["image_id"])].copy()
test_df  = ava[ava["image_id"].isin(test_ids["image_id"])].copy()

print("Train df:", train_df.shape)
print("Val df:",   val_df.shape)
print("Test df:",  test_df.shape)


Train df: (19924, 13)
Val df: (2495, 13)
Test df: (19930, 13)


In [37]:
# 5. Добавляем пути к изображениям

def make_path(img_id: int | str) -> str:
    return str(images_dir / f"{int(img_id)}.jpg")

for df in [train_df, val_df, test_df]:
    df["image_path"] = df["image_id"].apply(make_path)

# Проверим, что хотя бы один путь существует
print(train_df["image_path"].iloc[0], "exists:", Path(train_df["image_path"].iloc[0]).exists())


C:\Users\main\Desktop\Aesthetics\data\AVA\images\953777.jpg exists: True


In [38]:
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as T

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device


device(type='cuda')

In [39]:
print("torch version:", torch.__version__)
print("cuda available:", torch.cuda.is_available())
print("torch.cuda version:", torch.version.cuda)
print("device count:", torch.cuda.device_count())


torch version: 2.6.0+cu124
cuda available: True
torch.cuda version: 12.4
device count: 1


In [40]:
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [41]:
class AVARegressionDataset(Dataset):
    def __init__(self, df, transform=None):
        self.df = df.reset_index(drop=True)
        self.transform = transform or T.Compose([
            T.Resize((256, 256)),
            T.CenterCrop(224),
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225]),
        ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # простая защита от битых файлов
        for _ in range(3):  # до 3 попыток
            row = self.df.iloc[idx]
            img_path = row["image_path"]
            target = row["mean_score"]

            try:
                img = Image.open(img_path).convert("RGB")
                img = self.transform(img)
                y = torch.tensor(target, dtype=torch.float32)
                return img, y
            except OSError:
                # если картинка битая — берём следующий индекс по кругу
                idx = (idx + 1) % len(self.df)

        # если трижды не получилось — возвращаем чёрный квадрат, чтобы не падать
        img = torch.zeros(3, 224, 224, dtype=torch.float32)
        y = torch.tensor(0.0, dtype=torch.float32)
        return img, y


In [42]:
batch_size = 128  # при необходимости можно увеличить/уменьшить

train_ds = AVARegressionDataset(train_df)
val_ds   = AVARegressionDataset(val_df)
test_ds  = AVARegressionDataset(test_df)

train_loader = DataLoader(
    train_ds, batch_size=batch_size, shuffle=True,
    num_workers=4, pin_memory=True
)
val_loader = DataLoader(
    val_ds, batch_size=batch_size, shuffle=False,
    num_workers=4, pin_memory=True
)
test_loader = DataLoader(
    test_ds, batch_size=batch_size, shuffle=False,
    num_workers=4, pin_memory=True
)

len(train_ds), len(val_ds), len(test_ds)


(19924, 2495, 19930)

In [43]:
import torchvision.models as models
import torch.nn as nn

# предобученный ResNet-50
base_model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

in_features = base_model.fc.in_features
base_model.fc = nn.Linear(in_features, 1)  # 1 выход для регрессии

model = base_model.to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)

model


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [44]:
from torch import amp
import numpy as np

scaler = amp.GradScaler(device="cuda")  # вместо GradScaler() из torch.cuda.amp

@torch.no_grad()
def evaluate(model, loader):
    model.eval()
    mse_losses = []
    mae_losses = []

    for x, y in loader:
        x = x.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True).unsqueeze(1)  # (B, 1)

        # валидации mixed precision тоже не помешает
        with amp.autocast(device_type="cuda", dtype=torch.float16):
            preds = model(x)
            mse = criterion(preds, y)
            mae = torch.mean(torch.abs(preds - y))

        mse_losses.append(mse.item())
        mae_losses.append(mae.item())

    return float(np.mean(mse_losses)), float(np.mean(mae_losses))


In [45]:
def train_model(model, train_loader, val_loader, epochs=10):
    history = []
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="min", factor=0.5, patience=2, verbose=True
    )

    for epoch in range(1, epochs + 1):
        model.train()
        running_loss = 0.0
        n_batches = 0

        for x, y in train_loader:
            x = x.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True).unsqueeze(1)

            optimizer.zero_grad(set_to_none=True)

            # mixed precision через torch.amp
            with amp.autocast(device_type="cuda", dtype=torch.float16):
                preds = model(x)
                loss = criterion(preds, y)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()
            n_batches += 1

        train_mse = running_loss / max(1, n_batches)
        val_mse, val_mae = evaluate(model, val_loader)
        scheduler.step(val_mse)

        print(
            f"Epoch {epoch:02d} | "
            f"train MSE={train_mse:.4f} | "
            f"val MSE={val_mse:.4f}, val MAE={val_mae:.4f}"
        )

        history.append(
            {
                "epoch": epoch,
                "train_mse": train_mse,
                "val_mse": val_mse,
                "val_mae": val_mae,
            }
        )

    return history


==========================================


In [46]:
from torch.utils.data import DataLoader

batch_size = 128  # можно оставить как есть

train_loader = DataLoader(
    train_ds,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,          # ВАЖНО: 0
    pin_memory=True,
    persistent_workers=False
)

val_loader = DataLoader(
    val_ds,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,          # ВАЖНО: 0
    pin_memory=True,
    persistent_workers=False
)

test_loader = DataLoader(
    test_ds,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,          # ВАЖНО: 0
    pin_memory=True,
    persistent_workers=False
)

len(train_ds), len(val_ds), len(test_ds)


(19924, 2495, 19930)

In [47]:
import time
import torch

print("device var:", device)
p = next(model.parameters())
print("model param device:", p.device)

t0 = time.time()
x_cpu, y_cpu = next(iter(train_loader))
t1 = time.time()
print("got batch in", t1 - t0, "sec")
print("batch before .to:", x_cpu.device, y_cpu.device)

x = x_cpu.to(device)
y = y_cpu.to(device)
print("batch after .to:", x.device, y.device)

with torch.no_grad():
    preds = model(x)
print("preds device:", preds.device)


device var: cuda
model param device: cuda:0
got batch in 1.1392226219177246 sec
batch before .to: cpu cpu
batch after .to: cuda:0 cuda:0
preds device: cuda:0


In [48]:
history = train_model(model, train_loader, val_loader, epochs=5)





Epoch 01 | train MSE=3.3122 | val MSE=0.3388, val MAE=0.4611
Epoch 02 | train MSE=0.3241 | val MSE=0.2301, val MAE=0.3789
Epoch 03 | train MSE=0.2128 | val MSE=0.1599, val MAE=0.3212
Epoch 04 | train MSE=0.1095 | val MSE=0.0847, val MAE=0.2297
Epoch 05 | train MSE=0.0674 | val MSE=0.0524, val MAE=0.1782


Проводим оценки.

In [50]:
from scipy.stats import pearsonr, spearmanr
import numpy as np
import torch

@torch.no_grad()
def eval_full(model, loader):
    model.eval()
    preds_all = []
    targets_all = []

    for x, y in loader:
        x = x.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)

        with amp.autocast(device_type="cuda", dtype=torch.float16):
            preds = model(x).squeeze(1).float()

        preds_all.append(preds.cpu().numpy())
        targets_all.append(y.cpu().numpy())

    preds_all = np.concatenate(preds_all)
    targets_all = np.concatenate(targets_all)

    mse = np.mean((preds_all - targets_all) ** 2)
    mae = np.mean(np.abs(preds_all - targets_all))
    rmse = np.sqrt(mse)

    pearson = pearsonr(preds_all, targets_all)[0]
    spearman = spearmanr(preds_all, targets_all)[0]

    # простое R^2
    ss_res = np.sum((preds_all - targets_all) ** 2)
    ss_tot = np.sum((targets_all - targets_all.mean()) ** 2)
    r2 = 1 - ss_res / ss_tot

    return {
        "mse": mse,
        "rmse": rmse,
        "mae": mae,
        "pearson": pearson,
        "spearman": spearman,
        "r2": r2,
    }

test_metrics = eval_full(model, test_loader)
test_metrics




{'mse': np.float32(0.39050138),
 'rmse': np.float32(0.6249011),
 'mae': np.float32(0.49264506),
 'pearson': np.float32(0.5852297),
 'spearman': np.float64(0.5757736827637447),
 'r2': np.float32(0.30879843)}

In [52]:
from pathlib import Path
import torch

models_dir = Path("models")
models_dir.mkdir(exist_ok=True)

model_path = models_dir / "resnet50_ava_regression.pt"
torch.save(model.state_dict(), model_path)

model_path


WindowsPath('models/resnet50_ava_regression.pt')