Ноутбук с первой формально обученной моделью. В качестве основы используется Resnet50, функция потерь - TripletLoss. Можем заметить, что метрики при тестировании достаточно низкие

In [10]:
import os
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from sklearn.metrics import precision_score, recall_score, f1_score
import motmetrics as mm
import numpy as np
import warnings
warnings.filterwarnings("ignore")

class UAVDataset(Dataset):
    def __init__(self, root_dir, gt_dir, attr_dir, transform=None):
        self.root_dir = root_dir
        self.gt_dir = gt_dir
        self.attr_dir = attr_dir
        self.transform = transform
        self.data = self._load_data()

    def _load_data(self):
        data = []
        for seq in os.listdir(self.root_dir):
            gt_file = os.path.join(self.gt_dir, f"{seq}_gt.txt")
            attr_file = os.path.join(self.attr_dir, f"{seq}_attr.txt")
            if not os.path.exists(gt_file) or not os.path.exists(attr_file):
                continue
            with open(gt_file, 'r') as f_gt, open(attr_file, 'r') as f_attr:
                gt_lines = f_gt.readlines()
                attr_lines = f_attr.readlines()
            for gt_line, attr_line in zip(gt_lines, attr_lines):
                frame_id, obj_id, x, y, w, h, score, in_view, occlusion = gt_line.strip().split(',')
                if int(score) != 1:  # Учитываем только объекты с score = 1
                    continue
                attrs = list(map(int, attr_line.strip().split(',')))
                img_path = os.path.join(self.root_dir, seq, f"img{int(frame_id):06d}.jpg")
                data.append((img_path, int(obj_id), (float(x), float(y), float(w), float(h)), attrs))
        return data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path, obj_id, bbox, attrs = self.data[idx]
        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image)
        return image, obj_id, bbox, attrs

class ReIDModel(nn.Module):
    def __init__(self, embedding_size=128, pretrained=True):
        super(ReIDModel, self).__init__()
        self.backbone = models.resnet50(pretrained=pretrained)
        in_features = self.backbone.fc.in_features
        self.backbone.fc = nn.Linear(in_features, embedding_size)

    def forward(self, x):
        return self.backbone(x)

def create_triplets(dataset, num_triplets=100):
    triplets = []
    for _ in range(num_triplets):
        anchor_idx = torch.randint(0, len(dataset), (1,)).item()
        anchor_img, anchor_id, _, _ = dataset[anchor_idx]
        positive_indices = [i for i in range(len(dataset)) if dataset[i][1] == anchor_id]
        if not positive_indices:
            continue
        positive_idx = torch.randint(0, len(positive_indices), (1,)).item()
        positive_img, _, _, _ = dataset[positive_indices[positive_idx]]
        negative_indices = [i for i in range(len(dataset)) if dataset[i][1] != anchor_id]
        if not negative_indices:
            continue
        negative_idx = torch.randint(0, len(negative_indices), (1,)).item()
        negative_img, _, _, _ = dataset[negative_indices[negative_idx]]
        triplets.append((anchor_img, positive_img, negative_img))
    return triplets

def evaluate_reid_no_reference(model, test_loader):
    model.eval()
    all_embeddings = []
    all_labels = []

    # Сбор всех эмбеддингов и меток
    with torch.no_grad():
        for images, obj_ids, bboxes, attrs in test_loader:
            embeddings = model(images).cpu().numpy()
            all_embeddings.extend(embeddings)
            all_labels.extend(obj_ids.cpu().numpy())

    all_embeddings = np.array(all_embeddings)
    all_labels = np.array(all_labels)

    # Вычисление попарных расстояний между всеми эмбеддингами
    distance_matrix = np.zeros((len(all_embeddings), len(all_embeddings)))
    for i in range(len(all_embeddings)):
        for j in range(len(all_embeddings)):
            distance_matrix[i, j] = np.linalg.norm(all_embeddings[i] - all_embeddings[j])

    # Вычисление Rank-1 и Rank-5
    rank1 = 0
    rank5 = 0
    for i in range(len(all_embeddings)):
        distances = distance_matrix[i]
        sorted_indices = np.argsort(distances)  # Индексы объектов, отсортированных по расстоянию
        sorted_labels = all_labels[sorted_indices]  # Соответствующие метки

        # Rank-1: Первый объект в отсортированном списке
        if sorted_labels[0] == all_labels[i]:
            rank1 += 1

        # Rank-5: Истинный объект в топ-5
        if all_labels[i] in sorted_labels[:5]:
            rank5 += 1

    rank1_accuracy = rank1 / len(all_labels)
    rank5_accuracy = rank5 / len(all_labels)

    # Вычисление mAP (mean Average Precision)
    average_precision = []
    for i in range(len(all_embeddings)):
        distances = distance_matrix[i]
        sorted_indices = np.argsort(distances)
        sorted_labels = all_labels[sorted_indices]

        # Вычисление Precision и Recall для каждого объекта
        relevant = (sorted_labels == all_labels[i]).astype(int)
        cumulative_relevant = np.cumsum(relevant)
        precision = cumulative_relevant / (np.arange(len(relevant)) + 1)
        recall = cumulative_relevant / np.sum(relevant)

        # Вычисление Average Precision (AP)
        ap = 0
        for k in range(len(precision)):
            if relevant[k]:
                ap += precision[k]
        ap /= np.sum(relevant)
        average_precision.append(ap)

    mAP = np.mean(average_precision)

    # Вычисление Precision, Recall и F1-score
    predicted_labels = []
    for i in range(len(all_embeddings)):
        distances = distance_matrix[i]
        sorted_indices = np.argsort(distances)
        predicted_label = all_labels[sorted_indices[1]]  # Берем ближайший объект (исключая себя)
        predicted_labels.append(predicted_label)

    precision = precision_score(all_labels, predicted_labels, average='macro')
    recall = recall_score(all_labels, predicted_labels, average='macro')
    f1 = f1_score(all_labels, predicted_labels, average='macro')

    return {
        "Rank-1": rank1_accuracy,
        "Rank-5": rank5_accuracy,
        "mAP": mAP,
        "Precision": precision,
        "Recall": recall,
        "F1-score": f1
    }

In [4]:
if __name__ == "__main__":
    root_dir = r'E:\Yandex\UAV-benchmark-M'
    gt_dir = r'E:\Yandex\UAV-benchmark-MOTD_v1.0\GT'
    attr_dir = r'E:\Yandex\M_attr\train'
    embedding_size = 128
    batch_size = 32
    num_epochs = 10
    num_triplets = 1000

    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((256, 128)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

In [None]:
dataset = UAVDataset(root_dir=root_dir, gt_dir=gt_dir, attr_dir=attr_dir, transform=transform)
triplets = create_triplets(dataset, num_triplets=num_triplets)

model = ReIDModel(embedding_size=embedding_size, pretrained=True)
criterion = nn.TripletMarginLoss(margin=1.0)
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(num_epochs):
    epoch_loss = 0
    for anchor, positive, negative in triplets:
        optimizer.zero_grad()
        anchor_emb = model(anchor.unsqueeze(0))
        positive_emb = model(positive.unsqueeze(0))
        negative_emb = model(negative.unsqueeze(0))
        loss = criterion(anchor_emb, positive_emb, negative_emb)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss/len(triplets)}')

torch.save(model.state_dict(), 'reid_model_2.pth')

In [11]:
# Тестирование с использованием метрик MOT
test_dataset = UAVDataset(root_dir=r'E:\Yandex\UAV-benchmark-M', gt_dir=r'E:\Yandex\UAV-benchmark-MOTD_v1.0\GT', attr_dir=r'E:\Yandex\M_attr\test', transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

model = ReIDModel(embedding_size=embedding_size, pretrained=True)
model_path = 'reid_model_2.pth'
model.load_state_dict(torch.load(model_path))

test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
metrics = evaluate_reid_no_reference(model, test_loader)
print(metrics)

{'Rank-1': 1.0, 'Rank-5': 1.0, 'mAP': np.float64(0.8516212580102716), 'Precision': 0.2777777777777778, 'Recall': 0.3125, 'F1-score': 0.29411764705882354}
