In [2]:
!pip install pytorch-metric-learning
!pip install faiss-gpu

Collecting pytorch-metric-learning
  Downloading pytorch_metric_learning-2.5.0-py3-none-any.whl (119 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/119.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.1/119.1 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.6.0->pytorch-metric-learning)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.6.0->pytorch-metric-learning)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.6.0->pytorch-metric-learning)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.6.0->pytorch-metric-learning)
  Using cached nvidia_cudnn_cu12-8.9.2.2

# Импорт

In [6]:
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torchvision.models as models
import torch.nn as nn
import pandas as pd
import zipfile
import os
import csv
from PIL import Image
from torchvision.transforms import ToTensor
from torch.utils.data import Dataset
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
from pytorch_metric_learning import testers
from pytorch_metric_learning.utils.accuracy_calculator import AccuracyCalculator

Загрузка аномального набора данных

In [5]:
zip_test = "Anom.zip"
imgzip = zipfile.ZipFile(zip_test, 'r')
imgzip.extractall()

iterator = 0
dataset_root = 'Anom'
csv_file_path = 'dataset.csv'
with open(csv_file_path, mode='w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(['image', 'class'])
    for class_name in os.listdir(dataset_root):
        class_path = os.path.join(dataset_root, class_name)
        if os.path.isdir(class_path):
            iterator = iterator + 1
            for image_name in os.listdir(class_path):
                image_path = os.path.join(class_path, image_name)
                if os.path.isfile(image_path):
                    relative_image_path = os.path.relpath(image_path, dataset_root)
                    csv_writer.writerow([relative_image_path, iterator])

# Класс для создания кастомного набора данных для работы с pytorch

In [7]:
class CustomImageDataset(Dataset):
    def __init__(self, annotations_file, img_dir, transform=None, label_column='label'):
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform = transform
        self.label_column = label_column

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = Image.open(img_path).convert("RGB")
        label = self.img_labels.iloc[idx, self.img_labels.columns.get_loc(self.label_column)]
        if isinstance(label, str):
            try:
                label = int(label)
            except ValueError:
                print(f"Warning: Non-integer label encountered: {label}")
        if self.transform:
            image = self.transform(image)
        return image, torch.tensor(label)

# Определение модели

In [8]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.resnet = models.resnet18(pretrained=True)
        for param in self.resnet.parameters():
            param.requires_grad = False

        num_ftrs = self.resnet.fc.in_features
        self.resnet.fc = nn.Sequential(
            nn.Linear(num_ftrs, 256),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(256, 64)
        )

    def forward(self, x):
        x = self.resnet(x)
        return x

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Net().to(device)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 77.8MB/s]


# Загрузка модели

In [9]:
model.load_state_dict(torch.load('trained_model.pth'))
model.eval()

RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.

# Загрузка датасета

In [None]:
annotations_file = "anomalous_dataset.csv"
img_dir = "DSM-50"
label_column = 'class'

test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

anomalous_dataset = CustomImageDataset(annotations_file, img_dir, transform=test_transform, label_column=label_column)
anomalous_loader = DataLoader(anomalous_dataset, batch_size=16, shuffle=False)

# Функции для тестирования

In [None]:
def get_all_embeddings(dataset, model):
    tester = testers.BaseTester()
    return tester.get_all_embeddings(dataset, model)

def visualize_embeddings(embeddings, labels):
    plt.figure(figsize=(10, 10))
    plt.scatter(embeddings[:, 0], embeddings[:, 1], c=labels, cmap='viridis')
    plt.colorbar()
    plt.show()

def calculate_metrics(model, dataloader):
    all_labels = []
    all_preds = []
    with torch.no_grad():
        for data, labels in dataloader:
            data = data.to(device)
            outputs = model(data)
            _, preds = torch.max(outputs, 1)
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

    print(classification_report(all_labels, all_preds, target_names=["Class 0", "Class 1"]))

    cm = confusion_matrix(all_labels, all_preds)
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=["Class 0", "Class 1"], yticklabels=["Class 0", "Class 1"])
    plt.ylabel('Actual')
    plt.xlabel('Predicted')
    plt.show()

Визуализация векторов

In [None]:
embeddings, labels = get_all_embeddings(anomalous_dataset, model)
visualize_embeddings(embeddings, labels)

Метрики

In [None]:
calculate_metrics(model, anomalous_loader)

# Итоги

В данном блокноте мы загрузили обученную модель, применили её к набору данных с аномальными примерами и визуализировали результаты.
Также мы рассчитали метрики качества модели, такие как отчёт о классификации и матрицу ошибок.

Модель показала определённый уровень точности при классификации аномальных данных. Это может свидетельствовать о её способности
распознавать отклонения от эталонных примеров. Дальнейшая работа может включать улучшение архитектуры модели, увеличение объема данных для обучения,
а также более тщательное исследование гиперпараметров.