In [97]:
import os

import cv2
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import torch
from sklearn.model_selection import train_test_split
from torch import nn
from torch.cuda.amp import GradScaler
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms

sns.set_theme()
%matplotlib inline

In [98]:
MAIN_DATA_DIR = "../data"
data_dir = f"{MAIN_DATA_DIR}/all_data"
labels_file = os.path.join(data_dir, "labels.csv")

In [99]:
df_data = pd.read_csv(labels_file)
df_data = df_data[df_data["class"] != "SkinCancer"]
df_data = df_data.sample(frac=0.1, random_state=42).reset_index(drop=True)
df_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2528 entries, 0 to 2527
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   class     2528 non-null   object
 1   filename  2528 non-null   object
dtypes: object(2)
memory usage: 39.6+ KB


In [100]:
train_val, test = train_test_split(df_data, test_size=0.1, random_state=42, stratify=df_data["class"])
train, val = train_test_split(train_val, test_size=0.1, random_state=42, stratify=train_val["class"])


In [101]:
class SkinDataset(Dataset):
    def __init__(self, df, root_dir, transform=None):
        self.df = df.reset_index(drop=True)  # Сбрасываем индексы
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_class = str(self.df.iloc[idx, 0])  # Приводим к строке
        img_name = self.df.iloc[idx, 1]  # filename
        img_path = os.path.join(self.root_dir, img_class, img_name)  # Исправляем путь

        if not os.path.exists(img_path):
            print(f"⚠️ Файл не найден: {img_path}")
            return None  # Пропускаем отсутствующие файлы

        image = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if image is None:
            print(f"⚠️ Ошибка загрузки изображения: {img_path}")
            return None  # Пропускаем пустые изображения

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform:
            image = self.transform(image)

        label = torch.tensor(1 if self.df.iloc[idx, 0] == "malignant" else 0, dtype=torch.long)
        return image, label


In [102]:
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((64, 64)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(360),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])


In [103]:
train_dataset = SkinDataset(train, data_dir, transform)
val_dataset = SkinDataset(val, data_dir, transform)
test_dataset = SkinDataset(test, data_dir, transform)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [104]:
class ResNet50Model(nn.Module):
    def __init__(self, num_classes=2):
        super().__init__()
        self.model = models.resnet18(pretrained=True)
        self.model.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        return self.model(x)


In [105]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 2
model = ResNet50Model(num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1.e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
scaler = GradScaler()

  scaler = GradScaler()


In [None]:
def train_model(model, train_loader, val_loader, epochs=10):
    train_acc_history = []
    val_acc_history = []

    for epoch in range(epochs):
        model.train()
        total_loss, correct, total = 0, 0, 0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += labels.size(0)

        train_acc = correct / total
        train_acc_history.append(train_acc)

        # Оценка на валидации
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                correct += (outputs.argmax(dim=1) == labels).sum().item()
                total += labels.size(0)
        val_acc = correct / total
        val_acc_history.append(val_acc)

        print(f"Epoch {epoch+1}/{epochs} - Loss: {total_loss:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")
        scheduler.step()

    # Построение графика
    plt.plot(range(1, epochs+1), train_acc_history, label="Train Accuracy")
    plt.plot(range(1, epochs+1), val_acc_history, label="Validation Accuracy")
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    plt.legend()
    plt.title("Training and Validation Accuracy")
    plt.show()

    return model

In [107]:
trained_model = train_model(model, train_loader, val_loader, epochs=5)

  with autocast():


Epoch 1/5 - Loss: 183.1415, Train Acc: 0.7171, Val Acc: 0.8070
Epoch 2/5 - Loss: 539.9093, Train Acc: 0.6497, Val Acc: 0.8070


KeyboardInterrupt: 

In [None]:
from sklearn.metrics import f1_score


def evaluate_on_test_f1(model, test_loader):
    model.eval()  # Переключаем модель в режим оценки
    all_labels = []
    all_preds = []

    with torch.no_grad():  # Отключаем вычисление градиентов
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = outputs.max(1)  # Получаем предсказания модели

            all_labels.extend(labels.cpu().numpy())  # Собираем истинные метки
            all_preds.extend(predicted.cpu().numpy())  # Собираем предсказания модели

    # Вычисляем F1-меру
    f1 = f1_score(all_labels, all_preds, average="weighted")  # Средневзвешенная F1-метрика
    print(f"Test F1 Score: {f1:.4f}")  # Выводим F1-меру

# После завершения обучения модели вызываем эту функцию для тестовой выборки
evaluate_on_test_f1(trained_model, test_loader)


Test F1 Score: 0.9126


In [None]:
torch.save(trained_model.state_dict(), "../experiments/weights/resnet18_melanoma.pth")
print("✅ Модель сохранена!")

✅ Модель сохранена!
