<a href="https://colab.research.google.com/github/Polqer/diplommel1/blob/main/mat_%D0%BE%D1%82%D0%B4%D0%B5%D0%BB%D1%8C%D0%BD%D0%B0%D1%8F_%D0%BD%D0%B5%D0%B9%D1%80%D0%BE%D0%BD%D0%BD%D0%B0%D1%8F.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install efficientnet-pytorch


Collecting efficientnet-pytorch
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: efficientnet-pytorch
  Building wheel for efficientnet-pytorch (setup.py) ... [?25l[?25hdone
  Created wheel for efficientnet-pytorch: filename=efficientnet_pytorch-0.7.1-py3-none-any.whl size=16424 sha256=702da1752c07e7846aec058b6519803d8ea4d88a02faacaecbbecabb62d16fc5
  Stored in directory: /root/.cache/pip/wheels/8b/6f/9b/231a832f811ab6ebb1b32455b177ffc6b8b1cd8de19de70c09
Successfully built efficientnet-pytorch
Installing collected packages: efficientnet-pytorch
Successfully installed efficientnet-pytorch-0.7.1


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from scipy.io import loadmat
from pathlib import Path
from efficientnet_pytorch import EfficientNet
import numpy as np
import wandb
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score

from google.colab import drive
drive.mount('/content/drive')
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
if device.type == "cuda":
    print(f"GPU: {torch.cuda.get_device_name(0)}")
# ======== Настройки ========

train_dir= '/content/drive/MyDrive/datamat/train'
val_dir = '/content/drive/MyDrive/datamat/test'
wandb.init(project="my-awesome-project", name= "matEfficientnet")
batch_size = 16
num_epochs = 10
learning_rate = 0.001
input_size = (16, 256, 256)  # (channels, height, width)
num_classes = 3  # "mm", "nn", "other"


class FocalLoss(nn.Module):
    def __init__(self, gamma=2., alpha=0.25, num_classes=3):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        self.num_classes = num_classes

    def forward(self, inputs, targets):
        inputs = torch.clamp(inputs, 1e-7, 1 - 1e-7)  # Чтобы избежать логарифмирования 0
        targets = torch.eye(self.num_classes).to(inputs.device).index_select(dim=0, index=targets)  # Преобразуем метки в one-hot
        cross_entropy_loss = -targets * torch.log(inputs)
        loss = self.alpha * torch.pow(1 - inputs, self.gamma) * cross_entropy_loss
        return loss.sum(dim=1).mean()  # Среднее по батчу

# ======== Кастомный датасет ========


class HyperspectralDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = Path(root_dir)  # Путь к корню директории с данными
        self.transform = transform
        self.classes = sorted([d.name for d in self.root_dir.iterdir() if d.is_dir()])  # Список классов из папок
        self.class_to_idx = {cls_name: idx for idx, cls_name in enumerate(self.classes)}  # Индексация классов
        self.files = []

        # Проходим по всем классам и добавляем файлы
        for cls_name in self.classes:
            class_dir = self.root_dir / cls_name
            for file_path in class_dir.glob("*.mat"):  # Поиск .mat файлов в каждой папке
                self.files.append((file_path, self.class_to_idx[cls_name]))  # Добавляем файл и метку

    def __len__(self):
        return len(self.files)  # Возвращаем общее количество файлов

    def __getitem__(self, idx):
        file_path, label = self.files[idx]  # Получаем путь к файлу и метку класса
        mat_data = loadmat(file_path)  # Загружаем данные из .mat файла

        # Проверка наличия ожидаемого ключа в файле
        if 'DataCubeC' not in mat_data:
            raise KeyError(f"Ключ 'DataCubeC' не найден в файле {file_path}")

        # Получаем изображение
        image = mat_data['DataCubeC']
        image = np.transpose(image, (2, 0, 1))  # Преобразуем в формат (channels, height, width)


        # Преобразуем изображение в тензор PyTorch
        image = torch.tensor(image, dtype=torch.float32)


        # Применяем трансформации (если есть)
        if self.transform:
            image = self.transform(image)

        return image, label
# ======== Модель с EfficientNet3D ========
class EfficientNet3D(nn.Module):
    def __init__(self, num_classes):
        super(EfficientNet3D, self).__init__()

        # Предположим, что мы используем предобученную модель EfficientNet
        self.model = EfficientNet.from_pretrained('efficientnet-b0')

        # Изменяем первый слой свертки, чтобы принимать 16 каналов
        in_channels = 16  # Количество каналов в ваших гиперспектральных изображениях
        self.model._conv_stem = nn.Conv2d(in_channels=in_channels,
                                          out_channels=self.model._conv_stem.out_channels,
                                          kernel_size=self.model._conv_stem.kernel_size,
                                          stride=self.model._conv_stem.stride,
                                          padding=self.model._conv_stem.padding,
                                          bias=False)

        # Заменяем последний слой для классификации 3-х классов
        in_features = self.model._fc.in_features
        self.model._fc = nn.Linear(in_features, num_classes)

    def forward(self, x):
        return self.model(x)


def calculate_metrics(y_true, y_pred, y_true_proba, y_pred_proba):
    return {
        'accuracy': accuracy_score(y_true=y_true, y_pred=y_pred),
        'confusion_matrix': confusion_matrix(y_true=y_true, y_pred=y_pred),
        'micro/precision': precision_score(y_true=y_true, y_pred=y_pred, average='micro', zero_division=0),
        'micro/recall': recall_score(y_true=y_true, y_pred=y_pred, average='micro', zero_division=0),
        'micro/f1': f1_score(y_true=y_true, y_pred=y_pred, average='micro', zero_division=0),
        'micro/roc_auc_score': roc_auc_score(y_true, y_pred_proba, multi_class='ovr', average='macro'),
        'macro/precision': precision_score(y_true=y_true, y_pred=y_pred, average='macro', zero_division=0),
        'macro/recall': recall_score(y_true=y_true, y_pred=y_pred, average='macro', zero_division=0),
        'macro/f1': f1_score(y_true=y_true, y_pred=y_pred, average='macro', zero_division=0),
        'roc_auc_score':  roc_auc_score(y_true=y_true_proba, y_score=y_pred_proba, average=None, multi_class='ovr'),
        'weighted/precision': precision_score(y_true=y_true, y_pred=y_pred, average='weighted', zero_division=0),
        'weighted/recall': recall_score(y_true=y_true, y_pred=y_pred, average='weighted', zero_division=0),
        'weighted/f1': f1_score(y_true=y_true, y_pred=y_pred, average='weighted', zero_division=0)
    }

def train_and_validate(train_dir, val_dir):
    # Трансформации
    transform = transforms.Compose([transforms.Normalize(mean=[0.5], std=[0.5])])

    # Загружаем данные
    train_dataset = HyperspectralDataset(train_dir, transform=transform)
    val_dataset = HyperspectralDataset(val_dir, transform=transform)

    print(train_dataset.files)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # Инициализация модели
    model = EfficientNet3D(num_classes=num_classes)
    model = model.cuda() if torch.cuda.is_available() else model

    # Определение потерь и оптимизатора
    criterion = FocalLoss(gamma=2., alpha=0.25, num_classes=num_classes)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Тренировка
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        all_labels = []
        all_preds = []
        all_probs = []

        for inputs, labels in train_loader:
            inputs, labels = inputs.cuda() if torch.cuda.is_available() else inputs, labels.cuda() if torch.cuda.is_available() else labels

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(predicted.cpu().numpy())
            all_probs.extend(torch.softmax(outputs, dim=1).detach().cpu().numpy())


        epoch_loss = running_loss / len(train_loader)
        epoch_accuracy = correct / total * 100
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%")

        # Логирование метрик в W&B
        train_metrics = calculate_metrics(all_labels, all_preds, np.array(all_labels), np.array(all_probs))
        wandb.log({
            'train_loss': epoch_loss,
            'train_accuracy': epoch_accuracy,
            **train_metrics
        })

        # Валидация
        model.eval()
        val_correct = 0
        val_total = 0
        val_labels = []
        val_preds = []
        val_probs = []

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.cuda() if torch.cuda.is_available() else inputs, labels.cuda() if torch.cuda.is_available() else labels

                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

                val_labels.extend(labels.cpu().numpy())
                val_preds.extend(predicted.cpu().numpy())
                val_probs.extend(torch.softmax(outputs, dim=1).cpu().numpy())

        val_accuracy = val_correct / val_total * 100
        print(f"Validation Accuracy: {val_accuracy:.2f}%")

        # Логирование метрик в W&B для валидации
        val_metrics = calculate_metrics(val_labels, val_preds, np.array(val_labels), np.array(val_probs))
        wandb.log({
            'val_accuracy': val_accuracy,
            **val_metrics
        })

    # Завершение сессии W&B
    wandb.finish()


# ======== Запуск ========
train_and_validate(train_dir, val_dir)


Mounted at /content/drive
Using device: cuda:0
GPU: Tesla T4


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


[(PosixPath('/content/drive/MyDrive/datamat/train/DNcube/187.mat'), 0), (PosixPath('/content/drive/MyDrive/datamat/train/DNcube/185.mat'), 0), (PosixPath('/content/drive/MyDrive/datamat/train/DNcube/176.mat'), 0), (PosixPath('/content/drive/MyDrive/datamat/train/DNcube/170.mat'), 0), (PosixPath('/content/drive/MyDrive/datamat/train/DNcube/174.mat'), 0), (PosixPath('/content/drive/MyDrive/datamat/train/DNcube/175.mat'), 0), (PosixPath('/content/drive/MyDrive/datamat/train/DNcube/182.mat'), 0), (PosixPath('/content/drive/MyDrive/datamat/train/DNcube/186.mat'), 0), (PosixPath('/content/drive/MyDrive/datamat/train/DNcube/173.mat'), 0), (PosixPath('/content/drive/MyDrive/datamat/train/DNcube/184.mat'), 0), (PosixPath('/content/drive/MyDrive/datamat/train/DNcube/179.mat'), 0), (PosixPath('/content/drive/MyDrive/datamat/train/DNcube/188.mat'), 0), (PosixPath('/content/drive/MyDrive/datamat/train/DNcube/168.mat'), 0), (PosixPath('/content/drive/MyDrive/datamat/train/DNcube/189.mat'), 0), (Posi

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b0-355c32eb.pth
100%|██████████| 20.4M/20.4M [00:00<00:00, 362MB/s]


Loaded pretrained weights for efficientnet-b0
Epoch 1/10, Loss: 0.4203, Accuracy: 36.80%
Validation Accuracy: 44.07%
Epoch 2/10, Loss: 0.0030, Accuracy: 41.13%
Validation Accuracy: 33.90%
Epoch 3/10, Loss: 0.0005, Accuracy: 44.59%
Validation Accuracy: 25.42%
Epoch 4/10, Loss: 0.0000, Accuracy: 39.39%
Validation Accuracy: 30.51%
Epoch 5/10, Loss: 0.0003, Accuracy: 35.93%
Validation Accuracy: 28.81%
Epoch 6/10, Loss: 0.0000, Accuracy: 39.39%
Validation Accuracy: 28.81%
Epoch 7/10, Loss: 0.0000, Accuracy: 40.69%
Validation Accuracy: 28.81%
Epoch 8/10, Loss: 0.0170, Accuracy: 33.33%
Validation Accuracy: 28.81%
Epoch 9/10, Loss: 0.0000, Accuracy: 36.80%
Validation Accuracy: 30.51%
Epoch 10/10, Loss: 0.0000, Accuracy: 36.80%
Validation Accuracy: 28.81%


0,1
accuracy,▅█▇▄█▁▆▃▅▂▆▂▇▂▄▂▅▃▅▂
macro/f1,▆▂▇▃█▂▆▂▆▁▆▁▇▁▅▁▅▂▆▂
macro/precision,▅▂▆▂█▄▇▃▇▁▆▁▇▁▅▁▆▄█▃
macro/recall,▄▄▆▁█▂▆▄▅▄▆▄▇▄▃▄▅▄▅▃
micro/f1,▅█▇▄█▁▆▃▅▂▆▂▇▂▄▂▅▃▅▂
micro/precision,▅█▇▄█▁▆▃▅▂▆▂▇▂▄▂▅▃▅▂
micro/recall,▅█▇▄█▁▆▃▅▂▆▂▇▂▄▂▅▃▅▂
micro/roc_auc_score,▄▂▆▃█▂▆▁▆▅▆▅▅▃▄▄▆▄▆▄
train_accuracy,▃▆█▅▃▅▆▁▃▃
train_loss,█▁▁▁▁▁▁▁▁▁

0,1
accuracy,0.28814
macro/f1,0.16911
macro/precision,0.20635
macro/recall,0.32655
micro/f1,0.28814
micro/precision,0.28814
micro/recall,0.28814
micro/roc_auc_score,0.50888
train_accuracy,36.79654
train_loss,0.0


In [None]:
import os
from scipy.io import loadmat
import torch
from torch.utils.data import Dataset
from torchvision import transforms

class YourDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.files = []  # Список файлов и меток
        self.classes = os.listdir(root_dir)  # Список классов (имена папок)
        self.class_to_idx = {cls: idx for idx, cls in enumerate(self.classes)}  # Маппинг класса на метку

        # Собираем все файлы и метки
        for label, class_name in enumerate(self.classes):
            class_dir = os.path.join(root_dir, class_name)
            for file_name in os.listdir(class_dir):
                if file_name.endswith('.mat'):  # Проверка на формат .mat
                    file_path = os.path.join(class_dir, file_name)
                    self.files.append((file_path, label))  # Добавляем путь и метку

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        file_path, label = self.files[idx]
        mat_data = loadmat(file_path)

        # Предполагаем, что данные в файле под ключом 'DataCubeC'
        image = mat_data['DataCubeC']

        # Преобразование данных (если необходимо)
        if self.transform:
            image = self.transform(image)

        return image, label
import torch
from torch.utils.data import DataLoader

# Путь к вашей папке с данными
train_dir= '/content/drive/MyDrive/datamat/train'

# Преобразования, если они нужны
transform = transforms.Compose([
    transforms.ToTensor(),
    # Добавьте другие преобразования, если нужно
])

# Создаем датасет
dataset = YourDataset(root_dir=train_dir, transform=transform)

# Создаем DataLoader
train_loader = DataLoader(dataset, batch_size=16, shuffle=True)

# Проверим, как загружаются данные
for images, labels in train_loader:
    print(f"Images shape: {images.shape}, Labels: {labels}")
    break  # Проверим только первый батч


Images shape: torch.Size([16, 16, 272, 512]), Labels: tensor([2, 1, 0, 2, 1, 2, 2, 2, 0, 0, 0, 2, 0, 0, 2, 2])
