In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torchvision import models
from tqdm import tqdm
import mlflow
import mlflow.pytorch

In [26]:
# Трансформация: преобразуем изображения в тензоры и нормализуем их
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  # Нормализация изображений
])

In [27]:
train_dataset = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.FashionMNIST(root='./data', train=False, download=True, transform=transform)

In [28]:
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [29]:
# Модель ResNet-18
model = models.resnet18(pretrained=True)

# Изменим последний слой, чтобы соответствовать 10 классам Fashion MNIST
model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
model.fc = nn.Linear(model.fc.in_features, 10)

# Перемещаем модель на доступное устройство (GPU, если оно есть)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Определим функцию потерь и оптимизатор
criterion = nn.CrossEntropyLoss()  # Для многоклассовой классификации
optimizer = optim.Adam(model.parameters(), lr=0.001)



In [30]:
print(device)

cuda


In [12]:
import sys

print(torch.__version__)
print(sys.version)

2.4.1+cu124
3.10.0 (tags/v3.10.0:b494f59, Oct  4 2021, 19:00:18) [MSC v.1929 64 bit (AMD64)]


In [13]:
print(torch.cuda.is_available())

True


In [31]:
def train_model_with_mlflow(model, train_loader, criterion, optimizer, epochs=5):
    mlflow.start_run()  # Начинаем новый эксперимент

    # Логирование гиперпараметров
    mlflow.log_param("epochs", epochs)
    mlflow.log_param("batch_size", 64)
    mlflow.log_param("learning_rate", 0.001)

    model.train()  # Устанавливаем модель в режим обучения

    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        progress_bar = tqdm(train_loader, desc=f'Epoch {epoch + 1}/{epochs}')

        for images, labels in progress_bar:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # Логируем метрики на каждой итерации
            mlflow.log_metric("loss", loss.item())
            mlflow.log_metric("accuracy", 100 * correct / total)

            # Обновляем прогресс-бар
            progress_bar.set_postfix({'Loss': f'{loss.item():.4f}', 'Accuracy': f'{100 * correct / total:.2f}%'})

        # Логируем средние метрики по эпохе
        mlflow.log_metric("epoch_loss", running_loss / len(train_loader))
        mlflow.log_metric("epoch_accuracy", 100 * correct / total)

        print(f"Epoch {epoch + 1}/{epochs} completed. Loss: {running_loss / len(train_loader):.4f}, Accuracy: {100 * correct / total:.2f}%")

    # Логируем модель
    mlflow.pytorch.log_model(model, "model")

    mlflow.end_run()  # Закрываем эксперимент

In [15]:
# Функция для оценки модели на тестовом наборе
def evaluate_model(model, test_loader):
    model.eval()  # Переводим модель в режим оценки
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"Test Accuracy: {100 * correct / total:.2f}%")

In [33]:
mlflow.end_run()

In [34]:
train_model_with_mlflow(model, train_loader, criterion, optimizer, epochs=5)

Epoch 1/5: 100%|██████████| 938/938 [00:36<00:00, 25.81it/s, Loss=0.4247, Accuracy=82.58%]


Epoch 1/5 completed. Loss: 0.4858, Accuracy: 82.58%


Epoch 2/5: 100%|██████████| 938/938 [00:35<00:00, 26.35it/s, Loss=0.3881, Accuracy=87.90%]


Epoch 2/5 completed. Loss: 0.3381, Accuracy: 87.90%


Epoch 3/5: 100%|██████████| 938/938 [00:35<00:00, 26.40it/s, Loss=0.4820, Accuracy=89.66%]


Epoch 3/5 completed. Loss: 0.2862, Accuracy: 89.66%


Epoch 4/5: 100%|██████████| 938/938 [00:35<00:00, 26.71it/s, Loss=0.1980, Accuracy=90.42%]


Epoch 4/5 completed. Loss: 0.2635, Accuracy: 90.42%


Epoch 5/5: 100%|██████████| 938/938 [00:35<00:00, 26.58it/s, Loss=0.1448, Accuracy=91.44%]


Epoch 5/5 completed. Loss: 0.2329, Accuracy: 91.44%




In [20]:

# Оценим модель на тестовых данных
evaluate_model(model, test_loader)

Test Accuracy: 90.39%
