In [5]:
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchmetrics import Accuracy, Precision, Recall, F1Score
import pandas as pd
from torch_trainer import *
from timm import optim, scheduler
# Preprocessing: convert each image to a tensor, then normalise all three
# channels with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# CIFAR-10 train / validation splits stored under ./data.
train_dataset = datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform,
)
val_dataset = datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform,
)

# Use a smaller batch_size on CPU; it can be increased on GPU.
batch_size = 128
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Classification settings shared by the model and the metrics below.
num_classes = 10
task = 'multiclass'

# ----------------------------
# 4. Model
# ----------------------------
model = torchvision.models.resnet18(weights=None, num_classes=num_classes)

# ----------------------------
# 5. Device
# ----------------------------
# Preference order: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

model.to(device)

# ----------------------------
# 6. Metrics (from torchmetrics)
# ----------------------------
# Accuracy uses the torchmetrics default averaging; the other three are
# macro-averaged. All metric objects are moved to the training device.
metrics = {
    'acc': Accuracy(task=task, num_classes=num_classes).to(device),
    **{
        key: metric_cls(task=task, num_classes=num_classes, average='macro').to(device)
        for key, metric_cls in (('prec', Precision), ('rec', Recall), ('f1', F1Score))
    },
}

# Calling a torchmetrics object returns a tensor; wrap each object in a plain
# two-argument function for compatibility with the trainer's metric interface.
def make_metric_fn(metric_obj):
    """Return a ``(preds, target)`` callable that forwards to ``metric_obj``."""

    def metric_fn(preds, target):
        return metric_obj(preds, target)

    return metric_fn

# Adapt every torchmetrics object to the plain-callable form passed to `fit`.
wrapped_metrics = {name: make_metric_fn(metric) for name, metric in metrics.items()}

# ----------------------------
# 7. Optimizer and criterion
# ----------------------------
criterion = nn.CrossEntropyLoss().to(device)

# One constant drives both the LR schedule and the training loop so the two
# cannot drift apart.
NUM_EPOCHS = 20

# Effective batch size = 128 (no gradient accumulation).
# To emulate batch_size=512, use accumulation_steps=4 and multiply lr by 4.
#
# The cell imports `optim` and `scheduler` from timm but never binds the name
# `timm` itself, so the factories are reached through the imported submodules.
optimizer = optim.create_optimizer_v2(
    model,
    opt='adamw',
    lr=3e-4,
    weight_decay=0.01,
)

# Bound to `lr_scheduler` (not `scheduler`) so the imported timm `scheduler`
# module is not shadowed. The second return value is not needed here.
lr_scheduler, _ = scheduler.create_scheduler_v2(
    optimizer,
    sched='cosine',
    num_epochs=NUM_EPOCHS,
    warmup_epochs=2,
    min_lr=1e-5,
)

# ----------------------------
# 8. Run training
# ----------------------------
history, batch_hist = fit(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer,
    criterion=criterion,
    metrics=wrapped_metrics,
    epochs=NUM_EPOCHS,
    scheduler=lr_scheduler,
    patience=7,
    min_delta=0.001,
    grad_clip=1.0,
    use_amp=(device.type == 'cuda'),      # AMP on GPU only
    ema_decay=0.999,
    device=device,
    checkpoint_path='best_model_checkpoint.pt',
    verbose=True,
    monitor_metric='f1',                  # early stopping on F1
    mode='max',
    accumulation_steps=1,                 # no accumulation (2, 4, ... also possible)
    return_batch_history=True,
)

print("\n Обучение завершено!")
# `best_epoch` is printed one-based (stored value + 1).
print(f"Лучшая эпоха: {history.attrs['best_epoch'] + 1}")
print(f"Лучший F1: {history.attrs['best_score']:.4f}")

# Persist the per-epoch history.
history.to_csv('training_history.csv', index=False)

Train:   0%|          | 0/391 [00:00<?, ?it/s]

Val:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch 01 | Time: 96.9s | LR: 5.05e-04 | Train Loss: 2.0635 | Val Loss: 2.5068 | Val f1: 0.0451 ★


Train:   0%|          | 0/391 [00:00<?, ?it/s]

Val:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch 02 | Time: 97.1s | LR: 9.76e-04 | Train Loss: 1.3671 | Val Loss: 2.4940 | Val f1: 0.0184 


Train:   0%|          | 0/391 [00:00<?, ?it/s]

Val:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch 03 | Time: 88.4s | LR: 9.46e-04 | Train Loss: 1.0631 | Val Loss: 2.3471 | Val f1: 0.0184 


Train:   0%|          | 0/391 [00:00<?, ?it/s]

Val:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch 04 | Time: 87.4s | LR: 9.05e-04 | Train Loss: 0.8340 | Val Loss: 2.4170 | Val f1: 0.0657 ★


Train:   0%|          | 0/391 [00:00<?, ?it/s]

Val:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch 05 | Time: 85.7s | LR: 8.55e-04 | Train Loss: 0.6943 | Val Loss: 1.9593 | Val f1: 0.1776 ★


Train:   0%|          | 0/391 [00:00<?, ?it/s]

Val:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch 06 | Time: 86.9s | LR: 7.96e-04 | Train Loss: 0.5685 | Val Loss: 1.7274 | Val f1: 0.3125 ★


Train:   0%|          | 0/391 [00:00<?, ?it/s]

Val:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch 07 | Time: 85.6s | LR: 7.30e-04 | Train Loss: 0.4569 | Val Loss: 1.2723 | Val f1: 0.5228 ★


Train:   0%|          | 0/391 [00:00<?, ?it/s]

Val:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch 08 | Time: 86.1s | LR: 6.58e-04 | Train Loss: 0.3483 | Val Loss: 1.0612 | Val f1: 0.6373 ★


Train:   0%|          | 0/391 [00:00<?, ?it/s]

Val:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch 09 | Time: 104.1s | LR: 5.82e-04 | Train Loss: 0.2524 | Val Loss: 0.8729 | Val f1: 0.7067 ★


Train:   0%|          | 0/391 [00:00<?, ?it/s]

Val:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch 10 | Time: 118.0s | LR: 5.05e-04 | Train Loss: 0.1728 | Val Loss: 0.8579 | Val f1: 0.7326 ★


Train:   0%|          | 0/391 [00:00<?, ?it/s]

Val:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch 11 | Time: 87.2s | LR: 4.28e-04 | Train Loss: 0.1194 | Val Loss: 0.8762 | Val f1: 0.7528 ★


Train:   0%|          | 0/391 [00:00<?, ?it/s]

Val:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch 12 | Time: 87.6s | LR: 3.52e-04 | Train Loss: 0.0761 | Val Loss: 0.8958 | Val f1: 0.7736 ★


Train:   0%|          | 0/391 [00:00<?, ?it/s]

Val:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch 13 | Time: 87.2s | LR: 2.80e-04 | Train Loss: 0.0492 | Val Loss: 0.9976 | Val f1: 0.7746 ★


Train:   0%|          | 0/391 [00:00<?, ?it/s]

Val:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch 14 | Time: 87.1s | LR: 2.14e-04 | Train Loss: 0.0280 | Val Loss: 1.1028 | Val f1: 0.7768 ★


Train:   0%|          | 0/391 [00:00<?, ?it/s]

Val:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch 15 | Time: 87.7s | LR: 1.55e-04 | Train Loss: 0.0148 | Val Loss: 1.2010 | Val f1: 0.7740 


Train:   0%|          | 0/391 [00:00<?, ?it/s]

Val:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch 16 | Time: 87.2s | LR: 1.05e-04 | Train Loss: 0.0062 | Val Loss: 1.3391 | Val f1: 0.7731 


Train:   0%|          | 0/391 [00:00<?, ?it/s]

Val:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch 17 | Time: 87.2s | LR: 6.40e-05 | Train Loss: 0.0027 | Val Loss: 1.3958 | Val f1: 0.7756 


Train:   0%|          | 0/391 [00:00<?, ?it/s]

Val:   0%|          | 0/79 [00:00<?, ?it/s]

Epoch 18 | Time: 88.7s | LR: 3.42e-05 | Train Loss: 0.0011 | Val Loss: 1.4567 | Val f1: 0.7726 


Train:   0%|          | 0/391 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [8]:
# Show the 'train' entry of the batch history returned by
# `fit(..., return_batch_history=True)`. `batch_hist` is only bound once that
# call returns, so it is undefined if training was interrupted.
batch_hist['train']

NameError: name 'batch_hist' is not defined

In [6]:
# Plot the 'acc' metric from `batch_hist`; requires a completed `fit` run.
# NOTE(review): `window=10` is presumably a smoothing window — confirm against
# `plot_batch_history`.
plot_batch_history(batch_hist, metric_name='acc', window=10)

NameError: name 'batch_hist' is not defined

In [2]:
from torch_trainer_v1 import *

[✓] Device: cuda | Seed: 42 | TF32: True


Train:   0%|          | 0/391 [00:00<?, ?it/s]

Val:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 01 | Time: 19.8s | LR: 3.00e-04 | Throughput: 2528 samples/s | Train Loss: 1.4298 | Val Loss: 2.3937 | Val f1: 0.0406 ★


Train:   0%|          | 0/391 [00:00<?, ?it/s]

 Async checkpoint saved to best_model.pt


Val:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 02 | Time: 20.6s | LR: 3.00e-04 | Throughput: 2427 samples/s | Train Loss: 1.0081 | Val Loss: 2.3072 | Val f1: 0.0405 


Train:   0%|          | 0/391 [00:00<?, ?it/s]

Val:   0%|          | 0/40 [00:00<?, ?it/s]

Epoch 03 | Time: 20.9s | LR: 3.00e-04 | Throughput: 2392 samples/s | Train Loss: 0.8078 | Val Loss: 1.8973 | Val f1: 0.2157 ★
 Async checkpoint saved to best_model.pt


In [34]:
import segmentation_models_pytorch as smp

# Built from the loss implementations that ship with SMP.
class SMPCombinedLoss(nn.Module):
    """Weighted sum of SMP's binary Dice loss and BCE-with-logits loss.

    Args:
        dice_weight: Weight of the Dice term; the BCE term is weighted by
            ``1 - dice_weight``.
    """

    def __init__(self, dice_weight=0.5):
        super().__init__()
        self.dice_loss = smp.losses.DiceLoss(mode='binary')
        self.bce_loss = smp.losses.SoftBCEWithLogitsLoss(smooth_factor=0.0)
        self.dice_weight = dice_weight

    def forward(self, pred, target):
        """Return ``dice_weight * dice + (1 - dice_weight) * bce``.

        A three-dimensional ``target`` gets a channel axis inserted at dim 1,
        and the target is cast to float before both losses are evaluated.
        """
        mask = target.unsqueeze(1) if target.dim() == 3 else target
        mask = mask.float()

        dice_term = self.dice_weight * self.dice_loss(pred, mask)
        bce_term = (1 - self.dice_weight) * self.bce_loss(pred, mask)
        return dice_term + bce_term



# Hand-written binary segmentation metrics computed from raw logits.
def iou_score(pred, target, *, threshold=0.5, eps=1e-6):
    """Mean per-sample IoU between thresholded predictions and a binary mask.

    Args:
        pred: Logits with 3 or 4 dimensions; a 3-D tensor gets a channel axis
            inserted at dim 1.
        target: Mask with 3 or 4 dimensions, aligned the same way as ``pred``.
        threshold: Probability above which a pixel counts as positive.
        eps: Smoothing constant added to numerator and denominator, so a
            sample with an empty prediction and an empty mask scores 1.

    Returns:
        Scalar tensor: the IoU averaged over the batch dimension.
    """
    # Give BOTH tensors an explicit channel axis. Unsqueezing only `target`
    # would let a 3-D prediction broadcast against the 4-D mask into
    # (N, N, H, W) and silently mix samples.
    if pred.dim() == 3:
        pred = pred.unsqueeze(1)
    if target.dim() == 3:
        target = target.unsqueeze(1)

    # Binarise the predictions.
    pred_bin = (torch.sigmoid(pred) > threshold).float()
    target = target.float()

    overlap = pred_bin * target
    intersection = overlap.sum((1, 2, 3))
    union = (pred_bin + target - overlap).sum((1, 2, 3))

    return ((intersection + eps) / (union + eps)).mean()

def f_score(pred, target, *, threshold=0.5, eps=1e-6):
    """Mean per-sample F1 (Dice) score between thresholded logits and a mask.

    Args:
        pred: Logits with 3 or 4 dimensions; a 3-D tensor gets a channel axis
            inserted at dim 1.
        target: Mask with 3 or 4 dimensions, aligned the same way as ``pred``.
        threshold: Probability above which a pixel counts as positive.
        eps: Smoothing constant added to numerator and denominator, so a
            sample with an empty prediction and an empty mask scores 1.

    Returns:
        Scalar tensor: ``2 * |P ∩ T| / (|P| + |T|)`` averaged over the batch.
    """
    # Align both tensors to four dimensions; a 3-D prediction would otherwise
    # fail on the reduction over dims (1, 2, 3).
    if pred.dim() == 3:
        pred = pred.unsqueeze(1)
    if target.dim() == 3:
        target = target.unsqueeze(1)

    # Binarise the predictions.
    pred_bin = (torch.sigmoid(pred) > threshold).float()
    target = target.float()

    intersection = (pred_bin * target).sum((1, 2, 3))
    total_pred = pred_bin.sum((1, 2, 3))
    total_target = target.sum((1, 2, 3))

    return ((2 * intersection + eps) / (total_pred + total_target + eps)).mean()

def accuracy_score(pred, target, *, threshold=0.5, eps=1e-6):
    """Mean per-sample pixel accuracy of thresholded logits against a mask.

    Args:
        pred: Logits with 3 or 4 dimensions; a 3-D tensor gets a channel axis
            inserted at dim 1.
        target: Mask with 3 or 4 dimensions, aligned the same way as ``pred``.
        threshold: Probability above which a pixel counts as positive.
        eps: Smoothing constant added to numerator and denominator.

    Returns:
        Scalar tensor: fraction of matching pixels, averaged over the batch.
    """
    # Align both tensors to four dimensions. With only `target` unsqueezed, a
    # 3-D prediction broadcasts to (N, N, H, W) and the "accuracy" can exceed 1.
    if pred.dim() == 3:
        pred = pred.unsqueeze(1)
    if target.dim() == 3:
        target = target.unsqueeze(1)

    # Binarise the predictions.
    pred_bin = (torch.sigmoid(pred) > threshold).float()
    target = target.float()

    correct = (pred_bin == target).float().sum((1, 2, 3))
    # Number of pixels per sample (channels * height * width of the mask).
    total = target.new_tensor(target.shape[1] * target.shape[2] * target.shape[3])

    return ((correct + eps) / (total + eps)).mean()

# Metric callables keyed by name; each one takes (pred, target).
metrics = dict(
    iou=iou_score,
    f1=f_score,
    acc=accuracy_score,
)



In [36]:
LR = 0.001  # base learning rate

# Loss module and model are both moved to the active device.
criterion = SMPCombinedLoss(dice_weight=0.5).to(device)
model.to(device)

optimizer = torch.optim.AdamW(model.parameters(), lr=LR)
# mode='max': the scheduler treats larger monitored values as better and
# multiplies the LR by `factor` once `patience` is exhausted.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.5,
    patience=3,
)

In [37]:
# Start training; returns the history and, because
# return_batch_history=True, a per-batch history as well.
history_df, batch_history = fit(
    # what is trained
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    # data
    train_loader=train_loader,
    val_loader=val_loader,
    metrics=metrics,
    # run length and execution
    epochs=1,
    device=device,
    use_amp=(device.type == 'cuda'),
    accumulation_steps=1,
    grad_clip=1.0,
    # model selection / early stopping on the 'iou' metric (higher is better)
    monitor_metric="iou",
    mode="max",
    patience=10,
    min_delta=1e-4,
    checkpoint_path="best_segmentation_model.pt",
    # reporting
    verbose=True,
    return_batch_history=True,
)

Train:   0%|          | 0/75 [00:00<?, ?it/s]

Val:   0%|          | 0/17 [00:00<?, ?it/s]

Epoch 01 | Time: 22.9s | LR: 1.00e-03 | Train Loss: 0.0927 | Val Loss: 0.0998 | Val iou: 0.8744 ★


In [39]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class UNetBlock(nn.Module):
    """Two 3x3 conv -> batch-norm -> ReLU stages with 2-D dropout in between.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by both convolutions.
        dropout: Probability passed to ``nn.Dropout2d``.
    """

    def __init__(self, in_channels, out_channels, dropout=0.1):
        super().__init__()
        # padding=1 with a 3x3 kernel keeps the spatial size unchanged.
        conv_kwargs = dict(kernel_size=3, padding=1)
        self.conv1 = nn.Conv2d(in_channels, out_channels, **conv_kwargs)
        self.conv2 = nn.Conv2d(out_channels, out_channels, **conv_kwargs)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.dropout = nn.Dropout2d(p=dropout)

    def forward(self, x):
        """Apply conv1/bn1/ReLU, dropout, then conv2/bn2/ReLU."""
        first_stage = self.dropout(F.relu(self.bn1(self.conv1(x))))
        return F.relu(self.bn2(self.conv2(first_stage)))

class UNet(nn.Module):
    """Simplified U-Net: an encoder of ``UNetBlock`` stages with max-pooling,
    a bottleneck, and a decoder of transposed convolutions with skip
    connections.

    Args:
        in_channels: Channels of the input image.
        out_channels: Channels produced by the final 1x1 convolution.
        features: Channel width of each encoder stage, shallow to deep. Any
            non-empty iterable of ints is accepted.

    Raises:
        ValueError: If ``features`` is empty.
    """

    def __init__(self, in_channels=3, out_channels=1, features=(64, 128, 256, 512)):
        super().__init__()
        # Immutable default plus a local copy: no state is shared between
        # instances, and lists / tuples / generators are all accepted.
        features = list(features)
        if not features:
            raise ValueError("features must contain at least one channel width")

        self.encoder = nn.ModuleList()
        self.decoder = nn.ModuleList()
        self.pool = nn.MaxPool2d(2, 2)
        # Not used by forward(); kept so the attribute remains available.
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        # Encoder (downsampling path)
        in_ch = in_channels
        for feature in features:
            self.encoder.append(UNetBlock(in_ch, feature))
            in_ch = feature

        # Bottleneck doubles the deepest width.
        self.bottleneck = UNetBlock(features[-1], features[-1] * 2)

        # Decoder (upsampling path): modules are stored as alternating
        # (ConvTranspose2d, UNetBlock) pairs.
        in_ch = features[-1] * 2  # channels after the bottleneck
        for feature in reversed(features):
            self.decoder.append(
                nn.ConvTranspose2d(in_ch, feature, kernel_size=2, stride=2)
            )
            # After concatenation: feature (upsampled) + feature (skip).
            self.decoder.append(UNetBlock(feature * 2, feature))
            in_ch = feature

        # Output layer
        self.output = nn.Conv2d(features[0], out_channels, 1)

    def forward(self, x):
        """Run the encoder, bottleneck and decoder and return the output map.

        The result has the same spatial size as the input, because every
        decoder stage is resized to its skip connection when the sizes differ.
        """
        skip_connections = []

        # Encoder: remember each stage's output before pooling.
        for block in self.encoder:
            x = block(x)
            skip_connections.append(x)
            x = self.pool(x)

        x = self.bottleneck(x)

        # Decoder: pair each (upsample, block) with the matching skip,
        # deepest skip first.
        upsamplers = self.decoder[0::2]
        blocks = self.decoder[1::2]
        for upsample, block, skip in zip(upsamplers, blocks, reversed(skip_connections)):
            x = upsample(x)

            # Odd input sizes lose a pixel when pooled; resize to the skip.
            if x.shape[2:] != skip.shape[2:]:
                x = F.interpolate(x, size=skip.shape[2:], mode='bilinear', align_corners=True)

            x = block(torch.cat([x, skip], dim=1))

        return self.output(x)

# Example usage: build the default U-Net and report its parameter count.
model = UNet(in_channels=3, out_channels=1)
print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")

# Shape check: one random 3x256x256 image through the model, without gradients.
x = torch.randn((1, 3, 256, 256))
with torch.no_grad():
    y = model(x)
print(f"Input shape: {x.shape}")
print(f"Output shape: {y.shape}")

Model parameters: 31,043,521
Input shape: torch.Size([1, 3, 256, 256])
Output shape: torch.Size([1, 1, 256, 256])


In [42]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import timm

class TimmUNet(nn.Module):
    """U-Net-style decoder on top of a timm feature-extraction backbone.

    Args:
        backbone_name: Model name passed to ``timm.create_model``.
        in_channels: Channels of the input image; forwarded to the backbone
            as ``in_chans``.
        out_channels: Channels produced by the final 1x1 convolution.
        pretrained: Whether ``timm.create_model`` loads pretrained weights.
        decoder_channels: Channel width used for every decoder stage.
    """

    def __init__(self,
                 backbone_name='mobilenetv3_small_100',  # lightweight backbone
                 in_channels=3,
                 out_channels=1,
                 pretrained=True,
                 decoder_channels=32):
        super().__init__()

        # Backbone returning intermediate feature maps. `in_chans` makes the
        # `in_channels` argument actually take effect.
        self.backbone = timm.create_model(
            backbone_name,
            features_only=True,
            pretrained=pretrained,
            in_chans=in_channels,
            out_indices=(0, 1, 2, 3),  # four feature levels
        )

        # Channel count of each returned feature level.
        self.feature_channels = self.backbone.feature_info.channels()

        # 1x1 convolutions that bring every level to the decoder width.
        self.reduce_convs = nn.ModuleList(
            [nn.Conv2d(ch, decoder_channels, 1) for ch in self.feature_channels]
        )

        # Decoder: alternating (upsample, conv block) pairs, one pair per
        # skip connection.
        self.decoder_blocks = nn.ModuleList()
        for _ in range(len(self.feature_channels) - 1):
            self.decoder_blocks.append(
                nn.ConvTranspose2d(decoder_channels, decoder_channels, kernel_size=2, stride=2)
            )
            # After concatenation: decoder_channels (upsampled) + decoder_channels (skip).
            self.decoder_blocks.append(
                nn.Sequential(
                    nn.Conv2d(2 * decoder_channels, decoder_channels, 3, padding=1),
                    nn.BatchNorm2d(decoder_channels),
                    nn.ReLU(inplace=True),
                    nn.Conv2d(decoder_channels, decoder_channels, 3, padding=1),
                    nn.BatchNorm2d(decoder_channels),
                    nn.ReLU(inplace=True),
                )
            )

        # Not used by forward() (which resizes with F.interpolate); kept so
        # the attribute remains available.
        self.final_upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        # Final output layer
        self.output = nn.Conv2d(decoder_channels, out_channels, 1)

    def forward(self, x):
        """Return a map with ``out_channels`` channels at the input's spatial size."""
        # Remember the input resolution so the output can be resized to it
        # rather than to a fixed 256x256.
        input_size = tuple(x.shape[2:])

        features = self.backbone(x)
        reduced_features = [conv(feat) for feat, conv in zip(features, self.reduce_convs)]

        # Start from the deepest level and walk towards the shallowest skip.
        x = reduced_features[-1]
        for step in range(0, len(self.decoder_blocks), 2):
            x = self.decoder_blocks[step](x)  # upsample

            skip = reduced_features[len(reduced_features) - 2 - step // 2]
            if x.shape[2:] != skip.shape[2:]:
                x = F.interpolate(x, size=skip.shape[2:], mode='bilinear', align_corners=True)

            x = torch.cat([x, skip], dim=1)
            x = self.decoder_blocks[step + 1](x)  # conv block

        # The shallowest feature level may still be smaller than the input;
        # resize to the input resolution when it is.
        if tuple(x.shape[2:]) != input_size:
            x = F.interpolate(x, size=input_size, mode='bilinear', align_corners=True)

        return self.output(x)

# Example usage: TimmUNet on a very lightweight pretrained backbone.
model = TimmUNet(
    backbone_name='mobilenetv3_small_100',
    in_channels=3,
    out_channels=1,
    pretrained=True,
)

print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")

# Shape check: one random 3x256x256 image through the model, without gradients.
x = torch.randn((1, 3, 256, 256))
with torch.no_grad():
    y = model(x)
print(f"Input shape: {x.shape}")
print(f"Output shape: {y.shape}")

Unexpected keys (classifier.bias, classifier.weight, conv_head.bias, conv_head.weight) found while loading pretrained weights. This may be expected if model is being adapted.


Model parameters: 1,026,401
Input shape: torch.Size([1, 3, 256, 256])
Output shape: torch.Size([1, 1, 256, 256])
