In [32]:
import torch
import torch.nn as nn
from torch.optim import AdamW
from tqdm import tqdm
from timm import create_model

from ffcv.writer import DatasetWriter
from ffcv.fields import RGBImageField, IntField
from ffcv.loader import Loader, OrderOption
from ffcv.transforms import ToTensor, ToDevice, ToTorchImage, Cutout
from ffcv.fields.decoders import IntDecoder, RandomResizedCropRGBImageDecoder
from torchvision.datasets import CIFAR10

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# 1) Создаем и сохраняем FFCV датасет (разово)
def convert_cifar10_to_ffcv():
    train_ds = CIFAR10(root='./data', train=True, download=True)
    test_ds = CIFAR10(root='./data', train=False, download=True)

    train_writer = DatasetWriter('cifar10_train.beton', {
        'image': RGBImageField(max_resolution=256),
        'label': IntField()
    })
    train_writer.from_indexed_dataset(train_ds)

    test_writer = DatasetWriter('cifar10_test.beton', {
        'image': RGBImageField(max_resolution=256),
        'label': IntField()
    })
    test_writer.from_indexed_dataset(test_ds)


In [28]:
# Раскомментируй и запусти один раз
convert_cifar10_to_ffcv()

100%|██████████| 50000/50000 [00:00<00:00, 99676.18it/s] 
100%|██████████| 10000/10000 [00:00<00:00, 99753.94it/s]


In [37]:
from ffcv.transforms import NormalizeImage
import numpy as np

# 2) Настраиваем пайплайны и DataLoader-ы
batch_size = 128
num_workers = 4


mean = np.array([0.4914, 0.4822, 0.4465])
std = np.array([0.2023, 0.1994, 0.2010])

train_image_pipeline = [
    RandomResizedCropRGBImageDecoder((224, 224)),
    Cutout(crop_size=16),                # если хочешь, можно убрать
    ToTensor(),
    ToTorchImage(),
    NormalizeImage(mean, std, np.float32),  # Нормализация + float32
    ToDevice(device)
]

train_label_pipeline = [
    IntDecoder(),
    ToTensor(),
    ToDevice(device)
]

test_image_pipeline = [
    RandomResizedCropRGBImageDecoder((224, 224)),
    ToTensor(),
    ToTorchImage(),
    NormalizeImage(mean, std, np.float32),
    ToDevice(device)
]
test_label_pipeline = [
    IntDecoder(),
    ToTensor(),
    ToDevice(device)
]

train_loader = Loader('cifar10_train.beton', batch_size=batch_size, num_workers=num_workers,
                      order=OrderOption.RANDOM,
                      pipelines={
                          'image': train_image_pipeline,
                          'label': train_label_pipeline
                      })

test_loader = Loader('cifar10_test.beton', batch_size=batch_size, num_workers=num_workers,
                     order=OrderOption.SEQUENTIAL,
                     pipelines={
                         'image': test_image_pipeline,
                         'label': test_label_pipeline
                     })

AssertionError: Can't be in JIT mode and on the GPU

In [35]:
# 3) Создаем модель DeiT (с timm)
model = create_model('deit_base_distilled_patch16_224', pretrained=True, num_classes=10)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=3e-4)

# 4) Обучение и валидация
epochs = 10

for epoch in range(epochs):
    model.train()
    train_correct = 0
    train_total = 0

    for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs} train"):
        images, labels = batch[0], batch[1].long().squeeze()
        preds = model(images)
        loss = criterion(preds, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_total += labels.size(0)
        train_correct += (preds.argmax(dim=1) == labels).sum().item()


    train_acc = 100 * train_correct / train_total
    print(f"Train accuracy: {train_acc:.2f}%")

    model.eval()
    test_correct = 0
    test_total = 0

    with torch.no_grad():
        for batch in tqdm(test_loader, desc=f"Epoch {epoch+1}/{epochs} val"):
            images, labels = batch['image'], batch['label'].long().squeeze()
            preds = model(images)
            test_total += labels.size(0)
            test_correct += (preds.argmax(dim=1) == labels).sum().item()

    test_acc = 100 * test_correct / test_total
    print(f"Validation accuracy: {test_acc:.2f}%")

Epoch 1/10 train:   0%|          | 0/390 [00:00<?, ?it/s]


RuntimeError: Input type (unsigned char) and bias type (float) should be the same

In [9]:
from ffcv.writer import DatasetWriter
from ffcv.fields import IntField, RGBImageField
from torchvision.datasets import CIFAR10
from torchvision import transforms

train_dataset = CIFAR10(root='./data', train=True, download=True)
test_dataset = CIFAR10(root='./data', train=False, download=True)

writer = DatasetWriter('cifar_train.beton', {
    'image': RGBImageField(write_mode='jpg'),
    'label': IntField()
})
writer.from_indexed_dataset(train_dataset)

writer = DatasetWriter('cifar_test.beton', {
    'image': RGBImageField(write_mode='jpg'),
    'label': IntField()
})
writer.from_indexed_dataset(test_dataset)


100%|██████████| 50000/50000 [00:00<00:00, 159438.38it/s]
100%|██████████| 10000/10000 [00:00<00:00, 99691.35it/s]


In [19]:
import torch
from torch import nn
from torch.optim import AdamW
from ffcv.loader import Loader, OrderOption
from ffcv.transforms import ToDevice, ToTensor, NormalizeImage, RandomHorizontalFlip, RandomResizedCrop
from ffcv.transforms.common import Squeeze
from timm import create_model
from tqdm import tqdm
from ffcv.transforms import RandomResizedCrop


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [20]:
# === Параметры ===
BATCH_SIZE = 128
NUM_CLASSES = 10
EPOCHS = 20
LR = 3e-4

import numpy as np

mean = np.array([0.4914 * 255, 0.4822 * 255, 0.4465 * 255], dtype=np.float32)
std = np.array([0.2023 * 255, 0.1994 * 255, 0.2010 * 255], dtype=np.float32)


train_transforms = [
    RandomResizedCrop(224, (0.8, 1.0), (0.75, 1.333)),
    RandomHorizontalFlip(),
    ToTensor(),
    ToDevice(device),
    NormalizeImage(mean, std, type=np.float32)
]

label_pipeline = [Squeeze(), ToTensor(), ToDevice(device)]

train_loader = Loader('cifar_train.beton', batch_size=BATCH_SIZE, num_workers=4,
                      order=OrderOption.RANDOM,
                      pipelines={
                          'image': train_transforms,
                          'label': label_pipeline
                      })

test_loader = Loader('cifar_test.beton', batch_size=BATCH_SIZE, num_workers=2,
                     order=OrderOption.SEQUENTIAL,
                     pipelines={
                         'image': [
                             RandomResizedCrop((224, 224), scale=(1.0, 1.0)),  # resize only
                             ToTensor(),
                             ToDevice(device),
                             NormalizeImage(mean, std, type=np.float32)

                         ],
                         'label': label_pipeline
                     })


ModuleNotFoundError: No module named 'cupy'

In [None]:

# === Модель DeiT ===
model = create_model('deit_base_distilled_patch16_224', pretrained=True, num_classes=NUM_CLASSES)
model = model.to(device)

# === Оптимизатор и лосс ===
criterion = nn.CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=LR)

# === Тренировка ===
for epoch in range(EPOCHS):
    model.train()
    total, correct = 0, 0
    for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}"):
        images, labels = batch['image'], batch['label'].long().squeeze()
        preds = model(images)
        loss = criterion(preds, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total += labels.size(0)
        correct += (preds.argmax(1) == labels).sum().item()

    acc = correct / total * 100
    print(f"Train Accuracy: {acc:.2f}%")

    # === Валидация ===
    model.eval()
    total, correct = 0, 0
    with torch.no_grad():
        for batch in test_loader:
            images, labels = batch['image'], batch['label'].long().squeeze()
            preds = model(images)
            total += labels.size(0)
            correct += (preds.argmax(1) == labels).sum().item()
    val_acc = correct / total * 100
    print(f"Validation Accuracy: {val_acc:.2f}%")


In [6]:
beton_path = './data/cifar10_train.beton'


train_loader = Loader(
    beton_path,
    batch_size=128,
    num_workers=4,
    order=OrderOption.RANDOM,
    drop_last=True,
    pipelines={
        'image': [
            SimpleRGBImageDecoder(),
            ToTensor(),
            ToDevice(device),
            ToTorchImage(),
            Convert(torch.float32),
        ],
        'label': [
            IntDecoder(),
            ToTensor(),
            ToDevice(device),
            Convert(torch.long)
        ]
    }
)


## Training

In [7]:
import timm
import torch.nn as nn
from transformers import DeiTForImageClassification, DeiTFeatureExtractor


model = timm.create_model('deit_base_distilled_patch16_224', pretrained=False, num_classes=10)
model = model.to(device)

In [8]:
from ffcv.loader import Loader, OrderOption
from ffcv.transforms import ToTensor, ToDevice, ToTorchImage, Cutout, NormalizeImage
from ffcv.fields.decoders import IntDecoder, RandomResizedCropRGBImageDecoder
import numpy as np

image_pipeline = [
    RandomResizedCropRGBImageDecoder((224, 224)),
    ToTensor(),
    ToTorchImage(),
    Convert(torch.float32),
    NormalizeImage(
        mean=np.array([0.485 * 255, 0.456 * 255, 0.406 * 255], dtype=np.float32),
        std=np.array([0.229 * 255, 0.224 * 255, 0.225 * 255], dtype=np.float32),
        type=np.float32
    )
]

label_pipeline = [
    IntDecoder(),
    ToTensor(),
    Convert(torch.long)
]

train_loader = Loader(
    './data/cifar10_train.beton',
    batch_size=16,
    num_workers=2,
    order=OrderOption.RANDOM,
    drop_last=True,
    pipelines={
        'image': image_pipeline,
        'label': label_pipeline
    },
    os_cache=False,      # отключить JIT
    recompile=False      # тоже отключить повторную компиляцию (JIT)
)

KeyboardInterrupt: 

In [9]:
import gc
import torch

gc.collect()
torch.cuda.empty_cache()

In [10]:
print(torch.cuda.memory_summary(device=None, abbreviated=False))

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      | 335218 KiB | 335218 KiB | 335218 KiB |      0 B   |
|       from large pool | 334080 KiB | 334080 KiB | 334080 KiB |      0 B   |
|       from small pool |   1138 KiB |   1138 KiB |   1138 KiB |      0 B   |
|---------------------------------------------------------------------------|
| Active memory         | 335218 KiB | 335218 KiB | 335218 KiB |      0 B   |
|       from large pool | 334080 KiB | 334080 KiB | 334080 KiB |      0 B   |
|       from small pool |   1138 KiB |   1138 KiB |   1138 KiB |      0 B   |
|---------------------------------------------------------------

In [11]:
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm

optimizer = optim.AdamW(model.parameters(), lr=5e-4, weight_decay=0.05)
criterion = nn.CrossEntropyLoss()
scaler = torch.cuda.amp.GradScaler()


for epoch in range(3):
    model.train()
    total_loss = 0
    correct = 0
    total = 0

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
        images, labels = images.to(device), labels.squeeze(1).to(device)  # squeeze для labels

        optimizer.zero_grad()

        with torch.cuda.amp.autocast():
            output = model(pixel_values=images).logits
            loss = criterion(output, labels)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()


        total_loss += loss.item()
        _, pred = output.max(1)
        correct += pred.eq(labels).sum().item()
        total += labels.size(0)

    acc = 100. * correct / total
    print(f"Epoch {epoch+1} — Loss: {total_loss:.3f}, Accuracy: {acc:.2f}%")


  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():
Epoch 1: 100%|██████████| 3125/3125 [03:35<00:00, 14.50it/s]


Epoch 1 — Loss: 6199.151, Accuracy: 25.07%


Epoch 2: 100%|██████████| 3125/3125 [03:33<00:00, 14.66it/s]


Epoch 2 — Loss: 5571.412, Accuracy: 33.98%


Epoch 3: 100%|██████████| 3125/3125 [03:33<00:00, 14.64it/s]

Epoch 3 — Loss: 5231.953, Accuracy: 38.44%



