## Импорты, параметры и константы

In [1]:
import os
import json
from pathlib import Path
from collections import Counter
import time
import sys

import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.models import efficientnet_b1, EfficientNet_B1_Weights

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import psutil
from tqdm.notebook import tqdm


DEFAULT_TRAIN_CSV = "data/scooters_label_train.csv"
DEFAULT_TEST_CSV  = "data/scooters_label_test.csv"
OUTPUT_DIR = "EfficientNetB1"  
IMG_SIZE = 240  
BATCH_SIZE = 64 
NUM_WORKERS = min(8, os.cpu_count())
NUM_EPOCHS = 100
LR = 5e-4
WEIGHT_DECAY = 1e-5
PATIENCE = 30
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
NUM_CLASSES = 3
SEED = 42
PRETRAINED = True
DATA_FRACTION = 0.5 
MIN_FREE_GB = 6.0

## Архитектура (модель, агументация, загрузка данных и т.д.)

In [2]:
def seed_everything(seed=SEED):
    import random
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

class RandomRotate90:
    """Случайный поворот на 0, 90, 180 или 270 градусов"""
    def __call__(self, img):
        angle = np.random.choice([0, 90, 180, 270])
        return img.rotate(angle) if angle != 0 else img


class ScooterDataset(Dataset):
    def __init__(self, csv_file, class_to_idx=None, transforms=None, sample_frac=1.0, cache=False, min_free_gb=1.0, max_cache_images = 22000):
        if isinstance(csv_file, pd.DataFrame):
            self.df = csv_file.copy()
        else:
            self.df = pd.read_csv(csv_file)

        if 0 < sample_frac < 1.0:
            self.df = self.df.sample(frac=sample_frac, random_state=SEED).reset_index(drop=True)

        self.labels = self.df["scooter"].astype(str).tolist()
        self.paths = self.df["path"].tolist()

        if class_to_idx is None:
            classes = sorted(list(set(self.labels)))
            self.class_to_idx = {c: i for i, c in enumerate(classes)}
        else:
            self.class_to_idx = class_to_idx

        self.targets = [self.class_to_idx[l] for l in self.labels]
        self.transforms = transforms
        self.cache = cache
        self.min_free_gb = min_free_gb
        self.cached_images = {}
        self.max_cache_images = max_cache_images

        if self.cache:
            self._cache_images_safely()

    def _cache_images_safely(self):
        total_cached = 0
        for idx, p in enumerate(tqdm(self.paths, mininterval=3, desc="Caching images")):
            if total_cached >= self.max_cache_images:
                print(f"Reached max_cache_images limit: {self.max_cache_images}")
                break
                
            mem = psutil.virtual_memory()
            free_gb = mem.available / 1e9
            if free_gb < self.min_free_gb:
                print(f"Stopping caching: only {free_gb:.2f} GB free after {total_cached} images")
                break

            try:
                img = Image.open(p).convert("RGB")
            except:
                img = Image.new("RGB", (IMG_SIZE, IMG_SIZE), (0, 0, 0))

            if self.transforms:
                img = self.transforms(img)

            self.cached_images[idx] = img
            total_cached += 1

        print(f"Cached {total_cached} / {len(self.paths)} images")

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        label = self.targets[idx]
        if idx in self.cached_images:
            img = self.cached_images[idx]
        else:
            p = self.paths[idx]
            try:
                img = Image.open(p).convert("RGB")
            except:
                img = Image.new("RGB", (IMG_SIZE, IMG_SIZE), (0, 0, 0))
            if self.transforms:
                img = self.transforms(img)
        return img, label


def get_transforms(img_size=IMG_SIZE):
    train_transforms = transforms.Compose([
        RandomRotate90(),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomApply([transforms.ColorJitter(brightness=0.3, contrast=0.3)], p=0.7),
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])
    ])
    val_transforms = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])
    ])
    return train_transforms, val_transforms


def build_model(num_classes=NUM_CLASSES, pretrained=True):
    """Создание модели EfficientNet-B1"""
    weights = EfficientNet_B1_Weights.IMAGENET1K_V1 if pretrained else None
    model = efficientnet_b1(weights=weights)
    
    in_features = model.classifier[-1].in_features
    model.classifier[-1] = nn.Linear(in_features, num_classes)
    
    return model


def evaluate(model, dataloader, criterion, device, flag=True):
    model.eval()
    all_preds, all_targets = [], []
    running_loss = 0.0
    inference_times = []

    iterator = tqdm(dataloader, desc="Evaluating") if flag else dataloader

    with torch.no_grad():
        for imgs, labels in iterator:
            imgs = imgs.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            start = time.time()
            outputs = model(imgs)
            inference_times.append(time.time() - start)

            loss = criterion(outputs, labels)
            running_loss += loss.item() * imgs.size(0)

            preds = outputs.argmax(dim=1).cpu().numpy()
            all_preds.extend(preds.tolist())
            all_targets.extend(labels.cpu().numpy().tolist())

    avg_loss = running_loss / len(dataloader.dataset)
    avg_time_per_image = np.mean(inference_times) / imgs.size(0)
    total_time_per_image = np.sum(inference_times) / len(dataloader.dataset)

    acc = accuracy_score(all_targets, all_preds)
    prec = precision_score(all_targets, all_preds, average='macro', zero_division=0)
    rec = recall_score(all_targets, all_preds, average='macro', zero_division=0)
    f1 = f1_score(all_targets, all_preds, average='macro', zero_division=0)
    cm = confusion_matrix(all_targets, all_preds)

    print(f"Average inference time per image: {total_time_per_image*1000:.2f} ms")

    return {
        'loss': avg_loss, 'accuracy': acc, 'precision': prec, 'recall': rec, 'f1': f1,
        'confusion_matrix': cm, 'avg_inference_time': total_time_per_image
    }


def evaluate_test(model, dataloader, criterion, device, class_names, log=True):
    model.eval()
    all_preds, all_targets = [], []
    running_loss = 0.0
    inference_times = []

    iterator = tqdm(dataloader, desc="Evaluating on TEST set", leave=False) if log else dataloader

    with torch.no_grad():
        for imgs, labels in iterator:
            imgs = imgs.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            start = time.time()
            outputs = model(imgs)
            inference_times.append(time.time() - start)

            loss = criterion(outputs, labels)
            running_loss += loss.item() * imgs.size(0)

            preds = outputs.argmax(dim=1).cpu().numpy()
            all_preds.extend(preds.tolist())
            all_targets.extend(labels.cpu().numpy().tolist())

    avg_loss = running_loss / len(dataloader.dataset)
    acc = accuracy_score(all_targets, all_preds)
    prec = precision_score(all_targets, all_preds, average='macro', zero_division=0)
    rec = recall_score(all_targets, all_preds, average='macro', zero_division=0)
    f1 = f1_score(all_targets, all_preds, average='macro', zero_division=0)
    cm = confusion_matrix(all_targets, all_preds)

    print("\n---------------- Final TEST Evaluation ----------------")
    print(f"Loss:      {avg_loss:.4f}")
    print(f"Accuracy:  {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall:    {rec:.4f}")
    print(f"F1-score:  {f1:.4f}")
    print("\nConfusion Matrix:")
    print(cm)

    report = classification_report(
        all_targets, all_preds, 
        target_names=class_names, 
        output_dict=True, 
        zero_division=0
    )

    df_report = pd.DataFrame(report).transpose()
    print("\nPer-class metrics:")
    print(df_report.round(4))

    avg_time_per_image = np.sum(inference_times) / len(dataloader.dataset)
    print(f"\nAverage inference time per image: {avg_time_per_image*1000:.2f} ms")

    return {
        'loss': avg_loss,
        'accuracy': acc,
        'precision': prec,
        'recall': rec,
        'f1': f1,
        'confusion_matrix': cm,
        'per_class': df_report,
        'avg_inference_time': avg_time_per_image
    }


def train_loop(train_loader, val_loader, model, criterion, optimizer, device, num_epochs=NUM_EPOCHS):
    best_val_f1 = -float('inf')
    best_epoch = -1
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    for epoch in range(1, num_epochs+1):
        model.train()
        running_loss = 0.0
        total_cpu_time = 0.0
        total_gpu_time = 0.0

        pbar = tqdm(train_loader, desc=f"Epoch {epoch}/{num_epochs}")
        for imgs, labels in pbar:
            start_cpu = time.time()
            imgs = imgs.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)
            total_cpu_time += time.time() - start_cpu

            start_gpu = time.time()
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_gpu_time += time.time() - start_gpu

            running_loss += loss.item() * imgs.size(0)
            processed = min(((pbar.n + 1) * imgs.size(0)), len(train_loader.dataset))
            avg_loss_running = running_loss / processed
            pbar.set_postfix({'loss': f"{avg_loss_running:.4f}"})

        epoch_train_loss = running_loss / len(train_loader.dataset)

        # Валидация
        start_val = time.time()
        val_res = evaluate(model, val_loader, criterion, device, False)
        val_time = time.time() - start_val

        total_time = total_cpu_time + total_gpu_time + val_time
        cpu_percent = total_cpu_time / total_time * 100
        gpu_percent = total_gpu_time / total_time * 100
        val_percent = val_time / total_time * 100

        print(f"Epoch {epoch} summary: total_time={total_time:.1f}s, CPU={cpu_percent:.1f}%, GPU={gpu_percent:.1f}%, VAL={val_percent:.1f}%")
        print(f"Train loss={epoch_train_loss:.4f}, Val loss={val_res['loss']:.4f}, Val f1={val_res['f1']:.4f}, Val acc={val_res['accuracy']:.4f}")

        if val_res['f1'] > best_val_f1:
            best_val_f1 = val_res['f1']
            best_epoch = epoch
            ckpt_path = os.path.join(OUTPUT_DIR, 'efficientnet_b1_best.pth')
            torch.save({'epoch': epoch, 'model_state': model.state_dict(), 'optimizer_state': optimizer.state_dict()}, ckpt_path)
            print(f"Saved best model to {ckpt_path}")

        if epoch - best_epoch >= PATIENCE:
            print(f"Early stopping triggered")
            break

    print(f"Training finished. Best epoch: {best_epoch}")

## Загрузка данных и кэширование

In [3]:
seed_everything(SEED)
if DEVICE == "cuda":
    print(f"GPU: {torch.cuda.get_device_name(0)}")

train_transforms, val_transforms = get_transforms(IMG_SIZE)

full_train_df = pd.read_csv(DEFAULT_TRAIN_CSV)
train_df, val_df = train_test_split(
    full_train_df, 
    test_size=0.1, 
    random_state=SEED, 
    stratify=full_train_df['scooter']
)

test_df = pd.read_csv(DEFAULT_TEST_CSV)

if 0 < DATA_FRACTION < 1.0:
    train_df = train_df.sample(frac=DATA_FRACTION, random_state=SEED).reset_index(drop=True)
    val_df = val_df.sample(frac=DATA_FRACTION, random_state=SEED).reset_index(drop=True)
    test_df  = test_df.sample(frac=DATA_FRACTION, random_state=SEED).reset_index(drop=True)

classes = sorted(train_df['scooter'].unique())
class_to_idx = {c: i for i, c in enumerate(classes)}
print("Class to idx:", class_to_idx)

train_dataset = ScooterDataset(train_df, class_to_idx=class_to_idx, transforms=train_transforms, cache=True, min_free_gb=MIN_FREE_GB)
val_dataset   = ScooterDataset(val_df,   class_to_idx=class_to_idx, transforms=val_transforms,   cache=True, min_free_gb=MIN_FREE_GB)
test_dataset  = ScooterDataset(test_df, class_to_idx=class_to_idx, transforms=val_transforms, cache=False)

print(f"Train samples: {len(train_dataset)}, Val samples: {len(val_dataset)}, Test samples: {len(test_dataset)}")

counts = Counter(train_dataset.targets)
class_counts = [counts[i] for i in range(len(class_to_idx))]
total = sum(class_counts)
class_weights = [total/(len(class_counts)*c) if c>0 else 0.0 for c in class_counts]
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float).to(DEVICE)

pin_memory = DEVICE == "cuda"
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,  num_workers=NUM_WORKERS, pin_memory=pin_memory)
val_loader   = DataLoader(val_dataset,   batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=pin_memory)
test_loader  = DataLoader(test_dataset,  batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=pin_memory)

GPU: NVIDIA L4
Class to idx: {'absent': 0, 'part': 1, 'present': 2}


Caching images:   0%|          | 0/17641 [00:00<?, ?it/s]

Cached 17641 / 17641 images


Caching images:   0%|          | 0/1960 [00:00<?, ?it/s]

Cached 1960 / 1960 images
Train samples: 17641, Val samples: 1960, Test samples: 4900


## Обучение

In [4]:
model = build_model(num_classes=len(class_to_idx), pretrained=PRETRAINED).to(DEVICE)
criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)
optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)

train_loop(train_loader, val_loader, model, criterion, optimizer, DEVICE, NUM_EPOCHS)

Downloading: "https://download.pytorch.org/models/efficientnet_b1_rwightman-533bc792.pth" to /tmp/xdg_cache/torch/hub/checkpoints/efficientnet_b1_rwightman-533bc792.pth
100%|██████████| 30.1M/30.1M [00:00<00:00, 82.4MB/s]


Epoch 1/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.37 ms
Epoch 1 summary: total_time=41.8s, CPU=0.2%, GPU=88.8%, VAL=11.0%
Train loss=0.3936, Val loss=0.2946, Val f1=0.8560, Val acc=0.8893
Saved best model to EfficientNetB1/mobilenetv3_large_best.pth


Epoch 2/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 2 summary: total_time=30.4s, CPU=0.3%, GPU=85.8%, VAL=13.9%
Train loss=0.2334, Val loss=0.3331, Val f1=0.8758, Val acc=0.9148
Saved best model to EfficientNetB1/mobilenetv3_large_best.pth


Epoch 3/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.17 ms
Epoch 3 summary: total_time=30.3s, CPU=0.3%, GPU=85.6%, VAL=14.0%
Train loss=0.1634, Val loss=0.3753, Val f1=0.8846, Val acc=0.9199
Saved best model to EfficientNetB1/mobilenetv3_large_best.pth


Epoch 4/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 4 summary: total_time=30.2s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.1235, Val loss=0.4222, Val f1=0.8598, Val acc=0.9077


Epoch 5/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 5 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.1011, Val loss=0.5640, Val f1=0.8847, Val acc=0.9214
Saved best model to EfficientNetB1/mobilenetv3_large_best.pth


Epoch 6/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.17 ms
Epoch 6 summary: total_time=30.6s, CPU=0.3%, GPU=85.4%, VAL=14.3%
Train loss=0.0665, Val loss=0.6137, Val f1=0.8781, Val acc=0.9204


Epoch 7/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 7 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.1%
Train loss=0.0525, Val loss=0.7638, Val f1=0.8718, Val acc=0.9158


Epoch 8/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 8 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0614, Val loss=0.6826, Val f1=0.8728, Val acc=0.9051


Epoch 9/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.17 ms
Epoch 9 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0648, Val loss=0.5339, Val f1=0.8746, Val acc=0.9143


Epoch 10/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 10 summary: total_time=30.3s, CPU=0.3%, GPU=85.6%, VAL=14.1%
Train loss=0.0420, Val loss=0.6804, Val f1=0.8915, Val acc=0.9224
Saved best model to EfficientNetB1/mobilenetv3_large_best.pth


Epoch 11/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 11 summary: total_time=30.2s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0446, Val loss=0.6141, Val f1=0.8740, Val acc=0.9066


Epoch 12/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 12 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.1%
Train loss=0.0481, Val loss=0.6699, Val f1=0.8714, Val acc=0.9122


Epoch 13/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 13 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0212, Val loss=0.6666, Val f1=0.8822, Val acc=0.9143


Epoch 14/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.15 ms
Epoch 14 summary: total_time=30.3s, CPU=0.3%, GPU=85.6%, VAL=14.1%
Train loss=0.0358, Val loss=0.7105, Val f1=0.8832, Val acc=0.9128


Epoch 15/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.19 ms
Epoch 15 summary: total_time=30.4s, CPU=0.3%, GPU=85.8%, VAL=13.9%
Train loss=0.0246, Val loss=0.7684, Val f1=0.8809, Val acc=0.9199


Epoch 16/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 16 summary: total_time=30.3s, CPU=0.3%, GPU=85.6%, VAL=14.1%
Train loss=0.0359, Val loss=0.7271, Val f1=0.8881, Val acc=0.9230


Epoch 17/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 17 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0387, Val loss=0.7491, Val f1=0.8905, Val acc=0.9245


Epoch 18/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 18 summary: total_time=30.4s, CPU=0.3%, GPU=85.4%, VAL=14.3%
Train loss=0.0286, Val loss=0.7019, Val f1=0.8899, Val acc=0.9250


Epoch 19/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 19 summary: total_time=30.3s, CPU=0.3%, GPU=85.6%, VAL=14.1%
Train loss=0.0283, Val loss=0.7283, Val f1=0.8813, Val acc=0.9199


Epoch 20/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 20 summary: total_time=30.4s, CPU=0.3%, GPU=85.8%, VAL=13.9%
Train loss=0.0312, Val loss=0.7021, Val f1=0.8742, Val acc=0.9153


Epoch 21/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.17 ms
Epoch 21 summary: total_time=30.3s, CPU=0.3%, GPU=85.5%, VAL=14.2%
Train loss=0.0408, Val loss=0.6670, Val f1=0.8913, Val acc=0.9240


Epoch 22/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.19 ms
Epoch 22 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0237, Val loss=0.6984, Val f1=0.8857, Val acc=0.9209


Epoch 23/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 23 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0162, Val loss=0.8926, Val f1=0.8904, Val acc=0.9245


Epoch 24/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 24 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0217, Val loss=0.8154, Val f1=0.8708, Val acc=0.9112


Epoch 25/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 25 summary: total_time=30.4s, CPU=0.3%, GPU=85.8%, VAL=13.9%
Train loss=0.0170, Val loss=0.8937, Val f1=0.8731, Val acc=0.9133


Epoch 26/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 26 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0107, Val loss=1.2169, Val f1=0.8693, Val acc=0.9133


Epoch 27/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 27 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0289, Val loss=0.7537, Val f1=0.8409, Val acc=0.8704


Epoch 28/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 28 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.1%
Train loss=0.0291, Val loss=0.8371, Val f1=0.8730, Val acc=0.9158


Epoch 29/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 29 summary: total_time=30.4s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0300, Val loss=0.8947, Val f1=0.8844, Val acc=0.9224


Epoch 30/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 30 summary: total_time=30.2s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0116, Val loss=0.9240, Val f1=0.8861, Val acc=0.9235


Epoch 31/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 31 summary: total_time=30.4s, CPU=0.3%, GPU=85.5%, VAL=14.2%
Train loss=0.0069, Val loss=1.0257, Val f1=0.8852, Val acc=0.9235


Epoch 32/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 32 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0064, Val loss=0.9802, Val f1=0.8917, Val acc=0.9219
Saved best model to EfficientNetB1/mobilenetv3_large_best.pth


Epoch 33/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.17 ms
Epoch 33 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.1%
Train loss=0.0288, Val loss=0.9084, Val f1=0.8868, Val acc=0.9250


Epoch 34/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 34 summary: total_time=30.5s, CPU=0.3%, GPU=85.8%, VAL=14.0%
Train loss=0.0235, Val loss=1.2647, Val f1=0.8522, Val acc=0.9077


Epoch 35/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 35 summary: total_time=30.3s, CPU=0.3%, GPU=85.6%, VAL=14.1%
Train loss=0.0277, Val loss=0.8543, Val f1=0.8832, Val acc=0.9168


Epoch 36/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.17 ms
Epoch 36 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0159, Val loss=0.9523, Val f1=0.8785, Val acc=0.9153


Epoch 37/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.17 ms
Epoch 37 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0147, Val loss=1.0272, Val f1=0.8877, Val acc=0.9245


Epoch 38/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.17 ms
Epoch 38 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0116, Val loss=1.1112, Val f1=0.8834, Val acc=0.9209


Epoch 39/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.17 ms
Epoch 39 summary: total_time=30.4s, CPU=0.3%, GPU=85.8%, VAL=13.9%
Train loss=0.0296, Val loss=1.1535, Val f1=0.8664, Val acc=0.9158


Epoch 40/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 40 summary: total_time=30.3s, CPU=0.3%, GPU=85.5%, VAL=14.2%
Train loss=0.0314, Val loss=0.6962, Val f1=0.8757, Val acc=0.9122


Epoch 41/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.17 ms
Epoch 41 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0168, Val loss=0.9098, Val f1=0.8852, Val acc=0.9235


Epoch 42/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.17 ms
Epoch 42 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0083, Val loss=0.8735, Val f1=0.8762, Val acc=0.9102


Epoch 43/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 43 summary: total_time=30.5s, CPU=0.3%, GPU=85.8%, VAL=14.0%
Train loss=0.0054, Val loss=1.1986, Val f1=0.8922, Val acc=0.9276
Saved best model to EfficientNetB1/mobilenetv3_large_best.pth


Epoch 44/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.17 ms
Epoch 44 summary: total_time=30.4s, CPU=0.3%, GPU=85.6%, VAL=14.1%
Train loss=0.0096, Val loss=1.1141, Val f1=0.8770, Val acc=0.9184


Epoch 45/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.18 ms
Epoch 45 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.1%
Train loss=0.0119, Val loss=1.1259, Val f1=0.8770, Val acc=0.9168


Epoch 46/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 46 summary: total_time=30.3s, CPU=0.3%, GPU=85.8%, VAL=14.0%
Train loss=0.0259, Val loss=0.9812, Val f1=0.8849, Val acc=0.9230


Epoch 47/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.17 ms
Epoch 47 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0069, Val loss=0.9543, Val f1=0.8840, Val acc=0.9184


Epoch 48/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 48 summary: total_time=30.5s, CPU=0.3%, GPU=85.7%, VAL=14.1%
Train loss=0.0211, Val loss=1.1036, Val f1=0.8711, Val acc=0.9117


Epoch 49/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.17 ms
Epoch 49 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0270, Val loss=1.0768, Val f1=0.8814, Val acc=0.9158


Epoch 50/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 50 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0124, Val loss=1.2452, Val f1=0.8701, Val acc=0.9128


Epoch 51/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.17 ms
Epoch 51 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0133, Val loss=0.9851, Val f1=0.8773, Val acc=0.9128


Epoch 52/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.20 ms
Epoch 52 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0224, Val loss=1.0410, Val f1=0.8889, Val acc=0.9209


Epoch 53/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 53 summary: total_time=30.4s, CPU=0.3%, GPU=85.9%, VAL=13.9%
Train loss=0.0043, Val loss=1.2445, Val f1=0.8821, Val acc=0.9158


Epoch 54/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.17 ms
Epoch 54 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0039, Val loss=1.0151, Val f1=0.8888, Val acc=0.9199


Epoch 55/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.17 ms
Epoch 55 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0114, Val loss=1.1472, Val f1=0.8931, Val acc=0.9245
Saved best model to EfficientNetB1/mobilenetv3_large_best.pth


Epoch 56/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 56 summary: total_time=30.2s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0211, Val loss=0.8675, Val f1=0.8692, Val acc=0.9112


Epoch 57/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.17 ms
Epoch 57 summary: total_time=30.5s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0203, Val loss=0.9997, Val f1=0.8682, Val acc=0.9128


Epoch 58/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 58 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0147, Val loss=1.1761, Val f1=0.8769, Val acc=0.9158


Epoch 59/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.18 ms
Epoch 59 summary: total_time=30.3s, CPU=0.3%, GPU=85.5%, VAL=14.2%
Train loss=0.0067, Val loss=1.2415, Val f1=0.8807, Val acc=0.9209


Epoch 60/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.16 ms
Epoch 60 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0052, Val loss=1.1900, Val f1=0.8881, Val acc=0.9219


Epoch 61/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.17 ms
Epoch 61 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.0%
Train loss=0.0076, Val loss=1.1962, Val f1=0.8790, Val acc=0.9168


Epoch 62/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.17 ms
Epoch 62 summary: total_time=30.6s, CPU=0.3%, GPU=85.4%, VAL=14.3%
Train loss=0.0057, Val loss=1.4689, Val f1=0.8837, Val acc=0.9209


Epoch 63/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.18 ms
Epoch 63 summary: total_time=30.3s, CPU=0.3%, GPU=85.7%, VAL=14.1%
Train loss=0.0236, Val loss=0.8356, Val f1=0.8830, Val acc=0.9158


Epoch 64/100:   0%|          | 0/276 [00:00<?, ?it/s]

Average inference time per image: 0.22 ms
Epoch 64 summary: total_time=30.4s, CPU=0.3%, GPU=85.3%, VAL=14.4%
Train loss=0.0270, Val loss=0.9862, Val f1=0.8684, Val acc=0.9112


Epoch 65/100:   0%|          | 0/276 [00:00<?, ?it/s]

KeyboardInterrupt: 

## Оценка на test

In [6]:
best_ckpt = os.path.join(OUTPUT_DIR, 'efficientnet_b1_best.pth')  

if os.path.exists(best_ckpt):
    checkpoint = torch.load(best_ckpt, map_location=DEVICE)
    model.load_state_dict(checkpoint['model_state'])

    print("\n---------------- Final evaluation on TEST set ----------------")
    test_results = evaluate_test(model, test_loader, criterion, DEVICE, class_names=list(class_to_idx.keys()))
else:
    print("No checkpoint found.")


---------------- Final evaluation on TEST set ----------------


Evaluating on TEST set:   0%|          | 0/77 [00:00<?, ?it/s]


---------------- Final TEST Evaluation ----------------
Loss:      0.9967
Accuracy:  0.9188
Precision: 0.8845
Recall:    0.8804
F1-score:  0.8822

Confusion Matrix:
[[ 385   14   23]
 [  24  684  176]
 [  24  137 3433]]

Per-class metrics:
              precision  recall  f1-score    support
absent           0.8891  0.9123    0.9006   422.0000
part             0.8192  0.7738    0.7958   884.0000
present          0.9452  0.9552    0.9502  3594.0000
accuracy         0.9188  0.9188    0.9188     0.9188
macro avg        0.8845  0.8804    0.8822  4900.0000
weighted avg     0.9176  0.9188    0.9181  4900.0000

Average inference time per image: 0.18 ms
