In [None]:

# Environment bootstrap cell. Upgrade pip itself first.
%pip install --upgrade pip


# Remove any PyTorch packages already present so the reinstall below starts clean.
%pip uninstall -y torch torchvision torchaudio


# Reinstall PyTorch from the cu118 wheel index, bypassing pip's download cache.
%pip install --no-cache-dir torch torchvision torchaudio \
               --index-url https://download.pytorch.org/whl/cu118

# Remaining dependencies; only albumentations is version-pinned.
%pip install timm albumentations==1.4.18 opencv-python-headless tqdm resnest ipywidgets


# Enable the ipywidgets extension for classic Notebook and install/build the JupyterLab widget manager
# (presumably needed for the tqdm.notebook progress bars used below).
!jupyter nbextension enable --py widgetsnbextension --sys-prefix
!jupyter labextension install @jupyter-widgets/jupyterlab-manager --no-build
!jupyter lab build  


Vision Transformer (ViT), 61 classes

In [None]:
import os, re, pandas as pd

# Dataset root. It can be overridden with the GROCERY_DATASET_DIR environment variable so
# the notebook is not tied to a single machine; the default is the original local path.
BASE_DIR = os.environ.get("GROCERY_DATASET_DIR", r"F:\GroceryStoreDataset-master")
TRAIN_TXT = os.path.join(BASE_DIR, "dataset", "train.txt")
VAL_TXT   = os.path.join(BASE_DIR, "dataset", "val.txt")

# Each split file is read as three whitespace-separated string columns with no header row.
COLS = ["rel_path", "fine", "coarse"]
train_df = pd.read_csv(TRAIN_TXT, sep=r"\s+", header=None, names=COLS, dtype=str)
val_df   = pd.read_csv(VAL_TXT,   sep=r"\s+", header=None, names=COLS, dtype=str)

def make_abs(p: str) -> str:
    """Turn a path taken from a split file into a path under ``BASE_DIR``.

    Trailing commas, semicolons and spaces are removed first. Paths that do not
    already start with ``dataset`` get that directory prepended.
    """
    cleaned = p.rstrip(",; ")
    relative = cleaned if cleaned.startswith("dataset") else os.path.join("dataset", cleaned)
    return os.path.join(BASE_DIR, relative)

# Apply the same preparation to both splits:
#   1. resolve every relative path to an absolute one,
#   2. keep only the digits of the "fine" column and use them as the integer label,
#   3. keep just the two columns the datasets need,
#   4. drop rows whose image file is not present on disk.
train_df, val_df = (
    split.assign(
        image_path=split["rel_path"].apply(make_abs),
        label=split["fine"].str.replace(r"[^\d]", "", regex=True).astype(int),
    )[["image_path", "label"]]
    .pipe(lambda kept: kept[kept["image_path"].apply(os.path.exists)])
    .reset_index(drop=True)
    for split in (train_df, val_df)
)

print("Train / Val samples:", len(train_df), "/", len(val_df))
print(train_df.head(3))


In [None]:
from torchvision import transforms
from torch.utils.data import Dataset
from PIL import Image

# Training pipeline: random crop/flip/colour augmentation, then tensor conversion
# and per-channel normalisation.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(size=224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(
        brightness=0.2,
        contrast=0.2,
        saturation=0.2,
        hue=0.1,
    ),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# Validation pipeline: deterministic resize + centre crop, same normalisation.
val_transform = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

class CustomDataset(Dataset):
    """Map-style dataset backed by a frame with ``image_path`` and ``label`` columns.

    Args:
        df: DataFrame providing the ``image_path`` and ``label`` columns.
        transform: Optional callable applied to the RGB ``PIL.Image`` before it is returned.
    """

    def __init__(self, df, transform=None):
        self.paths  = df['image_path'].tolist()
        self.labels = df['label'].tolist()
        self.tf     = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.paths)

    def __getitem__(self, i):
        """Return ``(image, label)`` for sample ``i``."""
        # Open inside a context manager so the underlying file handle is closed
        # immediately; convert() returns an independent, fully loaded image.
        with Image.open(self.paths[i]) as handle:
            img = handle.convert('RGB')
        # Compare against None instead of relying on the transform's truthiness.
        if self.tf is not None:
            img = self.tf(img)
        return img, self.labels[i]

print("Transforms & Dataset ready")


In [None]:
from torch.utils.data import DataLoader

BATCH_SIZE = 16

train_ds = CustomDataset(df=train_df, transform=train_transform)
val_ds   = CustomDataset(df=val_df, transform=val_transform)

# Both loaders share every setting except shuffling, which is enabled for training only.
train_loader, val_loader = (
    DataLoader(
        split_ds,
        batch_size=BATCH_SIZE,
        shuffle=reshuffle,
        num_workers=0,
        pin_memory=True,
    )
    for split_ds, reshuffle in ((train_ds, True), (val_ds, False))
)

print(f"Train: {len(train_ds)} samples, Val: {len(val_ds)} samples, Batch={BATCH_SIZE}")


In [None]:
import torch
import torch.nn as nn
import timm
from torch.optim.lr_scheduler import CosineAnnealingLR
from tqdm.notebook import tqdm

# --- Hyperparameters ---------------------------------------------------------
# None of these names was defined anywhere above this cell, so a fresh
# "Restart & Run All" stopped here with a NameError. The values mirror the
# ConvNeXt experiment further down in this notebook (10 head-only + 50 full
# fine-tuning epochs).
# NOTE(review): confirm these match the settings used for the original ViT run.
DEVICE           = torch.device("cuda" if torch.cuda.is_available() else "cpu")
HEAD_ONLY_EPOCHS = 10
FULL_EPOCHS      = 50
TOTAL_EPOCHS     = HEAD_ONLY_EPOCHS + FULL_EPOCHS
LR_HEAD          = 3e-4
LR_FULL          = 1e-5
WEIGHT_DECAY     = 1e-4
# Largest label id + 1, so every label is a valid class index for the loss.
NUM_CLASSES      = int(pd.concat([train_df["label"], val_df["label"]]).max()) + 1

model = timm.create_model('vit_base_patch16_224',
                         pretrained=True,
                         num_classes=NUM_CLASSES).to(DEVICE)

# Phase 1: train only the classification head, keep the backbone frozen.
for p in model.parameters():     p.requires_grad = False
for p in model.head.parameters(): p.requires_grad = True

optimizer = torch.optim.AdamW(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=LR_HEAD,
    weight_decay=WEIGHT_DECAY
)
scheduler = CosineAnnealingLR(optimizer, T_max=TOTAL_EPOCHS)
criterion = nn.CrossEntropyLoss()

best_val_acc = 0.0

for epoch in range(1, TOTAL_EPOCHS+1):
    print(f"\n=== Epoch {epoch}/{TOTAL_EPOCHS} ===")
    # Phase 2: after the head-only epochs, unfreeze everything and restart the
    # optimizer and schedule with the smaller full-fine-tuning learning rate.
    if epoch == HEAD_ONLY_EPOCHS + 1:
        for p in model.parameters():
            p.requires_grad = True
        optimizer = torch.optim.AdamW(model.parameters(),
                                      lr=LR_FULL,
                                      weight_decay=WEIGHT_DECAY)
        scheduler = CosineAnnealingLR(optimizer, T_max=FULL_EPOCHS)
        print(f" Unfroze full backbone, LR set to {LR_FULL}")

    model.train()
    tr_loss = 0.0
    tr_correct = 0
    num_seen = 0
    train_bar = tqdm(train_loader, desc="Train", leave=False)
    for imgs, lbls in train_bar:
        imgs, lbls = imgs.to(DEVICE), lbls.to(DEVICE)

        logits = model(imgs)
        loss = criterion(logits, lbls)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        batch_size = imgs.size(0)
        tr_loss    += loss.item() * batch_size
        tr_correct += (logits.argmax(1) == lbls).sum().item()

        # Accumulate the true sample count; multiplying the step index by the
        # current batch size is wrong whenever the last batch is smaller.
        num_seen += batch_size
        train_bar.set_postfix({
            "loss": f"{tr_loss/num_seen:.3f}",
            "acc":  f"{100*tr_correct/num_seen:.1f}%"
        })

    # T_max is expressed in epochs, so the cosine schedule advances once per
    # epoch (stepping it per batch made the learning rate oscillate within an epoch).
    scheduler.step()

    train_acc  = 100 * tr_correct / len(train_loader.dataset)
    train_loss = tr_loss / len(train_loader.dataset)

    model.eval()
    val_loss = 0.0
    val_correct = 0
    with torch.no_grad():
        for imgs, lbls in tqdm(val_loader, desc="Val  ", leave=False):
            imgs, lbls = imgs.to(DEVICE), lbls.to(DEVICE)
            out = model(imgs)
            val_loss    += criterion(out, lbls).item() * imgs.size(0)
            val_correct += (out.argmax(1) == lbls).sum().item()

    val_acc  = 100 * val_correct / len(val_loader.dataset)
    val_loss = val_loss / len(val_loader.dataset)

    # Keep a checkpoint of the best validation accuracy seen so far.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_vit.pth")
        print(f"New best! val_acc = {val_acc:.2f}%")

    print(
        f"Epoch {epoch:02d}/{TOTAL_EPOCHS} | "
        f"train_loss={train_loss:.3f}, train_acc={train_acc:.1f}% | "
        f"val_loss={val_loss:.3f}, val_acc={val_acc:.1f}%"
    )

torch.save(model.state_dict(), "last_vit.pth")
print("Training complete – last_vit.pth saved.")


In [None]:
import torch
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt

# Collect predictions for the whole validation set with the model currently in memory.
model.eval()
all_preds, all_labels = [], []
with torch.no_grad():
    for batch_imgs, batch_lbls in val_loader:
        batch_logits = model(batch_imgs.to(DEVICE))
        all_preds += list(batch_logits.argmax(1).cpu().numpy())
        all_labels += list(batch_lbls.numpy())

# Confusion matrix drawn as an un-annotated heatmap.
cm = confusion_matrix(all_labels, all_preds)
fig, ax = plt.subplots(figsize=(8, 8))
sns.heatmap(cm, annot=False, cmap="Blues", ax=ax)
ax.set_title("Confusion Matrix")
ax.set_ylabel("True")
ax.set_xlabel("Pred")
plt.show()

print(classification_report(all_labels, all_preds, digits=3))


In [None]:
import re, pandas as pd

# NOTE(review): no cell above writes training.log; presumably it was produced by
# capturing the training cell's printed output outside this notebook — confirm.
with open("training.log", encoding="utf-8") as f:
    lines = f.readlines()

# Matches the per-epoch summary line and captures
# (epoch, train_loss, train_acc, val_loss, val_acc).
pattern = re.compile(
    r"Epoch\s+(\d+)[/\\]\d+\s*\|\s*"
    r"train_loss=([\d.]+),\s*train_acc=([\d.]+)%\s*\|\s*"
    r"val_loss=([\d.]+),\s*val_acc=([\d.]+)%"
)

# One tuple of captured strings per matching line, in file order.
captured = [hit.groups() for hit in map(pattern.search, lines) if hit]

epochs  = [int(fields[0]) for fields in captured]
tr_loss = [float(fields[1]) for fields in captured]
tr_acc  = [float(fields[2]) for fields in captured]
vl_loss = [float(fields[3]) for fields in captured]
vl_acc  = [float(fields[4]) for fields in captured]

df = pd.DataFrame({
    "epoch":      epochs,
    "train_loss": tr_loss,
    "train_acc":  tr_acc,
    "val_loss":   vl_loss,
    "val_acc":    vl_acc
})
df.to_csv("parsed_metrics.csv", index=False)
print("Saved parsed_metrics.csv with", len(df), "rows")


In [None]:
import pandas as pd
# Reload the parsed metrics and show the first and last rows.
df = pd.read_csv("parsed_metrics.csv")
for preview in (df.head(), df.tail()):
    print(preview)


In [None]:
import matplotlib.pyplot as plt

df = pd.read_csv("parsed_metrics.csv")

# One figure per metric, each overlaying the training and validation curves.
for (train_col, val_col), y_label, heading in (
    (("train_loss", "val_loss"), "Loss", "Train vs Val Loss"),
    (("train_acc", "val_acc"), "Accuracy (%)", "Train vs Val Accuracy"),
):
    fig, ax = plt.subplots()
    ax.plot(df["epoch"], df[train_col], label=train_col)
    ax.plot(df["epoch"], df[val_col], label=val_col)
    ax.set_xlabel("Епоха")
    ax.set_ylabel(y_label)
    ax.set_title(heading)
    ax.legend()
    plt.show()


ConvNeXt, 61 classes

In [None]:
import os
import re
import pandas as pd

# Dataset root. It can be overridden with the GROCERY_DATASET_DIR environment variable so
# the notebook is not tied to a single machine; the default is the original local path.
BASE_DIR  = os.environ.get("GROCERY_DATASET_DIR", r"F:\GroceryStoreDataset-master")
TRAIN_TXT = os.path.join(BASE_DIR, "dataset", "train.txt")
VAL_TXT   = os.path.join(BASE_DIR, "dataset", "val.txt")

# Each split file is read as three whitespace-separated string columns with no header row.
COLS = ["rel_path", "fine", "coarse"]
train_df = pd.read_csv(TRAIN_TXT, sep=r"\s+", header=None, names=COLS, dtype=str)
val_df   = pd.read_csv(VAL_TXT,   sep=r"\s+", header=None, names=COLS, dtype=str)

def make_abs(p: str) -> str:
    """Resolve a path read from a split file against ``BASE_DIR``.

    Trailing commas and spaces are stripped; a leading ``dataset`` directory is
    added when the path does not already start with it.
    """
    trimmed = p.rstrip(", ")
    if trimmed.startswith("dataset"):
        return os.path.join(BASE_DIR, trimmed)
    return os.path.join(BASE_DIR, os.path.join("dataset", trimmed))

# Resolve every relative path from the split files to an absolute path.
train_df["image_path"] = train_df["rel_path"].apply(make_abs)
val_df["image_path"]   = val_df["rel_path"].apply(make_abs)

# The integer label is whatever digits the "fine" column contains.
train_df["label"] = train_df["fine"].str.replace(r"[^\d]", "", regex=True).astype(int)
val_df["label"]   = val_df["fine"].str.replace(r"[^\d]", "", regex=True).astype(int)

train_df = train_df[["image_path", "label"]]
val_df   = val_df[["image_path", "label"]]

# Drop rows whose image file is not present on disk.
train_df = train_df[train_df["image_path"].apply(os.path.exists)].reset_index(drop=True)
val_df   = val_df[val_df["image_path"].apply(os.path.exists)].reset_index(drop=True)

# The classifier needs one output per label id in [0, max_label]. Counting the
# distinct labels undercounts as soon as an id is missing (for example after the
# file-existence filter above), which would put some targets out of range for
# the loss. For contiguous ids both definitions give the same number.
NUM_CLASSES = int(pd.concat([train_df["label"], val_df["label"]]).max()) + 1

print(f"Train samples: {len(train_df)}, Val samples: {len(val_df)}, Classes = {NUM_CLASSES}")
print(train_df.head(3))


In [None]:
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image

# Training pipeline: random crop/flip/colour augmentation, then tensor conversion
# and per-channel normalisation.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(size=224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Validation pipeline: deterministic resize + centre crop, same normalisation.
val_transform = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

class GroceryDataset(Dataset):
    """Map-style dataset backed by a frame with ``image_path`` and ``label`` columns.

    Args:
        df: DataFrame providing the ``image_path`` and ``label`` columns.
        transform: Optional callable applied to the RGB ``PIL.Image`` before it is returned.
    """

    def __init__(self, df, transform=None):
        self.paths  = df["image_path"].tolist()
        self.labels = df["label"].tolist()
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``."""
        # Open inside a context manager so the underlying file handle is closed
        # immediately; convert() returns an independent, fully loaded image.
        with Image.open(self.paths[idx]) as handle:
            img = handle.convert("RGB")
        # Compare against None instead of relying on the transform's truthiness.
        if self.transform is not None:
            img = self.transform(img)
        return img, self.labels[idx]

BATCH_SIZE = 16
train_ds = GroceryDataset(df=train_df, transform=train_transform)
val_ds   = GroceryDataset(df=val_df, transform=val_transform)

# Both loaders share every setting except shuffling, which is enabled for training only.
train_loader, val_loader = (
    DataLoader(split_ds, batch_size=BATCH_SIZE, shuffle=reshuffle, pin_memory=True)
    for split_ds, reshuffle in ((train_ds, True), (val_ds, False))
)

print(f"DataLoaders ready: train batches = {len(train_loader)}, val batches = {len(val_loader)}")


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import timm

print("NUM_CLASSES =", NUM_CLASSES)

# Run configuration: a head-only warm-up followed by full fine-tuning.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
head_only_epochs, full_epochs = 10, 50
TOTAL_EPOCHS = head_only_epochs + full_epochs
lr_head, lr_full = 3e-4, 1e-5
weight_decay = 1e-4

# Backbone created without a classifier (num_classes=0); a new head is attached below.
model = timm.create_model("convnext_base", pretrained=True, num_classes=0)

# Feature width is read from the existing head's norm layer.
in_ch = model.head.norm.normalized_shape[0]
# New head: reuse the model's pooling module, then flatten -> LayerNorm -> Dropout -> Linear.
model.head = nn.Sequential(
    model.head.global_pool,
    nn.Flatten(1),
    nn.LayerNorm(in_ch),
    nn.Dropout(0.0),
    nn.Linear(in_ch, NUM_CLASSES),
)

model.to(DEVICE)

# Phase 1 trains the new head only: freeze everything, then re-enable the head.
model.requires_grad_(False)
model.head.requires_grad_(True)

optimizer = optim.AdamW(
    [weight for weight in model.parameters() if weight.requires_grad],
    lr=lr_head,
    weight_decay=weight_decay,
)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=head_only_epochs)
criterion = nn.CrossEntropyLoss()
scaler    = torch.cuda.amp.GradScaler()

# Per-epoch history filled in by the training loop.
best_val_acc = 0.0
train_losses, train_accs = [], []
val_losses, val_accs = [], []

param_sizes = [(weight.numel(), weight.requires_grad) for weight in model.parameters()]
total     = sum(size for size, _ in param_sizes)
trainable = sum(size for size, is_trainable in param_sizes if is_trainable)
print(f" Model ready on {DEVICE}")
print(f"Trainable params: {trainable}/{total} ({100*trainable/total:.2f}%)")
print(f"Запускаємо тренування на {TOTAL_EPOCHS} епох "
      f"({head_only_epochs} head-only + {full_epochs} full-fine)")


In [None]:
# Two-phase training loop: the first `head_only_epochs` epochs use the optimizer
# built in the setup cell; afterwards all parameters are unfrozen and training
# continues with a fresh optimizer and cosine schedule.
# Running this cell appends to train_losses / train_accs / val_losses / val_accs.
for epoch in range(1, TOTAL_EPOCHS+1):
    # Switch to full fine-tuning on the first epoch after the head-only phase.
    if epoch == head_only_epochs + 1:
        for p in model.parameters():
            p.requires_grad = True
        optimizer = optim.AdamW(model.parameters(), lr=lr_full, weight_decay=weight_decay)
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=full_epochs)
        print(f"Розморожено всі шари — lr ← {lr_full}")

    # ---- training pass ----
    model.train()
    running_loss, running_correct = 0.0, 0
    for imgs, labels in train_loader:
        imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        # Forward pass and loss run under autocast.
        with torch.cuda.amp.autocast():
            logits = model(imgs)
            loss   = criterion(logits, labels)
        # Backward on the scaled loss; the scaler then drives the optimizer step
        # and updates its own scale factor.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Weight the batch loss by the batch size so the epoch value is a per-sample mean.
        running_loss    += loss.item() * imgs.size(0)
        running_correct += (logits.argmax(1) == labels).sum().item()

    train_loss = running_loss / len(train_loader.dataset)
    train_acc  = 100 * running_correct / len(train_loader.dataset)
    train_losses.append(train_loss)
    train_accs.append(train_acc)
    # The learning-rate schedule advances once per epoch.
    scheduler.step()

    # ---- validation pass (no gradients) ----
    model.eval()
    val_loss, val_correct = 0.0, 0
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
            with torch.cuda.amp.autocast():
                out  = model(imgs)
                loss = criterion(out, labels)
            val_loss    += loss.item() * imgs.size(0)
            val_correct += (out.argmax(1) == labels).sum().item()

    val_loss = val_loss / len(val_loader.dataset)
    val_acc  = 100 * val_correct / len(val_loader.dataset)
    val_losses.append(val_loss)
    val_accs.append(val_acc)

    # Checkpoint whenever validation accuracy improves.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_convnext.pth")
        print(f"Епоха {epoch}: new best val_acc = {val_acc:.2f}%")

    print(f"Епоха {epoch:02d}/{TOTAL_EPOCHS} | "
          f"train_loss={train_loss:.3f}, train_acc={train_acc:.1f}% | "
          f"val_loss={val_loss:.3f}, val_acc={val_acc:.1f}%")


In [None]:
import pandas as pd
import torch

torch.save(model.state_dict(), "last_convnext.pth")
print("Фінальна модель збережена як last_convnext.pth")

# Size the log from the history that was actually recorded instead of assuming
# exactly TOTAL_EPOCHS entries: an interrupted or re-run training cell leaves the
# lists shorter/longer than TOTAL_EPOCHS (or unequal), and pandas would then
# refuse to build the frame. After a complete single run this is identical to
# range(1, TOTAL_EPOCHS + 1).
n_logged = min(len(train_losses), len(train_accs), len(val_losses), len(val_accs))

logs_df = pd.DataFrame({
    "epoch":      list(range(1, n_logged + 1)),
    "train_loss": train_losses[:n_logged],
    "train_acc":  train_accs[:n_logged],
    "val_loss":   val_losses[:n_logged],
    "val_acc":    val_accs[:n_logged]
})
logs_df.to_csv("convnext_training_log.csv", index=False)
print("Лог тренування збережено в convnext_training_log.csv")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

logs_df = pd.read_csv("convnext_training_log.csv")

# One figure per metric, each overlaying the training and validation curves.
for (train_col, val_col), y_label, heading in (
    (("train_loss", "val_loss"), "Loss", "Train vs Val Loss"),
    (("train_acc", "val_acc"), "Accuracy (%)", "Train vs Val Accuracy"),
):
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.plot(logs_df["epoch"], logs_df[train_col], label=train_col)
    ax.plot(logs_df["epoch"], logs_df[val_col], label=val_col)
    ax.set_xlabel("Епоха")
    ax.set_ylabel(y_label)
    ax.set_title(heading)
    ax.legend()
    fig.tight_layout()
    plt.show()


In [None]:
import torch
from sklearn.metrics import confusion_matrix, classification_report
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


# Restore the best checkpoint. map_location makes the load work even when the
# checkpoint was written on a GPU and this cell runs on a CPU-only machine
# (the ResNet cells in this notebook load their checkpoints the same way).
model.load_state_dict(torch.load("best_convnext.pth", map_location=DEVICE))
model.eval()
model.to(DEVICE)

# Collect predictions and ground truth for the whole validation set.
all_preds, all_labels = [], []
with torch.no_grad():
    for imgs, labels in val_loader:
        imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
        out = model(imgs)
        preds = out.argmax(dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Confusion matrix heatmap, saved to disk and shown inline.
cm = confusion_matrix(all_labels, all_preds)
plt.figure(figsize=(12,10))
sns.heatmap(cm, cmap="Blues", cbar=True, square=True)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix — ConvNeXt-Base на валідації")
plt.tight_layout()
plt.savefig("confusion_matrix_convnext.png")
plt.show()

# Per-class metrics as a CSV; zero_division=0 reports 0 for undefined metrics.
report_dict = classification_report(all_labels, all_preds, output_dict=True, zero_division=0)
report_df = pd.DataFrame(report_dict).T
report_df.to_csv("classification_report_convnext.csv", index=True)

print("Збережено:")
print("   • confusion_matrix_convnext.png")
print("   • classification_report_convnext.csv")


ResNet-152

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from tqdm import tqdm
from torchvision.transforms.v2 import MixUp, RandomErasing
import random
import numpy as np

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Seed the torch, NumPy and Python random number generators with the same value.
for seed_rng in (torch.manual_seed, np.random.seed, random.seed):
    seed_rng(42)


In [None]:
# NOTE: the next cell assigns train_transform and val_transform again before
# either is used, so the definitions in this cell are superseded.
train_transform = transforms.Compose([
    transforms.Resize(size=(256, 256)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandAugment(num_ops=2, magnitude=9),
    transforms.ToTensor(),
    # RandomErasing operates on tensors, hence its position after ToTensor.
    transforms.RandomErasing(p=0.5),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

val_transform = transforms.Compose([
    transforms.Resize(size=(256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


In [None]:
from torchvision import transforms

# Training pipeline: fixed-size resize, then flip / colour / affine / RandAugment
# on the PIL image, followed by tensor conversion, random erasing and normalisation.
train_transform = transforms.Compose([
    transforms.Resize(size=(256, 256)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(
        brightness=0.4,
        contrast=0.4,
        saturation=0.3,
        hue=0.04,
    ),
    transforms.RandomAffine(degrees=5, translate=(0.05, 0.05)),
    transforms.RandAugment(num_ops=2, magnitude=9),
    transforms.ToTensor(),
    # RandomErasing operates on tensors, hence its position after ToTensor.
    transforms.RandomErasing(p=0.25),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Validation pipeline: deterministic resize and normalisation only.
val_transform = transforms.Compose([
    transforms.Resize(size=(256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


In [None]:
import os

# Dataset root. It can be overridden with the GROCERY_DATASET_DIR environment variable so
# the notebook is not tied to a single machine; the default is the original local path.
_dataset_root = os.environ.get("GROCERY_DATASET_DIR", "F:/GroceryStoreDataset-master")

# ImageFolder takes each sub-directory of the given root as one class.
train_ds = ImageFolder(f"{_dataset_root}/dataset/train", transform=train_transform)
val_ds   = ImageFolder(f"{_dataset_root}/dataset/val", transform=val_transform)

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True, num_workers=0)
val_loader = DataLoader(val_ds, batch_size=32, shuffle=False, num_workers=0)

NUM_CLASSES = len(train_ds.classes)


In [None]:
# `pretrained=True` is deprecated in torchvision; the explicit weights enum is the
# supported spelling (and is already used elsewhere in this notebook).
# IMAGENET1K_V1 is the weight set that `pretrained=True` selected.
model = torchvision.models.resnet152(
    weights=torchvision.models.ResNet152_Weights.IMAGENET1K_V1
)
# Replace the final fully connected layer with one sized for this dataset.
in_features = model.fc.in_features
model.fc = nn.Linear(in_features, NUM_CLASSES)
model = model.to(device)


In [None]:
HEAD_ONLY_EPOCHS = 10
TOTAL_EPOCHS = 60

# Freeze everything except the final fc layer. This has to happen BEFORE the
# optimizer is built: the optimizer filters on requires_grad, and with the
# previous ordering the filter ran while every parameter was still trainable,
# so it selected the whole network.
for name, param in model.named_parameters():
    if "fc" not in name:
        param.requires_grad = False

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=0.01, momentum=0.9)
# The cosine schedule spans the head-only phase (same value as the former literal 10).
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=HEAD_ONLY_EPOCHS)
mixup = MixUp(alpha=1.0, num_classes=NUM_CLASSES)


In [None]:
from tqdm import tqdm
import pandas as pd
from datetime import datetime

best_val_acc = 0.0
train_losses = []
val_accuracies = []

# The forward pass runs under float16 autocast, so the backward pass needs
# gradient scaling to keep small gradients from underflowing (the ConvNeXt loop
# in this notebook already does this). Disabled automatically without CUDA.
scaler = torch.cuda.amp.GradScaler(enabled=(device.type == "cuda"))

for epoch in range(TOTAL_EPOCHS):
    model.train()
    running_loss = 0.0
    running_corrects = 0
    total = 0

    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{TOTAL_EPOCHS}", leave=False)
    for images, labels in loop:
        images, labels = images.to(device), labels.to(device)

        # Apply MixUp to roughly 60% of the batches; it replaces the integer
        # labels with per-class soft targets.
        if random.random() < 0.6:
            images, labels = mixup(images, labels)

        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            outputs = model(images)
            loss = criterion(outputs, labels)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        _, preds = torch.max(outputs, 1)
        # For mixed batches the dominant class of the soft target is used, so the
        # running training accuracy is only an approximation.
        if labels.ndim == 2:
            labels = labels.argmax(dim=1)
        running_loss += loss.item() * labels.size(0)
        running_corrects += torch.sum(preds == labels).item()
        total += labels.size(0)

        loop.set_postfix(loss=running_loss/total, acc=running_corrects/total*100)

    scheduler.step()

    epoch_train_loss = running_loss / total
    train_losses.append(epoch_train_loss)

    # ---- validation pass (no gradients, no MixUp) ----
    model.eval()
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            val_correct += torch.sum(preds == labels).item()
            val_total += labels.size(0)

    val_acc = val_correct / val_total
    val_accuracies.append(val_acc)

    # Checkpoint whenever validation accuracy improves.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_resnet152.pth")

    print(f"Епоха {epoch+1:02d} | val_acc={val_acc*100:.2f}%, best={best_val_acc*100:.2f}%")

    # Once the head-only epochs are done, unfreeze the whole network and restart
    # the optimizer/schedule with a smaller learning rate for the remaining epochs.
    if epoch + 1 == HEAD_ONLY_EPOCHS:
        for param in model.parameters():
            param.requires_grad = True
        optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-4)
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=(TOTAL_EPOCHS - HEAD_ONLY_EPOCHS))
        print("==> FULL-FINETUNE PHASE")

# Persist the per-epoch history under a timestamped file name.
log_df = pd.DataFrame({
    "epoch": list(range(1, TOTAL_EPOCHS+1)),
    "train_loss": train_losses,
    "val_acc": [x*100 for x in val_accuracies]  
})
csv_name = f"training_log_resnet152_{datetime.now():%Y%m%d_%H%M%S}.csv"
log_df.to_csv(csv_name, index=False)
print(f"Лог тренування збережено у \"{csv_name}\"")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# The training cell writes its log under a timestamped name stored in `csv_name`.
# Prefer that file when it exists in this session, so the plots always show the
# run that was just executed; otherwise fall back to the previously saved log.
log_path = globals().get("csv_name", "training_log_resnet152_20250529_204320.csv")
log_df = pd.read_csv(log_path)

plt.figure(figsize=(8,4))
plt.plot(log_df["epoch"], log_df["train_loss"], label="Train Loss")
plt.title("Train Loss по епохах")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.grid(True)
plt.legend()
plt.show()

plt.figure(figsize=(8,4))
plt.plot(log_df["epoch"], log_df["val_acc"], label="Val Accuracy", color="orange")
plt.title("Validation Accuracy (%) по епохах")
plt.xlabel("Epoch")
plt.ylabel("Accuracy (%)")
plt.grid(True)
plt.legend()
plt.show()


In [None]:
import torch
import torchvision
import torch.nn as nn
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Build the architecture only: every weight is overwritten by the checkpoint
# loaded below, so fetching the ImageNet weights first was wasted work (and
# required network access).
model = torchvision.models.resnet152(weights=None)
in_features = model.fc.in_features
model.fc = nn.Linear(in_features, NUM_CLASSES)
model = model.to(device)

model.load_state_dict(torch.load("best_resnet152.pth", map_location=device))
model.eval()

all_preds = []
all_labels = []

# Collect predictions and ground truth for the whole validation set.
with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

print("Classification Report:\n")
print(classification_report(all_labels, all_preds, target_names=train_ds.classes))

cm = confusion_matrix(all_labels, all_preds)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=train_ds.classes)
fig, ax = plt.subplots(figsize=(12,12))
disp.plot(ax=ax, xticks_rotation=90)
plt.title("Confusion Matrix")
plt.show()


In [None]:
from torch.utils.data import ConcatDataset, DataLoader

# `test_ds` is only created in a later cell, so a top-to-bottom run failed here
# with a NameError. Build it the same way that cell does when it is not yet defined.
if "test_ds" not in globals():
    test_ds = ImageFolder(r"F:/real_test", transform=val_transform)

# NOTE(review): the "real test" images become training data here, so accuracy
# later measured on that same folder is not an estimate of generalisation.
mixed_train_ds = ConcatDataset([train_ds, test_ds])   
mixed_loader   = DataLoader(mixed_train_ds, batch_size=32,
                            shuffle=True, num_workers=0, pin_memory=True)

# Train only the final fc layer.
for p in model.parameters():        p.requires_grad = False
for p in model.fc.parameters():     p.requires_grad = True

optimizer = torch.optim.AdamW(model.fc.parameters(), lr=1e-5, weight_decay=1e-4)
criterion  = torch.nn.CrossEntropyLoss()
EPOCHS_FINE = 5

# float16 autocast needs gradient scaling to avoid gradient underflow;
# the scaler is disabled automatically without CUDA.
scaler = torch.cuda.amp.GradScaler(enabled=(device.type == "cuda"))

from tqdm.notebook import tqdm
for ep in range(EPOCHS_FINE):
    model.train()
    running = 0
    correct = 0
    total = 0
    loop = tqdm(mixed_loader, desc=f"Fine {ep+1}/{EPOCHS_FINE}", leave=False)
    for imgs, lbls in loop:
        imgs, lbls = imgs.to(device), lbls.to(device)
        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            out = model(imgs)
            loss = criterion(out, lbls)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        running += loss.item()*lbls.size(0)
        correct += (out.argmax(1)==lbls).sum().item()
        total += lbls.size(0)
        loop.set_postfix(loss=running/total, acc=correct/total*100)
    print(f"Fine-epoch {ep+1}:  train_acc {correct/total*100:.1f}%")

torch.save(model.state_dict(), "resnet152_finetuned.pth")


In [None]:
from tqdm.notebook import tqdm
import torch.nn as nn
import torch

# Train only the last residual stage (layer4) and the fc head; freeze the rest.
for name, p in model.named_parameters():
    if name.startswith("layer4") or name.startswith("fc"):
        p.requires_grad = True
    else:
        p.requires_grad = False

# Two parameter groups: a smaller learning rate for layer4 than for the fc head.
opt = torch.optim.AdamW([
    {"params": [p for n, p in model.named_parameters() if n.startswith("layer4")],
     "lr": 1e-5},
    {"params": model.fc.parameters(), "lr": 1e-4}
], weight_decay=1e-4)

criterion = nn.CrossEntropyLoss()
EPOCHS_FINE = 4

# float16 autocast needs gradient scaling to avoid gradient underflow;
# the scaler is disabled automatically without CUDA.
scaler = torch.cuda.amp.GradScaler(enabled=(device.type == "cuda"))

for ep in range(EPOCHS_FINE):
    model.train()
    running = 0
    correct = 0
    total = 0
    loop = tqdm(mixed_loader, desc=f"Fine {ep+1}/{EPOCHS_FINE}", leave=False)
    for imgs, lbls in loop:
        imgs, lbls = imgs.to(device), lbls.to(device)
        opt.zero_grad()
        with torch.cuda.amp.autocast():
            out = model(imgs)
            loss = criterion(out, lbls)
        scaler.scale(loss).backward()
        scaler.step(opt)
        scaler.update()

        running += loss.item() * lbls.size(0)
        correct += (out.argmax(1) == lbls).sum().item()
        total   += lbls.size(0)
        loop.set_postfix(loss=running/total, acc=f"{correct/total*100:.1f}%")
    print(f"Fine-epoch {ep+1}:  train_acc {correct/total*100:.1f}%")

torch.save(model.state_dict(), "resnet152_layer4_finetuned.pth")
print("Збережено: resnet152_layer4_finetuned.pth")


In [None]:
import torch
import torchvision
import torch.nn as nn
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import numpy as np

# The test images go through the same deterministic pipeline as validation.
test_transform = val_transform   

test_root = r"F:/real_test"    
test_ds   = ImageFolder(test_root, transform=test_transform)
test_loader = DataLoader(test_ds, batch_size=32, shuffle=False, num_workers=0)

print(f"Test samples: {len(test_ds)},  Classes: {test_ds.classes}")

# Build the architecture only: every weight is overwritten by the checkpoint
# loaded below, so fetching the ImageNet weights first was wasted work (and
# required network access).
model = torchvision.models.resnet152(weights=None)
model.fc = nn.Linear(model.fc.in_features, len(test_ds.classes))
model = model.to(device)

# NOTE(review): an earlier cell in this notebook fine-tunes on
# ConcatDataset([train_ds, test_ds]) before saving this checkpoint, so the
# accuracy reported below is measured on data the model was trained on.
model.load_state_dict(torch.load("resnet152_layer4_finetuned.pth", map_location=device))
model.eval()

all_preds, all_labels = [], []
with torch.no_grad():
    for imgs, labels in test_loader:
        imgs, labels = imgs.to(device), labels.to(device)
        outputs = model(imgs)
        preds = outputs.argmax(1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

test_acc = (np.array(all_preds) == np.array(all_labels)).mean() * 100
print(f"\nTest Accuracy: {test_acc:.2f}%")

print("\nClassification Report:\n")
print(classification_report(all_labels, all_preds, target_names=test_ds.classes))

cm  = confusion_matrix(all_labels, all_preds)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=test_ds.classes)
fig, ax = plt.subplots(figsize=(8,8))
disp.plot(ax=ax, xticks_rotation=45, values_format='d')
plt.title("Real-test Confusion Matrix")
plt.show()


ViT на 3 класах

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from tqdm import tqdm
from torchvision.transforms.v2 import MixUp, RandomErasing
import random
import numpy as np

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Seed the torch, NumPy and Python random number generators with the same value.
for seed_rng in (torch.manual_seed, np.random.seed, random.seed):
    seed_rng(42)


In [None]:

from torchvision import transforms

# Training pipeline: resize, random crop to 224, flip and RandAugment on the PIL
# image, then tensor conversion, random erasing and normalisation.
train_transform = transforms.Compose([
    transforms.Resize(size=256),
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandAugment(num_ops=2, magnitude=9),
    transforms.ToTensor(),
    # RandomErasing operates on tensors, hence its position after ToTensor.
    transforms.RandomErasing(p=0.5),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Validation pipeline: deterministic resize + centre crop, same normalisation.
val_transform = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


In [None]:
import os
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# Dataset root. It can be overridden with the GROCERY_DATASET_DIR environment variable so
# the notebook is not tied to a single machine; the default is the original local path.
DATA_DIR = os.environ.get("GROCERY_DATASET_DIR", "F:/GroceryStoreDataset-master") + "/dataset"
TRAIN_DIR = os.path.join(DATA_DIR, "train")
VAL_DIR   = os.path.join(DATA_DIR, "val")

# ImageFolder takes each sub-directory of the given root as one class.
train_ds = ImageFolder(TRAIN_DIR, transform=train_transform)
val_ds   = ImageFolder(VAL_DIR,   transform=val_transform)

train_loader = DataLoader(train_ds, batch_size=32, shuffle=True,  num_workers=4)
val_loader   = DataLoader(val_ds,   batch_size=32, shuffle=False, num_workers=4)

# NOTE(review): hard-coded; presumably equal to len(train_ds.classes) — confirm.
NUM_CLASSES = 3    

print(f"Train images: {len(train_ds)},  Val images: {len(val_ds)},  Classes: {NUM_CLASSES}")
# Read the labels from ImageFolder's `targets` list instead of indexing the
# dataset: indexing decodes and randomly augments ten images only to discard
# them, and raised IndexError for datasets with fewer than ten samples.
print("Приклад міток перших 10 зображень:", train_ds.targets[:10])


In [None]:
import timm, re

CKPT_PATH = "last_vit.pth"
MODEL_NAME = "vit_base_patch16_224"

# Fresh (non-pretrained) architecture with a head sized for the new task.
model = timm.create_model(MODEL_NAME, pretrained=False, num_classes=NUM_CLASSES)

# Copy over every checkpoint tensor except those belonging to a classifier
# (keys starting with "head.", "fc." or "classifier."), so the new head keeps
# its fresh initialisation.
classifier_key = re.compile(r"^(head|fc|classifier)\.")
checkpoint = torch.load(CKPT_PATH, map_location="cpu")
state = {}
for key, tensor in checkpoint.items():
    if classifier_key.match(key) is None:
        state[key] = tensor

# strict=False tolerates the head keys that were filtered out above.
load_result = model.load_state_dict(state, strict=False)
missing, unexpected = load_result.missing_keys, load_result.unexpected_keys
print(f" Backbone завантажено  |  missing = {len(missing)}, unexpected = {len(unexpected)}")

model = model.to(device)


In [None]:
from torchvision.transforms.v2 import MixUp, RandomErasing   


In [None]:
import random
from timm.loss import SoftTargetCrossEntropy    
criterion = nn.CrossEntropyLoss()
use_mixup = False

# Only parameters whose name starts with "head" stay trainable.
for param_name, weight in model.named_parameters():
    weight.requires_grad_(param_name.startswith("head"))

# NOTE: the training cell below assigns `optimizer` and `scheduler` again before its loop.
optimizer = optim.AdamW(
    [weight for weight in model.parameters() if weight.requires_grad],
    lr=1e-3,
    weight_decay=1e-4,
)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10)

HEAD_ONLY_EPOCHS, TOTAL_EPOCHS = 10, 60


In [None]:
import random


In [None]:
from tqdm import tqdm
import pandas as pd
from datetime import datetime
import torch, random

# Two-phase fine-tuning of the ViT classifier.
#   Phase 1 (epochs 1..HEAD_ONLY_EPOCHS): SGD on model.head only.
#   Phase 2 (remaining epochs): all parameters unfrozen, AdamW with a small
#            backbone LR and a larger head LR.
# The weights with the best validation accuracy are saved to best_vit3cls.pth.
best_val_acc   = 0.0
train_losses   = []   # mean training loss per epoch
val_accuracies = []   # validation accuracy per epoch (0-1 fraction)
patience = 6          # epochs without a new best before stopping
epochs_no_improve = 0

# Phase-1 optimizer/scheduler; supersedes any optimizer created in earlier cells.
optimizer = optim.SGD(model.head.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=HEAD_ONLY_EPOCHS)

use_amp   = False
use_mixup = False

# The loss never changes between batches, so bind it once.
loss_fn = criterion

for epoch in range(TOTAL_EPOCHS):
    model.train()
    running_loss, running_corrects, total = 0, 0, 0
    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{TOTAL_EPOCHS}", leave=False)

    for images, labels in loop:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        if use_amp:
            with torch.cuda.amp.autocast():
                outputs = model(images)
                loss    = loss_fn(outputs, labels)
        else:
            outputs = model(images)
            loss    = loss_fn(outputs, labels)

        # A NaN loss would poison the weights; skip the batch without stepping.
        if torch.isnan(loss):
            print("NaN detected — пропускаємо батч")
            continue

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()

        running_loss    += loss.item() * labels.size(0)
        running_corrects += (outputs.argmax(1) == labels).sum().item()
        total += labels.size(0)
        loop.set_postfix(loss=running_loss/total,
                         acc=running_corrects/total*100)

    scheduler.step()
    # total stays 0 when every batch was skipped as NaN; log NaN instead of
    # dying with ZeroDivisionError.
    train_losses.append(running_loss/total if total else float("nan"))

    model.eval()
    val_correct, val_total = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            val_correct += (outputs.argmax(1) == labels).sum().item()
            val_total   += labels.size(0)
    val_acc = val_correct / val_total
    val_accuracies.append(val_acc)

    # Improvements smaller than 1e-4 do not count as a new best.
    if val_acc > best_val_acc + 1e-4:
        best_val_acc = val_acc
        epochs_no_improve = 0
        torch.save(model.state_dict(), "best_vit3cls.pth")
    else:
        epochs_no_improve += 1

    print(f"Епоха {epoch+1:>2}/{TOTAL_EPOCHS} | val_acc = {val_acc*100:5.2f}% "
          f"| best = {best_val_acc*100:5.2f}%")

    if epoch + 1 == HEAD_ONLY_EPOCHS:
        # Start full fine-tuning from the best head-only weights.
        model.load_state_dict(torch.load("best_vit3cls.pth"))
        print(f"Повернули найкращий head-only чекпойнт ({best_val_acc*100:.2f} %)")

        for p in model.parameters():
            p.requires_grad = True

        # The head gets its own group below, so it must be kept out of the
        # backbone groups: torch optimizers raise ValueError when a parameter
        # appears in more than one parameter group.
        head_param_ids = {id(p) for p in model.head.parameters()}
        decay, no_decay = [], []
        for n, p in model.named_parameters():
            if not p.requires_grad or id(p) in head_param_ids:
                continue
            # Biases and normalisation parameters are trained without weight decay.
            if any(k in n.lower() for k in ["bias", "norm", "ln"]):
                no_decay.append(p)
            else:
                decay.append(p)

        optimizer = optim.AdamW(
            [
                {"params": no_decay, "lr": 3e-6, "weight_decay": 0.0},
                {"params": decay,    "lr": 3e-6, "weight_decay": 1e-2},
                {"params": model.head.parameters(), "lr": 3e-4, "weight_decay": 1e-2},
            ]
        )
        scheduler = optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=TOTAL_EPOCHS - HEAD_ONLY_EPOCHS)
        # The plateau counter describes the head-only phase; give the
        # fine-tuning phase a fresh patience budget.
        epochs_no_improve = 0
        print("➜  FULL-FINETUNE PHASE (AdamW, LR_backbone = 3e-6)")

    # NOTE(review): this can still end training during the head-only phase
    # (before any fine-tuning) if the head plateaus for `patience` epochs.
    if epochs_no_improve >= patience:
        print(" Early-stopping (val не росте)")
        break

# Write the per-epoch history to a timestamped CSV; val_acc is stored in percent.
n_logged_epochs = len(train_losses)
log_df = pd.DataFrame({
    "epoch": list(range(1, n_logged_epochs + 1)),
    "train_loss": train_losses,
    "val_acc": [acc * 100 for acc in val_accuracies],
})
run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
csv_name = "training_log_vit3cls_" + run_stamp + ".csv"
log_df.to_csv(csv_name, index=False)
print(f"\n Лог тренування збережено у «{csv_name}»")


In [None]:
import torch
import timm
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision import transforms

# Evaluate the saved best ViT checkpoint on the validation split:
# per-class precision/recall/F1 and a confusion matrix.
VAL_DIR = "F:/GroceryStoreDataset-master/dataset/val"
# Deterministic preprocessing: resize, centre crop, ImageNet mean/std normalisation.
val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])
val_ds     = ImageFolder(VAL_DIR, transform=val_transform)
val_loader = DataLoader(val_ds, batch_size=32, shuffle=False, num_workers=4)

print(f"Validation samples: {len(val_ds)},  Classes: {val_ds.classes}\n")

NUM_CLASSES = 3
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# The architecture must match the one used when best_vit3cls.pth was saved,
# because load_state_dict is strict here.
model = timm.create_model("vit_base_patch16_224", pretrained=False, num_classes=NUM_CLASSES)
model.load_state_dict(torch.load("best_vit3cls.pth", map_location=DEVICE))
model.to(DEVICE)
model.eval()

all_preds  = []
all_labels = []

# Collect the arg-max prediction and ground-truth label of every validation image.
with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        outputs = model(images)
        preds = outputs.argmax(dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

print("Classification Report (на маленькому валідному сеті):\n")
print(
    classification_report(
        all_labels,
        all_preds,
        target_names=val_ds.classes, 
        digits=2
    )
)

cm = confusion_matrix(all_labels, all_preds)

disp = ConfusionMatrixDisplay(confusion_matrix=cm,
                              display_labels=val_ds.classes)

fig, ax = plt.subplots(figsize=(8, 8))
disp.plot(ax=ax, cmap="viridis", xticks_rotation=45)

# Title previously carried a stray closing parenthesis.
plt.title("Confusion Matrix", fontsize=16)
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Training curves for the ViT run, read back from a saved CSV log.
# NOTE(review): the file name is pinned to one specific run; the training cell
# writes a new timestamped file every time it is executed.
log_df = pd.read_csv("training_log_vit3cls_20250531_025153.csv")

# --- training loss -----------------------------------------------------------
loss_fig, loss_ax = plt.subplots(figsize=(8, 4))
loss_ax.plot(log_df["epoch"], log_df["train_loss"],
             label="Train Loss", color="steelblue", linewidth=2)
loss_ax.set_title("Train Loss по епохах", fontsize=14)
loss_ax.set_xlabel("Epoch", fontsize=12)
loss_ax.set_ylabel("Loss", fontsize=12)
loss_ax.grid(True)
loss_ax.legend(fontsize=12)
loss_fig.tight_layout()
plt.show()

# --- validation accuracy (this log already stores val_acc in percent) --------
acc_fig, acc_ax = plt.subplots(figsize=(8, 4))
acc_ax.plot(log_df["epoch"], log_df["val_acc"],
            label="Val Accuracy (%)", color="orange", linewidth=2)
acc_ax.set_title("Validation Accuracy (%) по епохах", fontsize=14)
acc_ax.set_xlabel("Epoch", fontsize=12)
acc_ax.set_ylabel("Accuracy (%)", fontsize=12)
acc_ax.set_ylim(0, 100)
acc_ax.grid(True)
acc_ax.legend(fontsize=12)
acc_fig.tight_layout()
plt.show()


In [None]:
# Independent test set, loaded with the same deterministic preprocessing as validation.
TEST_DIR = "F:/real_test"

test_ds = ImageFolder(TEST_DIR, transform=val_transform)
test_loader = DataLoader(test_ds, batch_size=32, shuffle=False, num_workers=4)

print(f" Test samples: {len(test_ds)}, Classes: {test_ds.classes}")
# ImageFolder.targets already holds the class index of every sample, so the
# preview does not need to decode and transform the first images.
print("Приклади міток:", test_ds.targets[:10])


In [None]:
import torch
import timm
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import numpy as np

# Test-set evaluation of the 3-class ViT: accuracy, per-class report, confusion matrix.

# Deterministic preprocessing (resize -> centre crop -> tensor -> ImageNet normalisation).
preprocess_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
]
val_transform = transforms.Compose(preprocess_steps)

TEST_DIR = "F:/real_test"
test_ds = ImageFolder(TEST_DIR, transform=val_transform)
test_loader = DataLoader(test_ds, batch_size=32, shuffle=False, num_workers=4)

class_names = test_ds.classes
print(f"Test samples: {len(test_ds)}, Classes: {class_names}")
# Pull one batch as a sanity check of tensor shape and label values.
imgs, lbls = next(iter(test_loader))
print("Batch image shape:", imgs.shape, "  Batch labels example:", lbls[:10].tolist())

NUM_CLASSES = 3
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Re-create the architecture, then restore the best weights saved during training.
model = timm.create_model("vit_base_patch16_224", pretrained=False, num_classes=NUM_CLASSES)
best_weights = torch.load("best_vit3cls.pth", map_location=DEVICE)
model.load_state_dict(best_weights)
model = model.to(DEVICE)
model.eval()
print("ViT-модель (3 класи) завантажено та у режимі eval.")

# Gather predictions batch by batch, then join them into flat arrays.
pred_batches, label_batches = [], []
with torch.no_grad():
    for images, labels in test_loader:
        logits = model(images.to(DEVICE))
        pred_batches.append(logits.argmax(dim=1).cpu().numpy())
        label_batches.append(labels.numpy())

all_preds  = np.concatenate(pred_batches)
all_labels = np.concatenate(label_batches)

test_acc = np.mean(all_preds == all_labels) * 100.0
print(f"\n Test Accuracy: {test_acc:.2f}%\n")
print("Classification Report:\n")
report_text = classification_report(
    all_labels,
    all_preds,
    target_names=class_names,
    digits=2,
)
print(report_text)

cm = confusion_matrix(all_labels, all_preds)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)

fig, ax = plt.subplots(figsize=(6, 6))
disp.plot(ax=ax, cmap="viridis", colorbar=True, values_format='d')
ax.set_title("Confusion Matrix ", fontsize=16)
plt.xticks(rotation=45, ha="right")
plt.xlabel("Predicted label", fontsize=12)
plt.ylabel("True label", fontsize=12)
plt.tight_layout()
plt.show()


ConvNeXt-Base 3 class

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import timm
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision import transforms
import numpy as np
import random
from tqdm import tqdm
import pandas as pd

# ---- Device and reproducibility ---------------------------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
for seed_everything in (torch.manual_seed, np.random.seed, random.seed):
    seed_everything(42)
torch.backends.cudnn.benchmark = True

# ---- Hyper-parameters ---------------------------------------------------------
NUM_CLASSES = 3
HEAD_ONLY_EPOCHS = 5                              # classifier-only warm-up
FULL_EPOCHS = 5                                   # whole-network fine-tuning
TOTAL_EPOCHS = HEAD_ONLY_EPOCHS + FULL_EPOCHS
LR_HEAD = 1e-3                                    # head LR during warm-up
LR_FULL_BACKBONE = 3e-6                           # backbone LR during fine-tuning
LR_FULL_HEAD = 1e-4                               # head LR during fine-tuning
WEIGHT_DECAY = 1e-4
PATIENCE = 3                                      # early-stopping patience, in epochs

# ---- Transforms ---------------------------------------------------------------
# Pieces shared by several pipelines.
imagenet_normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                          [0.229, 0.224, 0.225])
center_crop_224 = [transforms.Resize(256), transforms.CenterCrop(224)]

# Heavy augmentation for the head-only phase.
train_transform_head = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandAugment(num_ops=2, magnitude=9),
    transforms.ToTensor(),
    transforms.RandomErasing(p=0.5),
    imagenet_normalize,
])

# Light augmentation (horizontal flip only) for the full fine-tuning phase.
train_transform_full = transforms.Compose(
    center_crop_224
    + [transforms.RandomHorizontalFlip(), transforms.ToTensor(), imagenet_normalize]
)

# Deterministic validation pipeline.
val_transform = transforms.Compose(
    center_crop_224 + [transforms.ToTensor(), imagenet_normalize]
)

# ---- Data ---------------------------------------------------------------------
DATA_DIR = "F:/GroceryStoreDataset-master/dataset"
train_ds = ImageFolder(DATA_DIR + "/train", transform=train_transform_head)
val_ds   = ImageFolder(DATA_DIR + "/val",   transform=val_transform)

loader_options = dict(batch_size=16, num_workers=4)
train_loader = DataLoader(train_ds, shuffle=True,  **loader_options)
val_loader   = DataLoader(val_ds,   shuffle=False, **loader_options)

# Build ConvNeXt-Base with a 61-way head so that the saved checkpoint can be
# loaded, then replace the classifier with a fresh NUM_CLASSES-way layer.
model = timm.create_model('convnext_base', pretrained=False, num_classes=61)
state = torch.load("last_convnext.pth", map_location=device)
# strict=False ignores missing/unexpected keys, so a checkpoint with the wrong
# key names would load nothing without any error. Report the counts so that a
# bad warm-start is visible.
missing, unexpected = model.load_state_dict(state, strict=False)
print(f"ConvNeXt checkpoint loaded | missing = {len(missing)}, unexpected = {len(unexpected)}")

in_ch = model.head.fc.in_features
model.head.fc = nn.Linear(in_ch, NUM_CLASSES)
model.to(device)

# Head-only phase: freeze everything, then re-enable the parameters under model.head.
for p in model.parameters():
    p.requires_grad = False
for p in model.head.parameters():
    p.requires_grad = True

criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=LR_HEAD,
    weight_decay=WEIGHT_DECAY
)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=HEAD_ONLY_EPOCHS)

from torch.cuda.amp import GradScaler, autocast
# Loss scaling is only enabled on CUDA; on the CPU fallback the scaler becomes
# a pass-through.
scaler = GradScaler(enabled=(device.type == "cuda"))

# Two-phase ConvNeXt training loop.
#   Epochs 1..HEAD_ONLY_EPOCHS : only the head is trainable (optimizer built earlier).
#   Remaining epochs           : whole model unfrozen, separate LRs for backbone and head.
# The best-by-validation-accuracy weights go to best_convnext_3classes.pth.
log_epochs      = []
log_train_loss  = []
log_val_acc     = []   # validation accuracy as a 0-1 fraction

best_val_acc = 0.0
epochs_no_improve = 0

# Autocast is only switched on when running on CUDA.
amp_enabled = device.type == "cuda"

for epoch in range(1, TOTAL_EPOCHS + 1):
    if epoch == HEAD_ONLY_EPOCHS + 1:
        # Switch to the lighter augmentation and unfreeze the backbone.
        train_ds.transform = train_transform_full
        for p in model.parameters():
            p.requires_grad = True

        # Split parameters by name so the head can keep a larger learning rate.
        backbone_params = []
        head_params     = []
        for name, param in model.named_parameters():
            if "head" in name:
                head_params.append(param)
            else:
                backbone_params.append(param)

        optimizer = optim.AdamW([
            {'params': backbone_params, 'lr': LR_FULL_BACKBONE},
            {'params': head_params, 'lr': LR_FULL_HEAD}
        ], weight_decay=WEIGHT_DECAY)

        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=FULL_EPOCHS)
        # The plateau counter belongs to the head-only phase; without a reset a
        # head that stalled late in phase 1 would stop fine-tuning after one or
        # two epochs.
        epochs_no_improve = 0
        print(" Розморожено всю модель, починаємо full fine-tuning")

    model.train()
    running_loss = 0.0
    for imgs, labels in tqdm(train_loader, desc=f"Epoch [{epoch}/{TOTAL_EPOCHS}]"):
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()

        with autocast(enabled=amp_enabled):
            outputs = model(imgs)
            loss = criterion(outputs, labels)

        # GradScaler scales the loss before backward and unscales inside step().
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item() * imgs.size(0)

    scheduler.step()

    avg_train_loss = running_loss / len(train_ds)

    model.eval()
    val_correct = 0
    val_total   = 0
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            with autocast(enabled=amp_enabled):
                outputs = model(imgs)
            val_correct += (outputs.argmax(dim=1) == labels).sum().item()
            val_total   += labels.size(0)

    val_acc = val_correct / val_total

    log_epochs.append(epoch)
    log_train_loss.append(avg_train_loss)
    log_val_acc.append(val_acc)

    print(f"Epoch {epoch}/{TOTAL_EPOCHS} | Train Loss = {avg_train_loss:.4f} | Val Acc = {val_acc*100:.2f}%")

    # Improvements smaller than 1e-4 do not count as a new best.
    if val_acc > best_val_acc + 1e-4:
        best_val_acc = val_acc
        epochs_no_improve = 0
        torch.save(model.state_dict(), "best_convnext_3classes.pth")
        print(f"Збережено нову найкращу модель (Acc: {best_val_acc*100:.2f}%)")
    else:
        epochs_no_improve += 1
        print(f"No improvement for {epochs_no_improve} epoch(s)")

    if epochs_no_improve >= PATIENCE:
        print(f" Early stopping: більше {PATIENCE} епох без покращення.")
        break

print("Тренування завершено.")

# Save the per-epoch history collected by the training loop to a fixed-name CSV.
csv_filename = "training_log_convnext3cls.csv"
history_columns = {
    "epoch":      log_epochs,
    "train_loss": log_train_loss,
    "val_acc":    log_val_acc,
}
log_df = pd.DataFrame(history_columns)
log_df.to_csv(csv_filename, index=False)
print(f" Лог тренування збережено у файл: {csv_filename}")


In [None]:
import torch
import torch.nn as nn
import torchvision
import timm
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision import transforms
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Validation-set evaluation of the best ConvNeXt checkpoint:
# per-class report and confusion matrix.
NUM_CLASSES = 3
DATA_DIR = "F:/GroceryStoreDataset-master/dataset"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Deterministic preprocessing with ImageNet mean/std normalisation.
eval_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
]
val_transform = transforms.Compose(eval_steps)

val_ds = ImageFolder(DATA_DIR + "/val", transform=val_transform)
val_loader = DataLoader(val_ds, batch_size=32, shuffle=False, num_workers=4)

# Same architecture as in training, restored from the saved best weights.
model = timm.create_model('convnext_base', pretrained=False, num_classes=NUM_CLASSES)
checkpoint = torch.load("best_convnext_3classes.pth", map_location=device)
model.load_state_dict(checkpoint)
model.to(device)
model.eval()

all_preds, all_labels = [], []

with torch.no_grad():
    for images, labels in val_loader:
        batch_logits = model(images.to(device))
        all_preds += list(batch_logits.argmax(dim=1).cpu().numpy())
        all_labels += list(labels.numpy())

class_names = val_ds.classes

print("Classification Report:\n")
report_text = classification_report(
    all_labels,
    all_preds,
    target_names=class_names,
    digits=4,
)
print(report_text)

cm = confusion_matrix(all_labels, all_preds)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)

fig, ax = plt.subplots(figsize=(8, 8))
disp.plot(ax=ax, cmap="viridis", xticks_rotation=45)
plt.title("Confusion Matrix")
plt.tight_layout()
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Training curves for the ConvNeXt run, read back from its CSV log.
csv_path = "training_log_convnext3cls.csv"
log_df = pd.read_csv(csv_path)

# --- training loss -----------------------------------------------------------
loss_fig, loss_ax = plt.subplots(figsize=(8,4))
loss_ax.plot(log_df["epoch"], log_df["train_loss"],
             label="Train Loss", color="steelblue", linewidth=2)
loss_ax.set_title("Train Loss по епохах", fontsize=14)
loss_ax.set_xlabel("Epoch", fontsize=12)
loss_ax.set_ylabel("Loss", fontsize=12)
loss_ax.grid(True)
loss_ax.legend(fontsize=12)
loss_fig.tight_layout()
plt.show()

# --- validation accuracy (scaled to percent for plotting) ---------------------
acc_fig, acc_ax = plt.subplots(figsize=(8,4))
acc_ax.plot(log_df["epoch"], log_df["val_acc"]*100,
            label="Val Accuracy (%)", color="orange", linewidth=2)
acc_ax.set_title("Validation Accuracy (%) по епохах", fontsize=14)
acc_ax.set_xlabel("Epoch", fontsize=12)
acc_ax.set_ylabel("Accuracy (%)", fontsize=12)
acc_ax.set_ylim(0,100)
acc_ax.grid(True)
acc_ax.legend(fontsize=12)
acc_fig.tight_layout()
plt.show()


In [None]:
import torch
import timm
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import numpy as np

# Test-set evaluation of the 3-class ConvNeXt-Base:
# accuracy, per-class report and confusion matrix.

# Deterministic preprocessing with ImageNet mean/std normalisation.
val_transform = transforms.Compose([
    transforms.Resize(256),       
    transforms.CenterCrop(224),   
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

TEST_DIR = "F:/real_test"  
test_ds = ImageFolder(TEST_DIR, transform=val_transform)
test_loader = DataLoader(test_ds, batch_size=16, shuffle=False, num_workers=4)

print(f"Test samples: {len(test_ds)}, Classes: {test_ds.classes}")
# ImageFolder.targets holds the class index of every sample, so the preview
# does not have to decode and transform the first images.
print("Приклади міток:", test_ds.targets[:10])

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
NUM_CLASSES = 3

# Re-create the architecture and restore the best weights saved during training.
model = timm.create_model('convnext_base', pretrained=False, num_classes=NUM_CLASSES)
checkpoint = torch.load("best_convnext_3classes.pth", map_location=DEVICE)
model.load_state_dict(checkpoint)
model = model.to(DEVICE)
model.eval()
print("ConvNeXt-Base (3 класи) завантажено та в режимі eval.")

all_preds = []
all_labels = []

# Collect arg-max predictions and ground-truth labels over the whole test set.
with torch.no_grad():
    for imgs, labels in test_loader:
        imgs = imgs.to(DEVICE)
        outputs = model(imgs)
        preds = outputs.argmax(dim=1).cpu().numpy()
        all_preds.extend(preds)
        all_labels.extend(labels.numpy())

all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

test_acc = 100.0 * (all_preds == all_labels).mean()
print(f"\nTest Accuracy: {test_acc:.2f}%\n")
print("Classification Report:\n")
print(classification_report(
    all_labels,
    all_preds,
    target_names=test_ds.classes,
    digits=4
))

cm = confusion_matrix(all_labels, all_preds)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=test_ds.classes)

fig, ax = plt.subplots(figsize=(6, 6))
disp.plot(ax=ax, cmap="viridis", colorbar=True, values_format='d')
ax.set_title("Confusion Matrix", fontsize=14)
plt.xticks(rotation=45, ha="right")
plt.xlabel("Predicted label", fontsize=12)
plt.ylabel("True label", fontsize=12)
plt.tight_layout()
plt.show()


In [None]:
import torch
import timm
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import numpy as np

# Test-set evaluation of the 3-class ConvNeXt-Base.
# NOTE(review): this cell repeats the preceding ConvNeXt test-evaluation cell
# statement for statement; the all_preds / all_labels it leaves behind feed the
# report-export cell that follows.

# Deterministic preprocessing with ImageNet mean/std normalisation.
val_transform = transforms.Compose([
    transforms.Resize(256),       
    transforms.CenterCrop(224),   
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])

TEST_DIR = "F:/real_test"  
test_ds = ImageFolder(TEST_DIR, transform=val_transform)
test_loader = DataLoader(test_ds, batch_size=16, shuffle=False, num_workers=4)

print(f"Test samples: {len(test_ds)}, Classes: {test_ds.classes}")
# Read the labels straight from ImageFolder.targets rather than indexing the
# dataset, which would decode and transform each previewed image.
print("Приклади міток:", test_ds.targets[:10])

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
NUM_CLASSES = 3

# Re-create the architecture and restore the best weights saved during training.
model = timm.create_model('convnext_base', pretrained=False, num_classes=NUM_CLASSES)
checkpoint = torch.load("best_convnext_3classes.pth", map_location=DEVICE)
model.load_state_dict(checkpoint)
model = model.to(DEVICE)
model.eval()
print("ConvNeXt-Base (3 класи) завантажено та в режимі eval.")

all_preds = []
all_labels = []

# Collect arg-max predictions and ground-truth labels over the whole test set.
with torch.no_grad():
    for imgs, labels in test_loader:
        imgs = imgs.to(DEVICE)
        outputs = model(imgs)
        preds = outputs.argmax(dim=1).cpu().numpy()
        all_preds.extend(preds)
        all_labels.extend(labels.numpy())

all_preds = np.array(all_preds)
all_labels = np.array(all_labels)

test_acc = 100.0 * (all_preds == all_labels).mean()
print(f"\nTest Accuracy: {test_acc:.2f}%\n")
print("Classification Report:\n")
print(classification_report(
    all_labels,
    all_preds,
    target_names=test_ds.classes,
    digits=4
))

cm = confusion_matrix(all_labels, all_preds)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=test_ds.classes)

fig, ax = plt.subplots(figsize=(6, 6))
disp.plot(ax=ax, cmap="viridis", colorbar=True, values_format='d')
ax.set_title("Confusion Matrix", fontsize=14)
plt.xticks(rotation=45, ha="right")
plt.xlabel("Predicted label", fontsize=12)
plt.ylabel("True label", fontsize=12)
plt.tight_layout()
plt.show()


In [None]:
from sklearn.metrics import precision_recall_fscore_support

# Export the per-class test metrics (plus macro / weighted averages) to CSV.
# Uses all_labels / all_preds / test_ds left by the test-evaluation cell and the
# pd / np aliases imported in earlier cells.

# One label id per class folder, so every class gets a row even if it was never
# predicted; equals [0, 1, 2] for the three-class test set.
class_ids = list(range(len(test_ds.classes)))

precision, recall, f1, support = precision_recall_fscore_support(
    all_labels,
    all_preds,
    labels=class_ids,
    zero_division=0            # score 0 instead of warning when a class has no predictions
)

report_df = pd.DataFrame({
    "class_name":   test_ds.classes,  
    "precision":    precision,
    "recall":       recall,
    "f1_score":     f1,
    "support":      support
})

# Unweighted mean over classes.
report_df.loc[len(report_df)] = [
    "macro avg",
    precision.mean(),
    recall.mean(),
    f1.mean(),
    support.sum()
]
# Mean over classes weighted by the number of true samples per class.
report_df.loc[len(report_df)] = [
    "weighted avg",
    np.average(precision, weights=support),
    np.average(recall, weights=support),
    np.average(f1, weights=support),
    support.sum()
]

csv_report = "classification_report_convnext3cls.csv"
report_df.to_csv(csv_report, index=False)
print(f"Збережено Classification Report у файл: {csv_report}")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# ConvNeXt training curves with model-specific titles, read from the CSV log.
csv_path = "training_log_convnext3cls.csv"
log_df = pd.read_csv(csv_path)

print("Стовпці в CSV:", list(log_df.columns))

# --- training loss -----------------------------------------------------------
loss_fig, loss_ax = plt.subplots(figsize=(8, 4))
loss_ax.plot(log_df["epoch"], log_df["train_loss"],
             label="Train Loss", color="steelblue", linewidth=2)
loss_ax.set_title("Train Loss по епохах (ConvNeXt-3класи)", fontsize=14)
loss_ax.set_xlabel("Epoch", fontsize=12)
loss_ax.set_ylabel("Loss", fontsize=12)
loss_ax.grid(True)
loss_ax.legend(fontsize=12)
loss_fig.tight_layout()
plt.show()

# --- validation accuracy (scaled to percent for plotting) ---------------------
acc_fig, acc_ax = plt.subplots(figsize=(8, 4))
acc_ax.plot(log_df["epoch"], log_df["val_acc"] * 100,
            label="Val Accuracy (%)", color="orange", linewidth=2)
acc_ax.set_title("Validation Accuracy (%) по епохах (ConvNeXt-3класи)", fontsize=14)
acc_ax.set_xlabel("Epoch", fontsize=12)
acc_ax.set_ylabel("Accuracy (%)", fontsize=12)
acc_ax.set_ylim(0, 100)
acc_ax.grid(True)
acc_ax.legend(fontsize=12)
acc_fig.tight_layout()
plt.show()
