In [20]:
import albumentations as A
import cv2
import os
from tqdm import tqdm

SRC_DIR = "data/raw"
DST_DIR = "data/augmented"

# Создаём папки для аугментированных данных
for cls in ["food", "non_food"]:
    os.makedirs(os.path.join(DST_DIR, cls), exist_ok=True)

# Трансформации
transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.Rotate(limit=25, p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.RandomResizedCrop(size=(224,224), scale=(0.8,1.0), ratio=(0.9,1.1), p=0.5)
])

# Аугментация
for cls in ["food", "non_food"]:
    src_cls = os.path.join(SRC_DIR, cls)
    dst_cls = os.path.join(DST_DIR, cls)

    for fname in tqdm(os.listdir(src_cls), desc=f"Class {cls}"):
        img_path = os.path.join(src_cls, fname)
        img = cv2.imread(img_path)
        if img is None:
            continue

        # Сохраняем оригинал
        cv2.imwrite(os.path.join(dst_cls, fname), img)

        # Генерируем 3 новых аугментированных версии
        for i in range(3):
            augmented = transform(image=img)['image']
            name, ext = os.path.splitext(fname)
            cv2.imwrite(os.path.join(dst_cls, f"{name}_aug{i}{ext}"), augmented)

print("Аугментация завершена!")
!ls data/augmented/food | head -n 5
!ls data/augmented/non_food | head -n 5

Class food: 100%|██████████| 500/500 [00:14<00:00, 34.65it/s]
Class non_food: 100%|██████████| 500/500 [00:18<00:00, 26.51it/s]


Аугментация завершена!
0_aug0.jpg
0_aug1.jpg
0_aug2.jpg
0.jpg
100_aug0.jpg
0_aug0.jpg
0_aug1.jpg
0_aug2.jpg
0.jpg
100_aug0.jpg


In [22]:
# Установка библиотек
!pip install torch torchvision torchaudio --quiet
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
import os
import csv

# -------------------------------
# 1. Параметры обучения
# -------------------------------
BATCH_SIZE = 32
EPOCHS = 3
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# -------------------------------
# 2. Трансформации для данных
# -------------------------------
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
])

# -------------------------------
# 3. Загружаем датасет
# Можно менять между data/raw и data/augmented
# -------------------------------
DATA_DIR = "data/augmented"
train_dataset = datasets.ImageFolder(DATA_DIR, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# -------------------------------
# 4. Определяем модели
# -------------------------------
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3,32,3,padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32,64,3,padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(64,128,3,padding=1), nn.ReLU(), nn.MaxPool2d(2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128*28*28, 256),
            nn.ReLU(),
            nn.Linear(256, 2)
        )
    def forward(self,x):
        x = self.features(x)
        x = self.classifier(x)
        return x

# ResNet18 и EfficientNet_B0 с pretrained=True
resnet18 = models.resnet18(pretrained=True)
resnet18.fc = nn.Linear(resnet18.fc.in_features, 2)

efficientnet = models.efficientnet_b0(pretrained=True)
efficientnet.classifier[1] = nn.Linear(efficientnet.classifier[1].in_features, 2)

models_dict = {
    "SimpleCNN": SimpleCNN(),
    "ResNet18": resnet18,
    "EfficientNet_B0": efficientnet
}

# -------------------------------
# 5. Функция тренировки с локальным логом
# -------------------------------
def train_model(model, loader, epochs=EPOCHS, log_file="training_log.csv"):
    model = model.to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-4)

    # Создаём CSV для логирования
    with open(log_file, "a", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["Model", "Epoch", "Loss", "Accuracy"])

    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        for imgs, labels in loader:
            imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * imgs.size(0)
            _, predicted = torch.max(outputs,1)
            correct += (predicted==labels).sum().item()
            total += labels.size(0)

        epoch_loss = running_loss / total
        epoch_acc = correct / total
        print(f"{model.__class__.__name__} Epoch {epoch+1}/{epochs} - Loss: {epoch_loss:.4f}, Acc: {epoch_acc:.4f}")

        # Сохраняем в CSV
        with open(log_file, "a", newline="") as f:
            writer = csv.writer(f)
            writer.writerow([model.__class__.__name__, epoch+1, epoch_loss, epoch_acc])

    return model

# -------------------------------
# 6. Обучение всех моделей
# -------------------------------
for name, model in models_dict.items():
    print(f"=== Training {name} ===")
    train_model(model, train_loader)



Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:01<00:00, 32.3MB/s]


Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth


100%|██████████| 20.5M/20.5M [00:00<00:00, 22.0MB/s]


=== Training SimpleCNN ===
SimpleCNN Epoch 1/3 - Loss: 0.4925, Acc: 0.7540
SimpleCNN Epoch 2/3 - Loss: 0.3618, Acc: 0.8430
SimpleCNN Epoch 3/3 - Loss: 0.2890, Acc: 0.8790
=== Training ResNet18 ===
ResNet Epoch 1/3 - Loss: 0.0648, Acc: 0.9762
ResNet Epoch 2/3 - Loss: 0.0059, Acc: 0.9982
ResNet Epoch 3/3 - Loss: 0.0044, Acc: 0.9990
=== Training EfficientNet_B0 ===
EfficientNet Epoch 1/3 - Loss: 0.1792, Acc: 0.9435
EfficientNet Epoch 2/3 - Loss: 0.0233, Acc: 0.9965
EfficientNet Epoch 3/3 - Loss: 0.0085, Acc: 0.9990


In [34]:
!pip install torchvision --quiet

import os, csv, torch
import torch.nn as nn, torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from tqdm import tqdm
from sklearn.metrics import accuracy_score

# ==== Параметры ====
DATA_DIR = "data/raw"
EPOCHS = 3
BATCH_SIZE = 32
LR = 1e-4
LOG_FILE = "training_log_raw.csv"
MODEL_DIR = "models"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

os.makedirs(MODEL_DIR, exist_ok=True)

# ==== Препроцессинг (без аугментаций) ====
transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])

dataset = datasets.ImageFolder(DATA_DIR, transform=transform)
val_split = int(0.2 * len(dataset))
train_split = len(dataset) - val_split

if val_split == 0:
    train_dataset = dataset
    val_dataset = dataset
else:
    from torch.utils.data import random_split
    train_dataset, val_dataset = random_split(dataset, [train_split, val_split])

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

print(f"Train samples: {len(train_dataset)}, Val samples: {len(val_dataset)}")
print(f"Device: {DEVICE}")

# ==== SimpleCNN ====
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3,32,3,padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32,64,3,padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(64,128,3,padding=1), nn.ReLU(), nn.MaxPool2d(2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128*28*28, 256),
            nn.ReLU(),
            nn.Linear(256, len(dataset.classes))
        )
    def forward(self,x):
        x = self.features(x)
        x = self.classifier(x)
        return x

# ==== Модели ====
def make_models(num_classes):
    simple = SimpleCNN()
    res = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
    res.fc = nn.Linear(res.fc.in_features, num_classes)
    eff = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1)
    eff.classifier[1] = nn.Linear(eff.classifier[1].in_features, num_classes)
    return {"SimpleCNN": simple, "ResNet18": res, "EfficientNet_B0": eff}

# ==== Оценка ====
def evaluate_model(model, loader, device):
    model.eval()
    ys, preds = [], []
    with torch.no_grad():
        for xb, yb in loader:
            xb, yb = xb.to(device), yb.to(device)
            out = model(xb)
            pred = torch.argmax(out, dim=1)
            ys.extend(yb.cpu().numpy().tolist())
            preds.extend(pred.cpu().numpy().tolist())
    return accuracy_score(ys, preds)

# ==== Обучение + лог ====
def train_and_log(name, model, train_loader, val_loader, epochs, log_file, dataset_tag):
    model = model.to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LR)

    write_header = not os.path.exists(log_file)
    if write_header:
        with open(log_file, "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(["Model", "Dataset", "Epoch", "Loss", "Accuracy"])

    for epoch in range(1, epochs+1):
        model.train()
        running_loss, correct, total = 0.0, 0, 0
        for xb, yb in train_loader:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            optimizer.zero_grad()
            out = model(xb)
            loss = criterion(out, yb)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * xb.size(0)
            _, predicted = torch.max(out,1)
            correct += (predicted==yb).sum().item()
            total += yb.size(0)
        epoch_loss = running_loss / total if total>0 else 0.0
        epoch_acc = correct / total if total>0 else 0.0
        val_acc = evaluate_model(model, val_loader, DEVICE) if len(val_loader)>0 else epoch_acc

        print(f"{name} Epoch {epoch}/{epochs} - Loss: {epoch_loss:.4f}, TrainAcc: {epoch_acc:.4f}, ValAcc: {val_acc:.4f}")

        with open(log_file, "a", newline="") as f:
            writer = csv.writer(f)
            writer.writerow([name, dataset_tag, epoch, epoch_loss, val_acc])

    # сохраняем модель отдельно
    out_path = os.path.join(MODEL_DIR, f"{name}_raw.pth")
    torch.save(model.state_dict(), out_path)
    print(f" Model saved: {out_path}")
    return model

# ==== Запуск эксперимента ====
num_classes = len(dataset.classes)
models_dict_new = make_models(num_classes)

for name, model in models_dict_new.items():
    print(f"=== Training {name} on RAW data ===")
    trained = train_and_log(name, model, train_loader, val_loader, EPOCHS, LOG_FILE, dataset_tag="raw")
    del trained
    torch.cuda.empty_cache()

print(f"\nЭксперимент на data/raw завершён \nЛоги сохранены в {LOG_FILE}")

Train samples: 800, Val samples: 200
Device: cpu
=== Training SimpleCNN on RAW data ===
SimpleCNN Epoch 1/3 - Loss: 0.5597, TrainAcc: 0.7113, ValAcc: 0.7050
SimpleCNN Epoch 2/3 - Loss: 0.4175, TrainAcc: 0.8187, ValAcc: 0.7750
SimpleCNN Epoch 3/3 - Loss: 0.3381, TrainAcc: 0.8575, ValAcc: 0.8150
✅ Model saved: models/SimpleCNN_raw.pth
=== Training ResNet18 on RAW data ===
ResNet18 Epoch 1/3 - Loss: 0.1682, TrainAcc: 0.9337, ValAcc: 0.9400
ResNet18 Epoch 2/3 - Loss: 0.0261, TrainAcc: 0.9912, ValAcc: 0.9600
ResNet18 Epoch 3/3 - Loss: 0.0056, TrainAcc: 1.0000, ValAcc: 0.9750
✅ Model saved: models/ResNet18_raw.pth
=== Training EfficientNet_B0 on RAW data ===
EfficientNet_B0 Epoch 1/3 - Loss: 0.4420, TrainAcc: 0.8462, ValAcc: 0.9450
EfficientNet_B0 Epoch 2/3 - Loss: 0.1509, TrainAcc: 0.9738, ValAcc: 0.9750
EfficientNet_B0 Epoch 3/3 - Loss: 0.0727, TrainAcc: 0.9900, ValAcc: 0.9750
✅ Model saved: models/EfficientNet_B0_raw.pth

Эксперимент на data/raw завершён ✅
Логи сохранены в training_log_ra

In [47]:

!git add /content/food-nofood-/notebooks/Food_vs_NonFood_Pipeline.ipynb
!git commit -m "Обновлён ноутбук с последними экспериментами"
!git push

On branch main
Your branch is up to date with 'origin/main'.

Untracked files:
  (use "git add <file>..." to include in what will be committed)
	[31mdata/[m
	[31mfood5k-image-dataset.zip[m
	[31msrc/[m

nothing added to commit but untracked files present (use "git add" to track)
Everything up-to-date
