In [109]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
       os.path.join(dirname, filename)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [110]:
# Image Classification
import tensorflow as tf

data_dir = "/kaggle/input/fresh-rotten-and-formalin-mixed-fruit-detection/Fruits Original-ML-GRP-07"

dataset = tf.keras.utils.image_dataset_from_directory(
    data_dir,
    image_size=(128, 128),  # resize all images
    batch_size=32           # number of images per batch
)

class_names = dataset.class_names
print("Classes:", class_names)


Found 10153 files belonging to 5 classes.
Classes: ['Apple', 'Banana', 'Grape', 'Mango', 'Orange']


In [111]:
# ============================================================
# BYOL (from scratch, PyTorch) + Linear Evaluation on Fruits
# ============================================================

# 0) (Colab) Ensure deps (torch/torchvision are preinstalled on Colab)
# !pip install torch torchvision --quiet

import os, copy, math, random
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms, models


In [112]:
# 1) Reproducibility + Device
seed = 123
random.seed(seed); torch.manual_seed(seed); torch.cuda.manual_seed_all(seed)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Device:", device)

Device: cuda


In [113]:
# 2) Paths & Hyperparams
data_dir = "/kaggle/input/fresh-rotten-and-formalin-mixed-fruit-detection/Fruits Original-ML-GRP-07"
image_size = 128
batch_size_pretrain = 64     # reduce if you see CUDA OOM
batch_size_finetune = 32
epochs_pretrain = 10         # increase for better representations
epochs_finetune = 15
m_ema = 0.996                # BYOL EMA momentum
proj_hidden = 1024           # MLP hidden
proj_out = 256               # projection / prediction dim
base_lr = 1e-3

In [114]:
# 3) BYOL Augmentations (TwoCropsTransform)
imagenet_mean = (0.485, 0.456, 0.406)
imagenet_std  = (0.229, 0.224, 0.225)

augmentation = transforms.Compose([
    transforms.RandomResizedCrop(image_size, scale=(0.2, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomApply([transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8),
    transforms.RandomGrayscale(p=0.2),
    transforms.GaussianBlur(kernel_size=9, sigma=(0.1, 2.0)),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])

class TwoCropsTransform:
    """Create two differently augmented views of the same image."""
    def __init__(self, base_transform):
        self.base_transform = base_transform
    def __call__(self, x):
        return self.base_transform(x), self.base_transform(x)

# For fine-tuning (no heavy augs)
eval_transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])

In [115]:
# 4) Datasets & Loaders
# Pretraining dataset returns ((x1, x2), label) — label is ignored
pretrain_dataset = datasets.ImageFolder(root=data_dir, transform=TwoCropsTransform(augmentation))
pretrain_loader = DataLoader(
    pretrain_dataset,
    batch_size=batch_size_pretrain,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
    drop_last=True,  # important for BN stability
)

# For linear eval (labeled)
full_supervised = datasets.ImageFolder(root=data_dir, transform=eval_transform)
num_classes = len(full_supervised.classes)
print("Detected classes:", full_supervised.classes)

# Split 80/20 for train/test
train_len = int(0.8 * len(full_supervised))
test_len = len(full_supervised) - train_len
sup_train, sup_test = random_split(full_supervised, [train_len, test_len])
train_loader = DataLoader(sup_train, batch_size=batch_size_finetune, shuffle=True, num_workers=2, pin_memory=True)
test_loader  = DataLoader(sup_test,  batch_size=batch_size_finetune, shuffle=False, num_workers=2, pin_memory=True)

Detected classes: ['Apple', 'Banana', 'Grape', 'Mango', 'Orange']


In [116]:
# 5) Backbone (ResNet18 without classifier)
def get_backbone():
    resnet = models.resnet18(weights=None)  # weights="IMAGENET1K_V1" is okay too if you want a warm-start
    backbone = nn.Sequential(*list(resnet.children())[:-1])  # to get (B, 512, 1, 1)
    feat_dim = 512
    return backbone, feat_dim

backbone_online, feat_dim = get_backbone()
backbone_target = copy.deepcopy(backbone_online)


In [117]:
# 6) BYOL MLP Heads
class MLP(nn.Module):
    """2-layer MLP with BatchNorm, used for projector and predictor."""
    def __init__(self, in_dim, hidden_dim, out_dim, last_bn=True):
        super().__init__()
        self.fc1 = nn.Linear(in_dim, hidden_dim)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Linear(hidden_dim, out_dim)
        self.last_bn = last_bn
        if last_bn:
            self.bn2 = nn.BatchNorm1d(out_dim, affine=False)  # projector's last BN (no affine) as in BYOL
    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.fc2(x)
        if self.last_bn:
            x = self.bn2(x)
        return x

projector_online = MLP(feat_dim, proj_hidden, proj_out, last_bn=True)
predictor_online = MLP(proj_out, proj_hidden, proj_out, last_bn=False)  # predictor has no last BN
projector_target = copy.deepcopy(projector_online)

# Target network does not get gradients
for p in backbone_target.parameters():  p.requires_grad = False
for p in projector_target.parameters(): p.requires_grad = False

# Move to device
backbone_online.to(device)
projector_online.to(device)
predictor_online.to(device)
backbone_target.to(device)
projector_target.to(device)


MLP(
  (fc1): Linear(in_features=512, out_features=1024, bias=True)
  (bn1): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (fc2): Linear(in_features=1024, out_features=256, bias=True)
  (bn2): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
)

In [118]:
# 7) Helper functions (EMA update + BYOL loss)
@torch.no_grad()
def update_moving_average(m: float):
    """EMA update for target net parameters from online net parameters."""
    for online, target in zip(backbone_online.parameters(), backbone_target.parameters()):
        target.data = target.data * m + online.data * (1.0 - m)
    for online, target in zip(projector_online.parameters(), projector_target.parameters()):
        target.data = target.data * m + online.data * (1.0 - m)

def flatten_features(x):
    # x: (B, C, 1, 1) -> (B, C)
    return torch.flatten(x, 1)

def l2_normalize(x, dim=1, eps=1e-8):
    return x / (x.norm(p=2, dim=dim, keepdim=True) + eps)

def byol_loss_fn(p, z):
    """Negative cosine similarity between p (online prediction) and z (target projection, stop-grad)."""
    p = l2_normalize(p, dim=1)
    z = l2_normalize(z, dim=1)
    # cosine similarity -> mean over batch; BYOL uses 2 - 2 * cos, but minimizing -cos is equivalent up to a constant
    return 2 - 2 * (p * z).sum(dim=1).mean()

In [119]:
# 8) Optimizer
params = list(backbone_online.parameters()) + list(projector_online.parameters()) + list(predictor_online.parameters())
optimizer = torch.optim.Adam(params, lr=base_lr)


In [120]:
# 9) BYOL Pretraining
import torch
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR

# 1. Define the optimizer first
params = list(backbone_online.parameters()) + \
         list(projector_online.parameters()) + \
         list(predictor_online.parameters())
optimizer = optim.AdamW(params, lr=3e-4, weight_decay=1e-6)

# 2. Initialize the scheduler
scheduler = CosineAnnealingLR(optimizer, T_max=epochs_pretrain)

# 3. Training loop
print("=== BYOL pretraining ===")
for epoch in range(1, epochs_pretrain + 1):
    running = 0.0
    m = momentum_by_epoch(epoch, epochs_pretrain)

    for (x1, x2), _ in pretrain_loader:
        x1 = x1.to(device, non_blocking=True)
        x2 = x2.to(device, non_blocking=True)

        # Online forward pass
        y1 = flatten(backbone_online(x1))
        z1 = projector_online(y1)
        p1 = predictor_online(z1)
        
        y2 = flatten(backbone_online(x2))
        z2 = projector_online(y2)
        p2 = predictor_online(z2)

        # Target forward pass (no gradients)
        with torch.no_grad():
            y1_t = flatten(backbone_target(x1))
            z1_t = projector_target(y1_t)
            y2_t = flatten(backbone_target(x2))
            z2_t = projector_target(y2_t)

        loss = byol_loss(p1, z2_t) + byol_loss(p2, z1_t)

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(params, 5.0)
        optimizer.step()

        ema_update(m)
        running += loss.item()

    # Update learning rate
    scheduler.step()
    
    print(f"Epoch [{epoch}/{epochs_pretrain}]  BYOL Loss: {running/len(pretrain_loader):.4f}")

=== BYOL pretraining ===
Epoch [1/10]  BYOL Loss: 2.6630
Epoch [2/10]  BYOL Loss: 1.4661
Epoch [3/10]  BYOL Loss: 1.1096
Epoch [4/10]  BYOL Loss: 0.9146
Epoch [5/10]  BYOL Loss: 0.8353
Epoch [6/10]  BYOL Loss: 0.7257
Epoch [7/10]  BYOL Loss: 0.6791
Epoch [8/10]  BYOL Loss: 0.6121
Epoch [9/10]  BYOL Loss: 0.5893
Epoch [10/10]  BYOL Loss: 0.5692


In [121]:
# 10) Linear Evaluation / Fine-tuning
# --- freeze encoder and fix it in eval mode ---
for p in backbone_online.parameters():
    p.requires_grad = False
backbone_online.eval()  # VERY IMPORTANT: keep BN frozen

linear_head = nn.Linear(feat_dim, num_classes).to(device)

criterion = nn.CrossEntropyLoss(label_smoothing=0.05)  # mild smoothing helps
optimizer_cls = torch.optim.SGD(
    linear_head.parameters(),
    lr=0.1, momentum=0.9, weight_decay=1e-4
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer_cls, T_max=epochs_finetune * len(train_loader)
)

print("\n=== Linear evaluation (backbone frozen, BN fixed) ===")
for epoch in range(1, epochs_finetune + 1):
    linear_head.train()                  # ONLY the head trains
    running_loss, correct, total = 0.0, 0, 0

    for imgs, labels in train_loader:
        imgs = imgs.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        with torch.no_grad():            # no grads & no BN updates in the encoder
            feats = torch.flatten(backbone_online(imgs), 1)

        logits = linear_head(feats)
        loss = criterion(logits, labels)

        optimizer_cls.zero_grad(set_to_none=True)
        loss.backward()
        optimizer_cls.step()
        scheduler.step()

        running_loss += loss.item()
        preds = logits.argmax(1)
        total += labels.size(0)
        correct += (preds == labels).sum().item()

    acc = 100.0 * correct / total
    print(f"Epoch [{epoch}/{epochs_finetune}]  Loss: {running_loss/len(train_loader):.4f}  Train Acc: {acc:.2f}%")



=== Linear evaluation (backbone frozen, BN fixed) ===
Epoch [1/15]  Loss: 3.8085  Train Acc: 72.29%
Epoch [2/15]  Loss: 2.6392  Train Acc: 79.25%
Epoch [3/15]  Loss: 1.9428  Train Acc: 82.46%
Epoch [4/15]  Loss: 2.2512  Train Acc: 81.30%
Epoch [5/15]  Loss: 2.0031  Train Acc: 82.49%
Epoch [6/15]  Loss: 1.5654  Train Acc: 85.04%
Epoch [7/15]  Loss: 1.1389  Train Acc: 87.02%
Epoch [8/15]  Loss: 0.8832  Train Acc: 88.44%
Epoch [9/15]  Loss: 0.7867  Train Acc: 89.01%
Epoch [10/15]  Loss: 0.5492  Train Acc: 92.45%
Epoch [11/15]  Loss: 0.4959  Train Acc: 93.60%
Epoch [12/15]  Loss: 0.4561  Train Acc: 94.19%
Epoch [13/15]  Loss: 0.4250  Train Acc: 95.00%
Epoch [14/15]  Loss: 0.4129  Train Acc: 95.16%
Epoch [15/15]  Loss: 0.4053  Train Acc: 95.27%


In [124]:
# 11) Evaluation on hold-out set
# First define a simple classifier on top of BYOL features
class LinearClassifier(nn.Module):
    def __init__(self, feat_dim=512, num_classes=10):
        super().__init__()
        self.fc = nn.Linear(feat_dim, num_classes)
    
    def forward(self, x):
        return self.fc(x)

# Initialize classifier
clf = LinearClassifier().to(device)

# Train classifier (simplified example)
backbone_online.eval()  # Use the trained BYOL backbone
optimizer = torch.optim.Adam(clf.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

print("Training classifier...")
for epoch in range(10):  # Short classifier training
    for imgs, labels in train_loader:
        imgs = imgs.to(device)
        labels = labels.to(device)
        
        with torch.no_grad():
            features = flatten(backbone_online(imgs))
        
        logits = clf(features)
        loss = criterion(logits, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

# Now evaluate
clf.eval()
total, correct = 0, 0
with torch.no_grad():
    for imgs, labels in test_loader:
        imgs = imgs.to(device)
        labels = labels.to(device)
        
        # Extract features using BYOL backbone
        features = flatten(backbone_online(imgs))
        
        # Classify
        logits = clf(features)
        preds = logits.argmax(1)
        
        total += labels.size(0)
        correct += (preds == labels).sum().item()

print(f"\nTest Accuracy: {100.0 * correct / total:.2f}%")

Training classifier...

Test Accuracy: 89.76%
