In [None]:
# Merged from multiple folders into a clean dataset folder, avoiding filename conflicts.

import os
import shutil

BASE = "mango_dataset"
SRC_FOLDERS = ["new_images", "old_images"]   # inside BASE
DST_BASE = "mango_dataset_clean"

VARIETIES = [
    "AMRAPALLI",
    "ARKA NEELACHAL KESARI",
    "BANGANPALLI",
    "DASHEHARI",
    "SUVARNA REKHA",
    "TOTAPURI",
]

os.makedirs(DST_BASE, exist_ok=True)

def safe_copy(src_path, dst_dir):
    os.makedirs(dst_dir, exist_ok=True)
    filename = os.path.basename(src_path)
    dst_path = os.path.join(dst_dir, filename)

    # avoid overwrite if same name
    if os.path.exists(dst_path):
        name, ext = os.path.splitext(filename)
        i = 1
        while True:
            new_name = f"{name}_{i}{ext}"
            new_path = os.path.join(dst_dir, new_name)
            if not os.path.exists(new_path):
                dst_path = new_path
                break
            i += 1
    shutil.copy2(src_path, dst_path)

for variety in VARIETIES:
    dst_var_dir = os.path.join(DST_BASE, variety.replace(" ", "_"))
    os.makedirs(dst_var_dir, exist_ok=True)

    for src_root in SRC_FOLDERS:
        variety_dir = os.path.join(BASE, src_root, variety)
        if not os.path.isdir(variety_dir):
            continue

        for root, dirs, files in os.walk(variety_dir):
            for f in files:
                src_path = os.path.join(root, f)
                safe_copy(src_path, dst_var_dir)

print("Done creating mango_dataset_clean.")


Done creating mango_dataset_clean.


STEP 1: Dataset sanity check & cleanup          ‚úÖ DONE
STEP 2: Load dataset + basic transforms         ‚úÖ DONE
STEP 3: Train / Validation split                ‚¨ÖÔ∏è NEXT
STEP 4: Data augmentation (TRAIN only)          ‚è≥ LATER
STEP 5: Class imbalance handling (weights)      ‚è≥ LATER
STEP 6: Build transfer learning model
STEP 7: Train (freeze backbone)
STEP 8: Fine-tune (optional)
STEP 9: Evaluate


In [None]:
# List directory tree seeing only folders
import os

def list_dir_tree(start_path):
    for root, dirs, files in os.walk(start_path):
        level = root.replace(start_path, "").count(os.sep)
        indent = " " * 4 * level
        print(f"{indent}{os.path.basename(root)}/")  # only folders

if __name__ == "__main__":
    list_dir_tree("mango_dataset_clean")  # or DST_BASE


mango_dataset_clean/
    AMRAPALLI/
    ARKA_NEELACHAL_KESARI/
    BANGANPALLI/
    DASHEHARI/
    SUVARNA_REKHA/
    TOTAPURI/


In [None]:
# calculate number of images per variety in the clean dataset
import os

DATA_DIR = "mango_dataset_clean"

def count_images_per_class(root_dir):
    for class_name in sorted(os.listdir(root_dir)):
        class_path = os.path.join(root_dir, class_name)
        if not os.path.isdir(class_path):
            continue

        count = 0
        # walk in case there are subfolders (usually there aren't now)
        for _, _, files in os.walk(class_path):
            count += len(files)

        print(f"{class_name}: {count} images")

if __name__ == "__main__":
    count_images_per_class(DATA_DIR)


AMRAPALLI: 460 images
ARKA_NEELACHAL_KESARI: 541 images
BANGANPALLI: 443 images
DASHEHARI: 438 images
SUVARNA_REKHA: 292 images
TOTAPURI: 464 images


: 

Note - The smallest class (SUVARNA_REKHA) has ~50% fewer images than the largest (ARKA_NEELACHAL_KESARI), which can cause the model to favor the majority classes.

In [5]:
import os

for cls in sorted(os.listdir(".")):
    if os.path.isdir(cls):
        files = os.listdir(cls)
        print(cls, "‚Üí", len(files), "files")


.vscode ‚Üí 1 files
mango_dataset ‚Üí 2 files
mango_dataset_clean ‚Üí 6 files


In [6]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader


In [7]:
IMAGE_SIZE = 224   # standard for ImageNet models
BATCH_SIZE = 8     # small batch so we can inspect easily

basic_transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])


In [13]:
dataset = datasets.ImageFolder(
    root="mango_dataset_clean",
    transform=basic_transform
)

print("Classes detected:", dataset.classes)
print("Total images:", len(dataset))


Classes detected: ['AMRAPALLI', 'ARKA_NEELACHAL_KESARI', 'BANGANPALLI', 'DASHEHARI', 'SUVARNA_REKHA', 'TOTAPURI']
Total images: 2633


In [14]:
loader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=True
)

images, labels = next(iter(loader))

print("Image batch shape:", images.shape)
print("Label batch shape:", labels.shape)
print("Sample labels:", labels)


Image batch shape: torch.Size([8, 3, 224, 224])
Label batch shape: torch.Size([8])
Sample labels: tensor([2, 2, 2, 3, 4, 1, 2, 5])


In [15]:
from torchvision import datasets, transforms
from torch.utils.data import random_split

basic_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

full_dataset = datasets.ImageFolder(
    root="mango_dataset_clean",
    transform=basic_transform
)

print("Total images:", len(full_dataset))
print("Classes:", full_dataset.classes)


Total images: 2633
Classes: ['AMRAPALLI', 'ARKA_NEELACHAL_KESARI', 'BANGANPALLI', 'DASHEHARI', 'SUVARNA_REKHA', 'TOTAPURI']


In [16]:
total_size = len(full_dataset)
train_size = int(0.8 * total_size)
val_size = total_size - train_size

print("Training size:", train_size)
print("Validation size:", val_size)


Training size: 2106
Validation size: 527


In [17]:
train_dataset, val_dataset = random_split(
    full_dataset,
    [train_size, val_size]
)

print("Train dataset length:", len(train_dataset))
print("Validation dataset length:", len(val_dataset))


Train dataset length: 2106
Validation dataset length: 527


In [18]:
from torch.utils.data import DataLoader

train_loader = DataLoader(
    train_dataset,
    batch_size=8,
    shuffle=True
)

val_loader = DataLoader(
    val_dataset,
    batch_size=8,
    shuffle=False
)

print("Train batches:", len(train_loader))
print("Validation batches:", len(val_loader))


Train batches: 264
Validation batches: 66


Epoch (1 full training cycle)
‚îÇ
‚îú‚îÄ‚îÄ 264 training batches (learning happens)
‚îÇ
‚îî‚îÄ‚îÄ 66 validation batches (evaluation only)


In [19]:
from torchvision import transforms

train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=10),
    transforms.ColorJitter(
        brightness=0.2,
        contrast=0.2,
        saturation=0.2
    ),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])


In [20]:
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])


In [21]:
# Apply augmentation to TRAIN set
train_dataset.dataset.transform = train_transform

# Apply clean transform to VALIDATION set
val_dataset.dataset.transform = val_transform


In [22]:
from torch.utils.data import DataLoader

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)

images, labels = next(iter(train_loader))

print("Augmented train batch shape:", images.shape)


Augmented train batch shape: torch.Size([8, 3, 224, 224])


STEP 5 ‚Äî Handling Class Imbalance - we will use Class weights

In [23]:
import numpy as np
import torch

# Extract labels from the full dataset
# dataset.samples = list of (image_path, class_index)
targets = [label for _, label in full_dataset.samples]

# Count how many images belong to each class
class_counts = np.bincount(targets)

print("Class counts:", class_counts)


Class counts: [455 541 443 438 292 464]


In [24]:
# Convert counts to tensor
class_counts = torch.tensor(class_counts, dtype=torch.float)

# Inverse frequency: smaller class ‚Üí larger weight
class_weights = 1.0 / class_counts

# Normalize weights (optional but good practice)
class_weights = class_weights / class_weights.sum()

print("Class weights:", class_weights)


Class weights: tensor([0.1551, 0.1305, 0.1593, 0.1612, 0.2417, 0.1521])


In [25]:
import torch
import torch.nn as nn
from torchvision import models


In [26]:
# Load EfficientNet-B0 pretrained on ImageNet
model = models.efficientnet_b0(pretrained=True)




In [27]:
print(model)


EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [28]:
# Freeze all pretrained layers (backbone)
for param in model.parameters():
    param.requires_grad = False


In [29]:
num_classes = 6  # mango varieties

# Replace the final classification layer
model.classifier[1] = nn.Linear(
    in_features=model.classifier[1].in_features,
    out_features=num_classes
)


In [30]:
import torch

print("CUDA available:", torch.cuda.is_available())
print("CUDA version:", torch.version.cuda)

if torch.cuda.is_available():
    print("GPU name:", torch.cuda.get_device_name(0))


CUDA available: True
CUDA version: 11.8
GPU name: NVIDIA GeForce GTX 1650


In [32]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


Using device: cuda


In [35]:
# Make sure device exists
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move model to GPU
model = model.to(device)

print("Model moved to:", device)


Model moved to: cuda


In [36]:
# Take one batch from train_loader
images, labels = next(iter(train_loader))

# Move images to GPU
images = images.to(device)

# Forward pass (NO training)
outputs = model(images)

print("Model output shape:", outputs.shape)


Model output shape: torch.Size([8, 6])


‚úÖ Current status (where we are)

You have successfully completed:

‚úî Clean dataset

‚úî Train / validation split

‚úî Data augmentation (train only)

‚úî Class imbalance handling (class weights)

‚úî EfficientNet-B0 model built

‚úî Model + data running on GPU

‚úî Forward pass verified on GPU

In [39]:
print("Device:", device)

# Confirm only classifier is trainable
for name, param in model.named_parameters():
    if param.requires_grad:
        print("Trainable:", name)


Device: cuda
Trainable: classifier.1.weight
Trainable: classifier.1.bias


In [41]:
# Move class weights to GPU
class_weights = class_weights.to(device)


In [42]:
import torch.nn as nn

criterion = nn.CrossEntropyLoss(weight=class_weights)


In [43]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = model.to(device)
class_weights = class_weights.to(device)

criterion = nn.CrossEntropyLoss(weight=class_weights)


In [44]:
# Training loop
import torch
import torch.nn as nn
import torch.optim as optim

# -------------------------
# Loss function (with class weights)
# -------------------------
criterion = nn.CrossEntropyLoss(weight=class_weights)

# -------------------------
# Optimizer (classifier only)
# -------------------------
optimizer = optim.Adam(
    model.classifier.parameters(),
    lr=1e-3
)

EPOCHS = 10

for epoch in range(EPOCHS):

    # ========= TRAINING =========
    model.train()
    train_loss = 0.0
    train_correct = 0
    train_total = 0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, preds = torch.max(outputs, 1)

        train_total += labels.size(0)
        train_correct += (preds == labels).sum().item()

    train_loss /= len(train_loader)
    train_acc = train_correct / train_total

    # ========= VALIDATION =========
    model.eval()
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            _, preds = torch.max(outputs, 1)

            val_total += labels.size(0)
            val_correct += (preds == labels).sum().item()

    val_acc = val_correct / val_total

    # ========= LOG =========
    print(
        f"Epoch [{epoch+1}/{EPOCHS}] | "
        f"Train Loss: {train_loss:.4f} | "
        f"Train Acc: {train_acc:.4f} | "
        f"Val Acc: {val_acc:.4f}"
    )


Epoch [1/10] | Train Loss: 1.2027 | Train Acc: 0.6059 | Val Acc: 0.7666
Epoch [2/10] | Train Loss: 0.7642 | Train Acc: 0.7773 | Val Acc: 0.8046
Epoch [3/10] | Train Loss: 0.6337 | Train Acc: 0.8158 | Val Acc: 0.8501
Epoch [4/10] | Train Loss: 0.5230 | Train Acc: 0.8481 | Val Acc: 0.8482
Epoch [5/10] | Train Loss: 0.4992 | Train Acc: 0.8557 | Val Acc: 0.8786
Epoch [6/10] | Train Loss: 0.4760 | Train Acc: 0.8618 | Val Acc: 0.8748
Epoch [7/10] | Train Loss: 0.4141 | Train Acc: 0.8780 | Val Acc: 0.8558
Epoch [8/10] | Train Loss: 0.4038 | Train Acc: 0.8732 | Val Acc: 0.8899
Epoch [9/10] | Train Loss: 0.3955 | Train Acc: 0.8784 | Val Acc: 0.8710
Epoch [10/10] | Train Loss: 0.3643 | Train Acc: 0.8884 | Val Acc: 0.9051


Now we will do fine tuning to increase the accuracy 

In [45]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
class_weights = class_weights.to(device)

print("Using device:", device)


Using device: cuda


In [46]:
# Freeze everything first (safety)
for param in model.parameters():
    param.requires_grad = False

# Unfreeze classifier (always trainable)
for param in model.classifier.parameters():
    param.requires_grad = True

# Unfreeze top 2 EfficientNet feature blocks
for param in model.features[-2:].parameters():
    param.requires_grad = True


In [47]:
print("Trainable parameters after unfreezing:\n")

for name, param in model.named_parameters():
    if param.requires_grad:
        print(name)


Trainable parameters after unfreezing:

features.7.0.block.0.0.weight
features.7.0.block.0.1.weight
features.7.0.block.0.1.bias
features.7.0.block.1.0.weight
features.7.0.block.1.1.weight
features.7.0.block.1.1.bias
features.7.0.block.2.fc1.weight
features.7.0.block.2.fc1.bias
features.7.0.block.2.fc2.weight
features.7.0.block.2.fc2.bias
features.7.0.block.3.0.weight
features.7.0.block.3.1.weight
features.7.0.block.3.1.bias
features.8.0.weight
features.8.1.weight
features.8.1.bias
classifier.1.weight
classifier.1.bias


In [48]:
import torch.nn as nn

criterion = nn.CrossEntropyLoss(weight=class_weights)


In [49]:
import torch.optim as optim

optimizer = optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=1e-4   # VERY IMPORTANT: small LR
)


In [50]:
FINE_TUNE_EPOCHS = 5

for epoch in range(FINE_TUNE_EPOCHS):

    # =========================
    # TRAINING
    # =========================
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, preds = torch.max(outputs, 1)

        total += labels.size(0)
        correct += (preds == labels).sum().item()

    train_loss = running_loss / len(train_loader)
    train_acc = correct / total

    # =========================
    # VALIDATION
    # =========================
    model.eval()
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            _, preds = torch.max(outputs, 1)

            val_total += labels.size(0)
            val_correct += (preds == labels).sum().item()

    val_acc = val_correct / val_total

    # =========================
    # LOG
    # =========================
    print(
        f"[Fine-Tune Epoch {epoch+1}/{FINE_TUNE_EPOCHS}] | "
        f"Train Loss: {train_loss:.4f} | "
        f"Train Acc: {train_acc:.4f} | "
        f"Val Acc: {val_acc:.4f}"
    )


[Fine-Tune Epoch 1/5] | Train Loss: 0.3091 | Train Acc: 0.9065 | Val Acc: 0.9051
[Fine-Tune Epoch 2/5] | Train Loss: 0.1983 | Train Acc: 0.9430 | Val Acc: 0.9203
[Fine-Tune Epoch 3/5] | Train Loss: 0.1536 | Train Acc: 0.9558 | Val Acc: 0.8994
[Fine-Tune Epoch 4/5] | Train Loss: 0.1152 | Train Acc: 0.9706 | Val Acc: 0.9241
[Fine-Tune Epoch 5/5] | Train Loss: 0.1231 | Train Acc: 0.9696 | Val Acc: 0.9127


In [51]:
import os
import torch

# Create folder to save models
os.makedirs("saved_models", exist_ok=True)

MODEL_PATH = "saved_models/mango_efficientnet_b0_finetuned.pth"

torch.save(model.state_dict(), MODEL_PATH)

print("‚úÖ Best model saved at:", MODEL_PATH)


‚úÖ Best model saved at: saved_models/mango_efficientnet_b0_finetuned.pth


In [54]:
from torch.utils.data import random_split, DataLoader

# Split validation set into validation and test (50% / 50%)
val_size = len(val_dataset) // 2
test_size = len(val_dataset) - val_size

val_dataset, test_dataset = random_split(
    val_dataset,
    [val_size, test_size]
)

# Create DataLoaders
val_loader = DataLoader(
    val_dataset,
    batch_size=8,
    shuffle=False
)

test_loader = DataLoader(
    test_dataset,
    batch_size=8,
    shuffle=False
)

print("Validation samples:", len(val_dataset))
print("Test samples:", len(test_dataset))


Validation samples: 263
Test samples: 264


In [55]:
print("Test samples:", len(test_loader.dataset))


Test samples: 264


#Final Evaluation on Test Set Dataset

In [56]:
#Final Evaluation on Test Set Dataset
model.eval()


EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [57]:
import numpy as np
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix
)

y_true = []
y_pred = []

with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        _, preds = torch.max(outputs, 1)

        y_true.extend(labels.cpu().numpy())
        y_pred.extend(preds.cpu().numpy())


In [58]:
test_accuracy = accuracy_score(y_true, y_pred)
print("‚úÖ Test Accuracy:", test_accuracy)
print("\nüìä Classification Report (per mango variety):\n")

print(
    classification_report(
        y_true,
        y_pred,
        target_names=full_dataset.classes,
        digits=4
    )
)
cm = confusion_matrix(y_true, y_pred)

print("üß© Confusion Matrix:\n")
print(cm)


‚úÖ Test Accuracy: 0.9090909090909091

üìä Classification Report (per mango variety):

                       precision    recall  f1-score   support

            AMRAPALLI     0.9348    0.9556    0.9451        45
ARKA_NEELACHAL_KESARI     0.9672    1.0000    0.9833        59
          BANGANPALLI     0.7500    0.8333    0.7895        36
            DASHEHARI     0.9767    0.7500    0.8485        56
        SUVARNA_REKHA     0.7647    1.0000    0.8667        26
             TOTAPURI     1.0000    0.9524    0.9756        42

             accuracy                         0.9091       264
            macro avg     0.8989    0.9152    0.9014       264
         weighted avg     0.9194    0.9091    0.9091       264

üß© Confusion Matrix:

[[43  1  0  1  0  0]
 [ 0 59  0  0  0  0]
 [ 0  0 30  0  6  0]
 [ 3  0 10 42  1  0]
 [ 0  0  0  0 26  0]
 [ 0  1  0  0  1 40]]


üß† Why 95% accuracy is hard (key insight)

Fine-grained classification errors are caused by:

Very similar shapes (BANGANPALLI ‚Üî DASHEHARI)

Color overlap

Intra-class variation

Limited samples per class

Human-level ambiguity

Even humans may disagree on some images.

So accuracy is limited by data, not just architecture.