## Fruit Image Classifier - Part 1: Imports & Preprocessing


In [5]:
import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Fix random seeds for reproducibility
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)

# src imports
from gpu_utils import CheckGPU, CheckCUDA, CheckGPUBrief
from dataset_counter import CountDataset

#### Detect GPU Available, Details, Cuda, and cuDNN

In [6]:
# From 
CheckGPU()
CheckCUDA()

🖥️  GPU INFORMATION
✅ GPU Detected         : NVIDIA GeForce RTX 2060
   • Device ID          : 0
   • Compute Capability : 7.5
   • Multiprocessors    : 30
   • Total VRAM         : 6.00 GB
   • VRAM Allocated     : 0.00 GB
   • VRAM Reserved      : 0.00 GB
   • Active Device      : cuda

⚡ CUDA / PYTORCH INFORMATION
✅ CUDA Available       : True
   • PyTorch CUDA Ver.  : 11.8
   • PyTorch Version    : 2.7.1+cu118
✅ cuDNN Version        : 90100
   • CUDA Device Count  : 1
   • Device 0 Name     : NVIDIA GeForce RTX 2060


### Define dataset path and categories

In [None]:


DATASET_DIR = "../dataset"

DATASET_INFO = CountDataset(DATASET_DIR)
'''
# Example:
DATASET_INFO = {
    "Apple":  {"count": 19515, "ratio":  1.0000, "needed":     0, "size_mb":  816.94},
    "Banana": {"count": 11612, "ratio":  1.6806, "needed":  7903, "size_mb":  223.78},
    "Grapes": {"count":  2198, "ratio":  8.8785, "needed": 17317, "size_mb": 6605.82},
    "Mango":  {"count":  2505, "ratio":  7.7904, "needed": 17010, "size_mb":  191.21},
    "Orange": {"count":   232, "ratio": 84.1164, "needed": 19283, "size_mb":   29.25},

    "total_images": 36062,
    "max_class_count": 19515,
    "total_size_mb": 7867.01
}
'''


IMG_SIZE = 224
BATCH_SIZE = 32




📊 DATASET SUMMARY
Class       count       ratio     needed        size_mb
Apple       19515      1.0000          0      816.94 MB
Banana      11612      1.6806       7903      223.78 MB
Grapes       2198      8.8785      17317     6605.82 MB
Mango        2505      7.7904      17010      191.21 MB
Orange        232     84.1164      19283       29.25 MB
total_images  36062
max_class_count  19515
total_size_mb                                   7867.01 MB


### Load and preprocess images

In [8]:
for split in SPLITS:
    split_data = []
    split_labels = []
    split_path = os.path.join(DATASET_DIR, split)

    print(f"\n📂 Loading {split} data...")

    for category in CATEGORIES:
        category_path = os.path.join(split_path, category)

        # Loop through subfolders (e.g., apple rotten/, apple eaten/)
        for subfolder in os.listdir(category_path):
            subfolder_path = os.path.join(category_path, subfolder)
            if not os.path.isdir(subfolder_path):
                continue

            for img_name in tqdm(os.listdir(subfolder_path), desc=f"{split}/{category}/{subfolder}"):
                img_path = os.path.join(subfolder_path, img_name)
                try:
                    img = cv2.imread(img_path)
                    if img is None:
                        continue
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    img = cv2.resize(img, IMG_SIZE)
                    img = img.astype(np.float32) / 255.0

                    split_data.append(img)
                    split_labels.append(category)
                except Exception as e:
                    print(f"⚠️ Error loading {img_path}: {e}")

    data_splits[split] = np.array(split_data)
    labels_splits[split] = np.array(split_labels)

    print(f"✅ {split} images loaded: {len(split_data)}")
    print(f"📐 {split} shape: {data_splits[split].shape}")

NameError: name 'SPLITS' is not defined

In [None]:
### Encode labels

In [None]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Fit encoder only once (using training labels)
label_encoder = LabelEncoder()
label_encoder.fit(labels_splits["training"])

# Encode all splits
y_train = to_categorical(label_encoder.transform(labels_splits["training"]))
y_val = to_categorical(label_encoder.transform(labels_splits["validation"]))
y_test = to_categorical(label_encoder.transform(labels_splits["test"]))

X_train = data_splits["training"]
X_val = data_splits["validation"]
X_test = data_splits["test"]

print("\n🏷️ Label mapping:")
for i, label in enumerate(label_encoder.classes_):
    print(f"  {i}: {label}")

### Show Sample Images

In [None]:
def show_sample_images(X, y, encoder, n=5):
    plt.figure(figsize=(15, 3))
    for i in range(n):
        idx = np.random.randint(0, len(X))
        plt.subplot(1, n, i+1)
        plt.imshow(X[idx])
        plt.title(encoder.inverse_transform([np.argmax(y[idx])])[0])
        plt.axis("off")
    plt.show()

show_sample_images(X_train, y_train, label_encoder)

### Part 2: Data Augmentation

After successfully loading and preprocessing our dataset in **Part 1**, the next steps are to:

1. **Apply data augmentation** to make the model more robust and reduce overfitting.  
2. **Prepare the data generators** that will feed the model efficiently.


### 📸 Data Augmentation

To make the model generalize better, we apply small random transformations to the training images:

- **Rotation:** up to ±20°  
- **Shifting:** horizontally or vertically up to 10%  
- **Zooming:** up to 20%  
- **Horizontal flip:** mirrors images to simulate variations

These augmentations are applied **on the fly** during training.


In [None]:
# Part 2: Data Augmentation

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Data augmentation for training set only
train_datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

val_datagen = ImageDataGenerator()  # no augmentation on validation data
test_datagen = ImageDataGenerator()

train_generator = train_datagen.flow(X_train, y_train, batch_size=32, shuffle=True)
val_generator = val_datagen.flow(X_val, y_val, batch_size=32, shuffle=False)
test_generator = test_datagen.flow(X_test, y_test, batch_size=32, shuffle=False)

print("✅ Data augmentation ready.")


### Visualize Augmented Images (Optional)

Before moving to model training, it’s a good idea to visualize a few augmented samples to ensure the transformations look correct.


In [None]:
import matplotlib.pyplot as plt

augmented_images, _ = next(train_generator)
plt.figure(figsize=(15, 3))
for i in range(5):
    plt.subplot(1, 5, i+1)
    plt.imshow(augmented_images[i])
    plt.axis("off")
plt.suptitle("Example Augmented Training Images", fontsize=14)
plt.show()

## Part 3 — Model Building (Transfer Learning)

### Step 1: Import the required libraries

In [None]:
# Import the required libraries
from tensorflow.keras import layers, models
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.optimizers import Adam

### Step 2: Load the Pre-trained Model (ResNet50)

In [None]:
# Number of classes
num_classes = len(label_encoder.classes_)

# Load pre-trained ResNet50
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
for layer in base_model.layers:
    layer.trainable = False


### Step 3: Build the Classification Head

In [None]:
# Create new model on top of base
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_classes, activation='softmax')
])


### Step 4: Compile the Model

In [None]:
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()


📝 Why categorical_crossentropy?
Because your labels are one-hot encoded (y_train and y_test).

### Step 5: Preprocess and Prepare Data for Training

In [None]:
# Preprocess the data
print("🔄 Preprocessing training data...")
X_train_prep = preprocess_input(X_train)
print("✅ Training data preprocessing complete.")

print("🔄 Preprocessing test data...")
X_test_prep = preprocess_input(X_test)
print("✅ Test data preprocessing complete.")

### Step 6: Data Augmentation (Optional)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    zoom_range=0.2
)

datagen.fit(X_train_prep)


## Model Training

In [None]:
import tensorflow as tf
import time
from tensorflow.keras.callbacks import Callback


# ✅ Check if GPU is available
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    device_name = '/GPU:0'
    print(f"✅ Training on GPU: {gpus[0]}")
else:
    device_name = '/CPU:0'
    print("⚠️ No GPU detected — training on CPU.")


# Custom callback to measure time per epoch
class TimeHistory(Callback):
    def on_epoch_begin(self, epoch, logs=None):
        self.epoch_start_time = time.time()

    def on_epoch_end(self, epoch, logs=None):
        epoch_time = time.time() - self.epoch_start_time
        print(f"⏱️ Time for epoch {epoch + 1}: {epoch_time:.2f} seconds")

# Instantiate the callback
time_callback = TimeHistory()


# Model training

batch_size = 32
epochs = 20

with tf.device(device_name):
    history = model.fit(
        train_generator,
        validation_data=val_generator,
        epochs=epochs,
        callbacks=[time_callback],
        verbose=1       # Show progress bar with each epoch
    )

✅ More epochs → the model learns more patterns.

⚠️ But too many epochs can lead to overfitting — when the model memorizes the training data and performs poorly on new data.

Too few epochs can lead to underfitting — when the model hasn’t learned enough.

📝 Tip: Start with 10–20 epochs, then increase if the model is still improving.

## Part 4: Model Evaluation and Visualization
After training the model, it’s important to evaluate its performance and visualize how well it learned over time. This helps us determine if the model is underfitting, overfitting, or performing as expected.

### 📊 1. Evaluating the Model

We use the test dataset to check how well the model performs on unseen data.
This gives us metrics like accuracy and loss, which reflect how close the model’s predictions are to the actual labels.

In [None]:
# Evaluate the model on test data
test_loss, test_acc = model.evaluate(test_generator)
print(f"✅ Test Accuracy: {test_acc*100:.2f}%")
print(f"📉 Test Loss: {test_loss:.4f}")


### Plotting Training Accuracy and Loss

During training, we stored the accuracy and loss values for each epoch.
Plotting them helps us visualize how the model improved over time.

In [None]:
import matplotlib.pyplot as plt

# Plot accuracy
plt.figure(figsize=(12,5))
plt.subplot(1,2,1)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()


# Plot loss
plt.subplot(1,2,2)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.tight_layout()
plt.show()


📝 Interpretation Tips:

📈 If training accuracy is much higher than validation accuracy → overfitting.

📉 If both are low → underfitting.

✅ If both curves improve smoothly and are close → good training.

### 3. Confusion Matrix

A confusion matrix provides a more detailed view of classification results.
It shows how many times the model correctly predicted each class versus how many times it confused it with another.

In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Get predictions on test set
y_true = np.argmax(y_test, axis=1)  # true labels from encoded test set
y_pred_probs = model.predict(test_generator)
y_pred = np.argmax(y_pred_probs, axis=1)

# Compute confusion matrix
cm = confusion_matrix(y_true, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=label_encoder.classes_)
disp.plot(cmap='Blues', xticks_rotation=45)
plt.title('Confusion Matrix')
plt.show()


🧠 How to read the confusion matrix:

Diagonal values = correct predictions ✅

Off-diagonal values = misclassifications ❌

A perfect model would have non-zero values only along the diagonal.