# FRUITS DISEASE CLASSIFICATION

# Install required packages (no kagglehub needed)

In [30]:
!pip install tensorflow numpy matplotlib scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.7.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (11 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.5.2-py3-none-any.whl.metadata (5.6 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.7.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (9.5 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.5/9.5 MB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m0:01[0m:01[0m
[?25hDownloading joblib-1.5.2-py3-none-any.whl (308 kB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m308.4/308.4 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
[?25hDownloading threadpoolctl-3.6.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, joblib, scikit-learn
Successfully insta

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import os
import matplotlib.pyplot as plt


2025-12-05 18:12:08.962372: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-12-05 18:12:08.996766: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-12-05 18:12:10.402746: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.


## 1. Data & Model prep


In [None]:
# --- Training configuration -------------------------------------------------
# Spatial size every image is resized to; also used to build the base model's
# input_shape, so both stay in sync.
IMAGE_SIZE = (96, 96)
# Number of images per batch produced by the data generators.
BATCH_SIZE = 128
# Number of epochs passed to model.fit.
EPOCHS = 12

# --- Dataset location -------------------------------------------------------
# Root of the local dataset. It is organised as FRUIT_TYPE/DISEASE_TYPE/images,
# i.e. it ships without a train/validation split.
DATA_DIR = './FRUITS DATASET FOR FRUIT DISEASE CLASSIFICATION'

# Initial value only: the split cell below reassigns TRAIN_DIR to the
# generated train folder.
TRAIN_DIR = DATA_DIR

print(f"Using dataset from: {DATA_DIR}")

Using dataset from: ./FRUITS DATASET FOR FRUIT DISEASE CLASSIFICATION


In [3]:
# Create an 80/20 train/validation split from the original dataset.
#
# Source layout : DATA_DIR/FRUIT_TYPE/DISEASE_TYPE/<images>
# Output layout : output_base/{train,validation}/DISEASE_TYPE/<images>
#
# NOTE(review): the class folder is named after the disease folder only. If two
# fruits contain a disease folder with the same name, their images end up in the
# same class (and same-named files overwrite each other) -- confirm the disease
# folder names are unique across fruits.

import shutil
from sklearn.model_selection import train_test_split

# Extensions accepted as images; compared case-insensitively so that files such
# as "IMG_001.JPG" are not silently dropped.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

# Final output directories
output_base = './disease_dataset_split'
train_output = os.path.join(output_base, 'train')
valid_output = os.path.join(output_base, 'validation')

# Only create the split if it doesn't exist yet
if not os.path.exists(output_base):
    # Build the split in a staging directory and rename it into place at the end.
    # If the copy is interrupted, output_base never exists half-filled, so the
    # next run rebuilds instead of reporting an incomplete split as "existing".
    staging_base = output_base + '_partial'
    shutil.rmtree(staging_base, ignore_errors=True)  # drop leftovers of an aborted run
    for split_name in ('train', 'validation'):
        os.makedirs(os.path.join(staging_base, split_name), exist_ok=True)

    # os.listdir returns entries in arbitrary order; sorting makes the split
    # produced by random_state=42 reproducible across machines and runs.
    for fruit in sorted(os.listdir(DATA_DIR)):
        fruit_path = os.path.join(DATA_DIR, fruit)
        if not os.path.isdir(fruit_path):
            continue

        for disease in sorted(os.listdir(fruit_path)):
            disease_path = os.path.join(fruit_path, disease)
            if not os.path.isdir(disease_path):
                continue

            # Collect all image files of this class
            images = sorted(
                f for f in os.listdir(disease_path)
                if f.lower().endswith(IMAGE_EXTENSIONS)
            )

            # train_test_split raises ValueError when a side of the split would
            # be empty, which happens with fewer than 2 images.
            if len(images) < 2:
                print(f"Skipping {disease_path}: need at least 2 images to split, found {len(images)}")
                continue

            # Split into train and validation (80/20)
            train_images, valid_images = train_test_split(
                images, test_size=0.2, random_state=42
            )

            # Copy each subset into its class directory
            for split_name, split_images in (('train', train_images),
                                             ('validation', valid_images)):
                class_dir = os.path.join(staging_base, split_name, disease)
                os.makedirs(class_dir, exist_ok=True)
                for img in split_images:
                    shutil.copy2(os.path.join(disease_path, img),
                                 os.path.join(class_dir, img))

    # Publish the finished split under its final name.
    os.rename(staging_base, output_base)
    print("Dataset split created successfully!")
else:
    print("Dataset split already exists, using existing split.")

# Point the data generators at the split directories
TRAIN_DIR = train_output
VALID_DIR = valid_output
print(f"Training data: {TRAIN_DIR}")
print(f"Validation data: {VALID_DIR}")

Dataset split already exists, using existing split.
Training data: ./disease_dataset_split/train
Validation data: ./disease_dataset_split/validation


In [4]:
# Input pipelines built with ImageDataGenerator: images are read from the split
# directories, resized to IMAGE_SIZE and rescaled from [0, 255] to [0, 1].

# Options shared by the training and validation directory iterators.
flow_options = dict(
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',  # one-hot labels, one column per class folder
)

# Random augmentation (rotation, shifts, shear, zoom, horizontal flip) applied
# to training images only, to improve the model's robustness.
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)
# No augmentation for validation: only the same rescaling.
valid_datagen = ImageDataGenerator(rescale=1./255)

# Training batches are shuffled; validation batches keep directory order.
train_generator = train_datagen.flow_from_directory(
    TRAIN_DIR, shuffle=True, **flow_options
)
validation_generator = valid_datagen.flow_from_directory(
    VALID_DIR, shuffle=False, **flow_options
)

# Number of classes, used as the width of the final classification layer.
NUM_CLASSES = train_generator.num_classes
print(f"Total classes detected : {NUM_CLASSES}")

Found 5225 images belonging to 17 classes.
Found 1312 images belonging to 17 classes.
Total classes detected : 17


![img](https://encrypted-tbn3.gstatic.com/licensed-image?q=tbn:ANd9GcS8ZAQqtM-09H9jSR8hOrkmPZkc9c72vG4q97zfwxLmV5101IvOKMpveIKsUGEGooWe-VT6HqSqqps5EPS0vxdXeJ5tckxYrQwiIAtTxLSFUG_rcwE)

In [5]:
# Feature extractor: MobileNetV2 pre-trained on ImageNet, loaded without its
# top classification layer (include_top=False).
base_model = tf.keras.applications.MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(*IMAGE_SIZE, 3),  # IMAGE_SIZE plus 3 colour channels
)

# Freeze the base model so its weights are not updated during training.
base_model.trainable = False

  base_model = tf.keras.applications.MobileNetV2(
E0000 00:00:1764954740.602300  260377 cuda_executor.cc:1309] INTERNAL: CUDA Runtime error: Failed call to cudaGetRuntimeVersion: Error loading CUDA libraries. GPU will not be used.: Error loading CUDA libraries. GPU will not be used.
W0000 00:00:1764954740.616768  260377 gpu_device.cc:2342] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [6]:
# Build the full model: input rescaling -> frozen MobileNetV2 -> custom classifier head.
#
# The data generators emit pixels in [0, 1] (rescale=1./255), but the ImageNet
# weights of MobileNetV2 were trained on inputs in [-1, 1]. The Rescaling layer
# maps [0, 1] -> [-1, 1] inside the model, so the pretrained features see the
# range they expect while the model's external input contract ([0, 1] pixels)
# stays the same for the saved Keras model and the exported TFLite model.
model = Sequential([
    keras.Input(shape=IMAGE_SIZE + (3,)),
    keras.layers.Rescaling(scale=2.0, offset=-1.0),  # x * 2 - 1
    base_model,
    GlobalAveragePooling2D(),   # collapse the spatial feature map to one vector
    Dense(128, activation='relu'),
    Dropout(0.2),               # regularization to prevent overfitting
    Dense(NUM_CLASSES, activation='softmax'),  # final classification layer
])
model.summary()

In [7]:
# Training setup: Adam optimizer with a learning rate of 1e-4, categorical
# cross-entropy loss (the generators yield one-hot 'categorical' labels) and
# accuracy as the reported metric.
optimizer = Adam(learning_rate=1e-4)
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=optimizer,
)

# 2. Training

In [None]:
# Train for EPOCHS epochs on the training generator, using the validation
# generator as validation data. `history` holds the returned training history
# that the plotting cell reads.
fit_options = dict(
    validation_data=validation_generator,
    epochs=EPOCHS,
)
history = model.fit(train_generator, **fit_options)

Epoch 1/12


In [None]:
# Persist the trained Keras model to disk so it can be reloaded later without
# retraining.
keras_model_path = 'ripness_cnn_model.h5'
model.save(keras_model_path)

# 3. Plotting results

In [None]:
# Plot the training history: accuracy and loss for the training and validation
# sets, one point per completed epoch.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Derive the x-axis from the recorded history instead of a separate constant:
# it always matches the number of epochs that actually ran (the previously used
# NUM_EPOCHS name is not defined anywhere in the notebook). Epochs are numbered
# from 1, like the "Epoch 1/12" lines of the training log.
epochs_range = range(1, len(acc) + 1)

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(10, 5))

ax_acc.plot(epochs_range, acc, label='Training Accuracy')
ax_acc.plot(epochs_range, val_acc, label='Validation Accuracy')
ax_acc.set_title('Training and Validation Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend()

ax_loss.plot(epochs_range, loss, label='Training Loss')
ax_loss.plot(epochs_range, val_loss, label='Validation Loss')
ax_loss.set_title('Training and Validation Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

fig.tight_layout()
plt.show()

# 4. Convert the Keras model to TFLite

In [None]:
# Export the trained Keras model as a TFLite flatbuffer.
tflite_model_path = 'ripeness_model.tflite'

# Build a converter from the in-memory Keras model and enable the default
# optimization set (post-training quantization) for a smaller, faster model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Run the conversion; the result is the serialized model.
tflite_model = converter.convert()

# Write the serialized model to disk in binary mode.
with open(tflite_model_path, mode='wb') as tflite_file:
    tflite_file.write(tflite_model)

print(f"TFLite model saved to: {tflite_model_path}")

# 5. Save the Label map
The Flutter app needs a list of the class names, in the correct order, to interpret the model's output.

In [None]:
# Build the label map: class names ordered by the integer index the training
# generator assigned to them, so that line i of the file names output unit i.
labels = sorted(
    train_generator.class_indices.items(),
    key=lambda name_and_index: name_and_index[1],
)
class_names = [class_name for class_name, _ in labels]

# Write one class name per line (no trailing newline).
labels_file_path = 'ripeness_labels.txt'
label_text = '\n'.join(class_names)
with open(labels_file_path, 'w') as label_file:
    label_file.write(label_text)

print(f"Label map saved to: {labels_file_path}")
print("Final Classes:", class_names)