This is an example of a simple CNN developed, trained and utilized

AI was used to help generate the codebase

Note: Make sure that the tensorflow package is installed in your device.

In [4]:
# DATASET DIRECTORY CONFIGURATION
# Choose the correct existing path to the Apple vs Orange dataset under project `archive/`.
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
import numpy as np

candidates = [
    os.path.join(os.getcwd(), 'archive', 'apple_orange_split'),
    os.path.join(os.getcwd(), 'apple_orange_split'),
    os.path.abspath('archive\apple_orange_split'),
    os.path.abspath('apple_orange_split')
]
base_dir = None
for c in candidates:
    if os.path.isdir(c):
        base_dir = c
        break
if base_dir is None:
    # fallback to the relative path (will raise later if incorrect)
    base_dir = os.path.abspath(os.path.join(os.getcwd(), 'archive', 'apple_orange_split'))

train_dir = os.path.join(base_dir, 'train')  # Path to training data
val_dir = os.path.join(base_dir, 'val')    # Path to validation data
test_dir = os.path.join(base_dir, 'test')  # Path to test data
print('Using dataset base_dir:', base_dir)

Using dataset base_dir: c:\Users\corpe\Downloads\vincent\archive\apple_orange_split


In [16]:
# (Removed: old hardcoded dataset directory configuration for pizza_split)
# This cell is intentionally left blank. The correct dataset paths are set in the cell above.
# IMAGE PARAMETERS
# Used to resize the input images, also will determine the input size of your input layer.
# Updated to 224x224 to match MobileNetV2's expected input size
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

In [6]:
# DATA PREPROCESSING & AUGMENTATION
# Enhanced data augmentation pipeline for the Apple vs Orange dataset
# This cell uses the train_dir, val_dir, test_dir variables set in the config cell above.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,  # Added zoom augmentation
    brightness_range=[0.8, 1.2],  # Added brightness augmentation
    shear_range=0.2,  # Added shear augmentation
    horizontal_flip=True
)

val_test_datagen = ImageDataGenerator(rescale=1./255)

print('train_dir:', train_dir)
print('val_dir:', val_dir)
print('test_dir:', test_dir)

import os
assert os.path.isdir(train_dir), f"Train dir not found: {train_dir}"
assert os.path.isdir(val_dir), f"Val dir not found: {val_dir}"
assert os.path.isdir(test_dir), f"Test dir not found: {test_dir}"

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary'
)
val_generator = val_test_datagen.flow_from_directory(
    val_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary'
)
test_generator = val_test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False
)

train_dir: c:\Users\corpe\Downloads\vincent\archive\apple_orange_split\train
val_dir: c:\Users\corpe\Downloads\vincent\archive\apple_orange_split\val
test_dir: c:\Users\corpe\Downloads\vincent\archive\apple_orange_split\test
Found 1328 images belonging to 2 classes.
Found 1328 images belonging to 2 classes.
Found 284 images belonging to 2 classes.
Found 284 images belonging to 2 classes.
Found 287 images belonging to 2 classes.
Found 287 images belonging to 2 classes.


In [17]:
# DATA PREPROCESSING & AUGMENTATION
# Enhanced data augmentation pipeline
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,  # Added zoom augmentation
    brightness_range=[0.8, 1.2],  # Added brightness augmentation
    shear_range=0.2,  # Added shear augmentation
    horizontal_flip=True,
    validation_split=0.2
)

test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='training'
)
val_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation'
)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False
)

Found 1063 images belonging to 2 classes.
Found 265 images belonging to 2 classes.
Found 265 images belonging to 2 classes.
Found 287 images belonging to 2 classes.
Found 287 images belonging to 2 classes.


In [18]:
# TRANSFER LEARNING WITH MOBILENETV2
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras import regularizers

base_model = MobileNetV2(input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3),
                         include_top=False,
                         weights='imagenet')
base_model.trainable = False  # Freeze base model

model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])

In [19]:
# Configure the model optimizers, loss function, and metrics
# Updated to use a fixed learning rate for compatibility with ReduceLROnPlateau
from tensorflow.keras.callbacks import ReduceLROnPlateau

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)  # Fixed learning rate

# Reduce learning rate when a metric has stopped improving
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss', 
    factor=0.5,  # Reduce learning rate by a factor of 0.5
    patience=3,  # Number of epochs with no improvement after which learning rate will be reduced
    min_lr=1e-6  # Lower bound on the learning rate
)

model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

In [20]:
# TRAINING THE IMPROVED CNN WITH LEARNING RATE SCHEDULING
history = model.fit(
    train_generator,
    epochs=15,  # Slightly more epochs for improved model
    validation_data=val_generator,
    callbacks=[reduce_lr]  # Added learning rate scheduler
)

Epoch 1/15
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 2s/step - accuracy: 0.9370 - loss: 0.3781 - val_accuracy: 0.9849 - val_loss: 0.2929 - learning_rate: 0.0010
Epoch 2/15
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 2s/step - accuracy: 0.9370 - loss: 0.3781 - val_accuracy: 0.9849 - val_loss: 0.2929 - learning_rate: 0.0010
Epoch 2/15
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 2s/step - accuracy: 0.9953 - loss: 0.2164 - val_accuracy: 0.9849 - val_loss: 0.2237 - learning_rate: 0.0010
Epoch 3/15
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 2s/step - accuracy: 0.9953 - loss: 0.2164 - val_accuracy: 0.9849 - val_loss: 0.2237 - learning_rate: 0.0010
Epoch 3/15
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 2s/step - accuracy: 0.9887 - loss: 0.2062 - val_accuracy: 0.9962 - val_loss: 0.1989 - learning_rate: 0.0010
Epoch 4/15
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 2s/s

In [21]:
# EVALUATE THE MODEL
test_loss, test_acc = model.evaluate(test_generator)
print(f"Test Accuracy: {test_acc}")

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 673ms/step - accuracy: 1.0000 - loss: 0.0623
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 673ms/step - accuracy: 1.0000 - loss: 0.0623
Test Accuracy: 1.0
Test Accuracy: 1.0


In [22]:
# SAVE THE MODEL
model.save('exercise_6_custom_corpes.h5')



In [23]:
# SIMPLE INFERENCE SCRIPT
from tensorflow.keras.preprocessing import image

def predict_image(img_path, model_path='exercise_6_trained_model_improved.h5'):
    model = tf.keras.models.load_model(model_path)
    img = image.load_img(img_path, target_size=IMG_SIZE)
    img_array = image.img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)
    prob = float(model.predict(img_array, verbose=0)[0,0])
    if prob >= 0.5:
        label = "Apple"
        confidence = prob
    else:
        label = "Orange"
        confidence = 1 - prob
    print(f"Prediction: {label} (confidence: {confidence:.2%})")


In [24]:
# Example usage:
# Use actual image files from the test set for prediction
# You may need to adjust the filenames to match files present in your dataset
predict_image("apple_orange_split/test/apple/13_100.jpg")
predict_image("apple_orange_split/test/orange/8_100.jpg")



Prediction: Apple (confidence: 92.95%)




Prediction: Apple (confidence: 98.05%)
