# TensorFlow CNN and image processing

This notebook contains the python code responsible for loading, processing and labeling the images taken by our camScript.py program on our Raspberry Pi 5. The notebook also contains the code for our CNN model specification and code for using the model.

The model in use is a sequential model.

After building the model, we feed it a test image and see if it can correctly classify it.

We also have code in this file for converting the model to a TFLite model, which is smaller in size, such that it can hopefully live on a microcontroller.

In [None]:
import tensorflow as tf
import cv2
import os
import matplotlib.pyplot as plt
import numpy as np
import math
from sklearn.model_selection import train_test_split
to_categorical = tf.keras.utils.to_categorical

## Image Processing

This Python program was created to process the images that will be used to train our CNN.

The program loads the images from the 'Pictures' directory, then rotates them 90 degrees clockwise to make them appear upright.


In [None]:
'''
Made with help from:
1. https://www.geeksforgeeks.org/image-processing-in-python/
2. ChatGPT
'''

# Define image parameters
IMAGE_SIZE = (150, 150)  # (width, height) handed to cv2.resize; square, so rotation keeps the shape
DATASET_PATH = "../Local_Things/Pictures"

# Lists to store images and labels
x_data = []
y_data = []

# Get class names from directory.
# Only sub-directories count as classes: a stray file in the dataset root
# would otherwise take a class index and inflate the one-hot width even
# though no image is ever loaded for it.
class_names = sorted(
    name
    for name in os.listdir(DATASET_PATH)
    if os.path.isdir(os.path.join(DATASET_PATH, name))
)
class_indices = {name: idx for idx, name in enumerate(class_names)}  # Assigns class numbers

# Process each class folder
for class_name in class_names:
    class_path = os.path.join(DATASET_PATH, class_name)

    # Sorted so the load order (and therefore the seeded train/test split)
    # does not depend on the order the filesystem happens to return.
    for img_name in sorted(os.listdir(class_path)):
        img_path = os.path.join(class_path, img_name)
        img = cv2.imread(img_path)

        # cv2.imread signals failure by returning None instead of raising
        if img is None:
            print(f"Skipping {img_name}, could not read.")
            continue

        # Convert BGR to RGB
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Resize image
        img = cv2.resize(img, IMAGE_SIZE, interpolation=cv2.INTER_CUBIC)

        # Rotate 90 degrees clockwise. cv2.rotate is an exact pixel
        # permutation; rotating with warpAffine about (w // 2, h // 2) is off
        # by half a pixel on an even-sized image, which drops one row and
        # leaves a black column.
        img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)

        # Normalize pixel values to the range [0, 1]
        img = img / 255.0

        # Store processed image and corresponding label
        x_data.append(img)
        y_data.append(class_indices[class_name])  # Integer label

# Fail early with a clear message instead of an obscure error further down
if not x_data:
    raise ValueError(f"No readable images found under {DATASET_PATH!r}")

# Convert lists to NumPy arrays
x_data = np.array(x_data, dtype=np.float32)
y_data = np.array(y_data, dtype=np.int32)

# Convert labels to one-hot encoding
y_data = to_categorical(y_data, num_classes=len(class_names))

# Split into training & testing sets (fixed seed for a reproducible split)
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.2, random_state=42)

# Plot images to see if they are processed correctly
def plot_images(images, labels, class_names, images_per_row=5, max_images=None):
    """Show images in a grid, each titled with its class name.

    Args:
        images: Sequence of images accepted by ``plt.imshow``.
        labels: Per-image label vectors; the position of the largest entry
            selects the title from ``class_names``.
        class_names: Class names indexed by label position.
        images_per_row: Number of columns in the grid.
        max_images: Optional cap on how many images are drawn; ``None``
            (the default) draws every image.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    total_images = len(images)
    if max_images is not None:
        total_images = min(total_images, max_images)

    # With nothing to draw the row count would be zero, and matplotlib
    # rejects a zero-height figure, so return before touching pyplot.
    if total_images <= 0:
        return

    rows = math.ceil(total_images / images_per_row)  # Calculate needed rows

    plt.figure(figsize=(images_per_row * 3, rows * 3))  # Adjust figure size dynamically

    shown = zip(images[:total_images], labels[:total_images])
    for position, (image, label) in enumerate(shown, start=1):
        plt.subplot(rows, images_per_row, position)
        plt.imshow(image)
        plt.title(class_names[np.argmax(label)])
        plt.axis("off")

    plt.tight_layout()
    plt.show()

# Visual sanity check: draw the training images with their class names
plot_images(images=x_train, labels=y_train, class_names=class_names)

## CNN Model Training

We're using a sequential model, following the getting started section of the TensorFlow website:
https://www.tensorflow.org/tutorials/quickstart/beginner

In [None]:
# Define a CNN model
# The output layer is sized from the one-hot label width so it always matches
# the number of classes in the loaded data instead of a hard-coded 2.
num_classes = y_train.shape[1]

model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(num_classes)  # No softmax here, since we'll use `from_logits=True`
])

print(y_train.shape)

# Labels are one-hot encoded, so use categorical cross-entropy; the model
# outputs raw logits, hence from_logits=True.
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

model.compile(optimizer='adam',
              loss=loss_fn,
              metrics=['accuracy'])

# Ensure x_train and y_train are in the correct format
print(f"x_train shape: {x_train.shape}")  # Should be (num_samples, 150, 150, 3)
print(f"y_train shape: {y_train.shape}")  # Should be (num_samples, num_classes)

# Train the model
model.fit(x_train, y_train, epochs=5)

# Evaluate on test set
model.evaluate(x_test, y_test, verbose=2)

# Wrap the logit model with a softmax layer to get probabilities
probability_model = tf.keras.Sequential([
    model,
    tf.keras.layers.Softmax()
])

# Get predictions for the first five test images
predictions = probability_model(x_test[:5])
print(predictions)


## Save the model

In [None]:
# Write the trained Keras model to an HDF5 file in the working directory
model.save(filepath="my_model.h5")

## Convert model to TFLite model

The idea is to have a model that will fit on a microcontroller. The model will still be trained on a device with the necessary capabilities.

By converting the model to a TFLite model, and setting the converter.optimizations attribute as shown below, we have managed to reduce the size of the model from 122M to 11M.

In [None]:
# Reload the Keras model that was saved to disk
MODEL_PATH = "my_model.h5"
model = tf.keras.models.load_model(MODEL_PATH)

# Build a TFLite converter from the loaded Keras model
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# Turn on TFLite's default optimizations to shrink the converted model.
# NOTE(review): with no representative dataset supplied this is presumably
# weight-only (dynamic range) quantization rather than full int8 - confirm
# against the TFLite post-training quantization guide.
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Run the conversion; the result is the serialized model that gets written below
tflite_model = converter.convert()

# Write the converted model next to the notebook
tflite_model_path = 'model.tflite'
with open(tflite_model_path, mode='wb') as tflite_file:
    tflite_file.write(tflite_model)


# Using the trained model

In [None]:
# Constants for running the saved model on a single image
IMAGE_SIZE = (150, 150)  # Target size passed to cv2.resize; same as training size
MODEL_PATH = "my_model.h5"  # File the trained Keras model is loaded from
IMAGE_PATH = "../Local_Things/Test_images/karse.jpg"  # Image to classify; replace with actual image path

# Load and preprocess the image
def preprocess_image(image_path):
    """Load an image file and prepare it as input for the model.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A tuple ``(img, img_expanded)``: the RGB image resized to
        ``IMAGE_SIZE`` and scaled to [0, 1], and the same array with a
        leading batch axis added.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path)
    # cv2.imread returns None on a missing or unreadable file; without this
    # check the failure surfaces as a cryptic error inside cvtColor.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
    # Same interpolation as the training pipeline so inputs match what the
    # model was trained on.
    img = cv2.resize(img, IMAGE_SIZE, interpolation=cv2.INTER_CUBIC)  # Resize
    # NOTE(review): the training pipeline also rotates every image 90 degrees
    # clockwise; confirm whether images passed here need the same rotation.
    img = img / 255.0  # Normalize
    img_expanded = np.expand_dims(img, axis=0)  # Add batch dimension
    return img, img_expanded  # Return both original and processed images

# Restore the trained network from disk
model = tf.keras.models.load_model(MODEL_PATH)

# Read the test image: one copy for display, one batched copy for the model
original_image, image = preprocess_image(IMAGE_PATH)

# Run the network on the single-image batch
prediction = model.predict(image)

# Human-readable names for the output indices (Modify according to your dataset)
class_names = ["Not ready", "Ready"]  # Replace with actual class names

# The position of the largest output picks the class (0 or 1)
predicted_class = np.argmax(prediction)
predicted_label = class_names[predicted_class]

# Apply softmax to the model output to get per-class probabilities for display
probabilities = tf.nn.softmax(prediction).numpy()
print(f"Class Probabilities: {probabilities}")

# Show the image with the predicted label as its title
title_text = f"Predicted: {predicted_label}"
plt.figure(figsize=(6, 6))
plt.imshow(original_image)
plt.title(title_text, fontsize=14, fontweight="bold")
plt.axis("off")
plt.show()

## Training and using a MobileNet v3 model with our own data

In [None]:
# https://www.kaggle.com/models/timm/tf-mobilenet-v3
# Made with help from chatGPT

from tensorflow.keras.applications import MobileNetV3Small
from tensorflow.keras import layers, models

# Size the classifier from the one-hot label width rather than hard-coding 2
num_classes = y_train.shape[1]

# Load MobileNetV3 with 150x150 input shape, trained from scratch.
# The built-in Rescaling layer is disabled because x_train is already scaled
# to [0, 1]; that layer expects raw [0, 255] pixels and would squash the
# already-normalized inputs into a tiny range.
base_model = MobileNetV3Small(
    input_shape=(150, 150, 3),
    include_top=False,
    weights=None,  # No pretrained weights
    include_preprocessing=False,
)

# Add classification layers
mobileNetv3_model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(num_classes, activation='softmax')  # One output per class
])

# The softmax output pairs with the default (non-logit) categorical cross-entropy
mobileNetv3_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

mobileNetv3_model.fit(x_train, y_train, epochs=5, batch_size=32)

mobileNetv3_model.evaluate(x_test, y_test, verbose=2)

# Classify the previously preprocessed test image with the MobileNetV3 model
prediction = mobileNetv3_model.predict(image)

# Convert the model output to a class label
predicted_class = np.argmax(prediction)
class_names = ["Not ready", "Ready"]  # 0 or 1
predicted_label = class_names[predicted_class]

# Print probabilities.
# The final Dense layer already applies softmax, so the prediction holds the
# probabilities; applying softmax a second time would distort them.
probabilities = prediction
print(f"Class Probabilities: {probabilities}")

# Plot image with prediction
plt.figure(figsize=(6, 6))
plt.imshow(original_image)
plt.title(f"Predicted: {predicted_label}", fontsize=14, fontweight="bold")
plt.axis("off")
plt.show()