In [2]:
# ==============================================================================
# Part 2, Task 1: Edge AI Prototype (Recyclable Item Classifier)
#
# This script will:
# 1. Set up the Colab environment and download the TrashNet dataset.
# 2. Preprocess the data and set up training/validation sets.
# 3. Build and train a lightweight MobileNetV2 model.
# 4. Evaluate the Keras model.
# 5. Convert the Keras model to a quantized TensorFlow Lite model.
# 6. Test the TFLite model's performance and accuracy in Colab.
# 7. Provide a sample deployment script for a Raspberry Pi.
# ==============================================================================

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

import matplotlib.pyplot as plt
import numpy as np
import os
import pathlib
import time

# === STEP 1: DOWNLOAD AND PREPARE THE DATASET ===

print("--- Step 1: Downloading Dataset ---")

# We will use 'opendatasets' to easily fetch the Kaggle dataset
# You will be prompted to enter your Kaggle username and API key
!pip install -q opendatasets

import opendatasets as od

# Kaggle dataset to fetch
dataset_url = 'https://www.kaggle.com/datasets/asdasdasasdas/garbage-classification'

# Download the dataset (opendatasets asks for Kaggle credentials when needed)
od.download(dataset_url)

# Folder the images are expected in; the glob below looks exactly one
# sub-directory deep, i.e. <data_dir>/<sub-directory>/<image>.jpg
data_dir = pathlib.Path('./garbage-classification/Garbage classification')

# Verify the download. If it was interrupted (for example the credential
# prompt was aborted) or the archive layout differs, the glob finds nothing.
# Stop here with a clear message instead of reporting "0 images" as a success
# and failing later inside the data loader.
image_count = sum(1 for _ in data_dir.glob('*/*.jpg'))
if image_count == 0:
    raise FileNotFoundError(
        f"No .jpg images found under '{data_dir}'. The dataset download did "
        "not complete; re-run this cell and provide valid Kaggle credentials."
    )
print(f"\nSuccessfully downloaded {image_count} images.")

# === STEP 2: LOAD AND PREPROCESS DATA ===

print("\n--- Step 2: Loading and Preprocessing Data ---")

# Batch size and image geometry used by both data loaders
BATCH_SIZE = 32
IMG_HEIGHT = 224
IMG_WIDTH = 224
IMG_SIZE = (IMG_HEIGHT, IMG_WIDTH)

# Arguments shared by the two loader calls. Both calls use the same split
# fraction and seed; only the requested subset differs.
_split_kwargs = dict(
  validation_split=0.2,
  seed=123,
  image_size=IMG_SIZE,
  batch_size=BATCH_SIZE,
)

# Training subset (the 80% side of the 0.2 validation split)
train_ds = tf.keras.utils.image_dataset_from_directory(
  data_dir, subset="training", **_split_kwargs
)

# Validation subset (the remaining 20%)
val_ds = tf.keras.utils.image_dataset_from_directory(
  data_dir, subset="validation", **_split_kwargs
)

# Class names must be read before the datasets are re-wrapped below
class_names = train_ds.class_names
print(f"Class names: {class_names}")

# Input pipeline tuning: cache, shuffle (training only), then prefetch
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache()
train_ds = train_ds.shuffle(1000)
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache()
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)

# === STEP 3: BUILD THE MODEL WITH TRANSFER LEARNING ===

print("\n--- Step 3: Building Model (MobileNetV2) ---")

# Augmentation stack: random horizontal flip, small rotation, small zoom.
# It is the first stage of the model and is meant to reduce overfitting.
data_augmentation = keras.Sequential([
  layers.RandomFlip("horizontal", input_shape=(IMG_HEIGHT, IMG_WIDTH, 3)),
  layers.RandomRotation(0.1),
  layers.RandomZoom(0.1),
])

# MobileNetV2 backbone with ImageNet weights and no classification head
# (include_top=False)
IMG_SHAPE = IMG_SIZE + (3,)
base_model = tf.keras.applications.MobileNetV2(
  input_shape=IMG_SHAPE,
  include_top=False,
  weights='imagenet',
)

# Keep the pretrained weights fixed so only the new head is trained
base_model.trainable = False

# Assemble the full model: augment -> MobileNetV2 preprocessing -> backbone
inputs = tf.keras.Input(shape=IMG_SHAPE)
x = tf.keras.applications.mobilenet_v2.preprocess_input(data_augmentation(inputs))
x = base_model(x, training=False)

# New head: pooled features -> dropout -> one raw logit per class
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.2)(x)
outputs = layers.Dense(len(class_names))(x)
model = tf.keras.Model(inputs, outputs)

# The Dense layer has no activation, so the loss is told to expect logits
model.compile(
  optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
  metrics=['accuracy'],
)

model.summary()

# === STEP 4: TRAIN THE MODEL ===

print("\n--- Step 4: Training the Model ---")
EPOCHS = 10

# Fit on the training split, reporting validation metrics after each epoch
history = model.fit(train_ds, validation_data=val_ds, epochs=EPOCHS)

# Score the trained Keras model on the validation split
print("\nEvaluating final Keras model...")
eval_results = model.evaluate(val_ds)
loss, accuracy = eval_results
print(f"Final Keras Model Accuracy: {accuracy:.2%}")

# Persist the Keras model in HDF5 format
model.save('recyclable_model.h5')
print("Keras model saved as 'recyclable_model.h5'")

# === STEP 5: CONVERT TO TENSORFLOW LITE (TFLITE) ===

print("\n--- Step 5: Converting to TensorFlow Lite (Quantized) ---")

# Build a converter from the in-memory Keras model and turn on the default
# optimization set, which is what produces the quantized model
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# convert() returns the serialized model, which is written out below
tflite_model_quant = converter.convert()

# Write the TFLite model to disk
with open('model_quant.tflite', 'wb') as tflite_file:
  tflite_file.write(tflite_model_quant)

print("Quantized TFLite model saved as 'model_quant.tflite'")

# On-disk size of each model, in MB
BYTES_PER_MB = 1024 * 1024
keras_size = os.path.getsize('recyclable_model.h5') / BYTES_PER_MB
tflite_size = os.path.getsize('model_quant.tflite') / BYTES_PER_MB

print(f"\nKeras Model Size:    {keras_size:.2f} MB")
print(f"TFLite Model Size: {tflite_size:.2f} MB")
print(f"TFLite model is {keras_size/tflite_size:.1f}x smaller!")

# === STEP 6: TEST THE TFLITE MODEL (SIMULATION) ===

print("\n--- Step 6: Testing TFLite Model Performance ---")

# Load the TFLite model with the Interpreter
interpreter = tf.lite.Interpreter(model_path='model_quant.tflite')
interpreter.allocate_tensors()

# Get input and output tensor details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
input_index = input_details[0]['index']
output_index = output_details[0]['index']

# Take the input shape and dtype from the model itself rather than
# hard-coding (1, 224, 224, 3) / float32, so this cell keeps working if the
# image size or the quantization settings change.
input_shape = tuple(input_details[0]['shape'])
input_dtype = input_details[0]['dtype']

# --- Test inference time ---
test_image = np.zeros(input_shape, dtype=input_dtype)

# Run one dummy inference to "warm up" the interpreter
interpreter.set_tensor(input_index, test_image)
interpreter.invoke()

# Average over several runs with a monotonic high-resolution clock; a single
# time.time() sample is too noisy for a millisecond-scale measurement.
TIMING_RUNS = 20
start_time = time.perf_counter()
for _ in range(TIMING_RUNS):
  interpreter.set_tensor(input_index, test_image)
  interpreter.invoke()
end_time = time.perf_counter()
print(f"TFLite Inference Time (Colab CPU): {(end_time - start_time) * 1000 / TIMING_RUNS:.2f} ms")

# --- Test accuracy on a few batches from the validation set ---
correct_predictions = 0
total_images = 0

for images, labels in val_ds.take(5): # Test on 5 batches
  # Convert once per batch to NumPy arrays in the interpreter's input dtype
  batch_images = images.numpy().astype(input_dtype)
  batch_labels = labels.numpy()

  for img, label in zip(batch_images, batch_labels):
    total_images += 1

    # The interpreter takes one image at a time, so add a batch dimension
    interpreter.set_tensor(input_index, img[np.newaxis, ...])

    # Run inference
    interpreter.invoke()

    # Get the output and pick the highest-scoring class
    output_data = interpreter.get_tensor(output_index)
    prediction = np.argmax(output_data)

    if prediction == label:
      correct_predictions += 1

# Guard against an empty validation set so the cell does not end in a
# ZeroDivisionError
tflite_accuracy = correct_predictions / total_images if total_images else 0.0
print(f"TFLite Model Accuracy (on {total_images} images): {tflite_accuracy:.2%}")

print("\n=== TASK 1 COMPLETE ===")

print("\n=== TASK 1 COMPLETE ===")

# === STEP 7: SAMPLE DEPLOYMENT SCRIPT (FOR RASPBERRY PI) ===

# The following code is NOT for Colab. It is what you would
# save as a 'classify.py' file on your Raspberry Pi.

# Source text of the standalone 'classify.py' script for the Raspberry Pi.
# It is only printed by this notebook, never executed here.
pi_script = """
import tflite_runtime.interpreter as tflite
import numpy as np
from PIL import Image
import sys

# --- CONFIG ---
MODEL_PATH = "model_quant.tflite"
IMG_HEIGHT = 224
IMG_WIDTH = 224
CLASS_NAMES = ['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash']
# --- END CONFIG ---

def load_model(model_path):
    \"\"\"Loads the TFLite model and allocates tensors.\"\"\"
    interpreter = tflite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()
    return interpreter

def preprocess_image(image_path, img_size):
    \"\"\"Loads an image and returns a float32 batch holding one RGB image.

    img_size is (width, height), the order PIL's resize() expects.
    \"\"\"
    with Image.open(image_path) as source:
        # Force 3 channels so grayscale, palette and RGBA files fit the model input
        img = source.convert("RGB").resize(img_size)
    pixels = np.array(img, dtype=np.float32)
    # Add a batch dimension
    return np.expand_dims(pixels, axis=0)

def run_inference(interpreter, input_data):
    \"\"\"Runs inference on the preprocessed image.\"\"\"
    # Get I/O details
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # Set the input tensor
    interpreter.set_tensor(input_details[0]['index'], input_data)

    # Run inference
    interpreter.invoke()

    # Get the output
    output_data = interpreter.get_tensor(output_details[0]['index'])
    return output_data

def softmax(logits):
    \"\"\"Converts raw logits into probabilities that sum to 1.\"\"\"
    # Subtract the max before exponentiating for numerical stability
    exps = np.exp(logits - np.max(logits))
    return exps / np.sum(exps)

def main():
    if len(sys.argv) != 2:
        print("Usage: python classify.py <image_path>")
        return

    # Read the path only after the argument count has been validated
    image_path = sys.argv[1]

    print("Loading TFLite model...")
    interpreter = load_model(MODEL_PATH)

    print(f"Processing image: {image_path}")
    input_data = preprocess_image(image_path, (IMG_WIDTH, IMG_HEIGHT))

    print("Running inference...")
    raw_prediction = run_inference(interpreter, input_data)

    # Post-process the result: the model outputs raw logits, so apply
    # softmax to report a probability
    probabilities = softmax(raw_prediction[0])
    predicted_index = int(np.argmax(probabilities))
    predicted_class = CLASS_NAMES[predicted_index]
    confidence = probabilities[predicted_index]

    print("--- RESULT ---")
    print(f"Predicted Class: {predicted_class}")
    print(f"Confidence: {confidence:.2%}")
    print("--------------")

if __name__ == "__main__":
    main()
"""

print("\n--- Sample Raspberry Pi Deployment Script (classify.py) ---")
print(pi_script)

--- Step 1: Downloading Dataset ---
Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username:

Abort: 