In [14]:
import os
import cv2 # Used for reading and resizing images
import numpy as np # Used for working with arrays (image data)
from sklearn.model_selection import train_test_split # Used later for splitting data
# Used later for one-hot encoding labels, although for binary we might use it differently
from tensorflow.keras.utils import to_categorical
# Specific preprocessing function for ResNet50 from Keras
from tensorflow.keras.applications.resnet50 import preprocess_input

In [15]:
# --- Dataset configuration -------------------------------------------------
# Root folder of the dataset; it is expected to contain one sub-folder per
# entry of `original_categories`. Adjust the path for your machine.
DATA_DIR = './TrashNet Dataset'

# Sub-folder names, in the order in which they are loaded.
original_categories = ['cardboard', 'glass', 'metal', 'paper', 'plastic', 'trash']

# Binary target classes. The list position is the numeric label:
#   index 0 -> 'degradable'      (cardboard, paper)
#   index 1 -> 'non-degradable'  (glass, metal, plastic, trash)
binary_categories = ['degradable', 'non-degradable']

# Lookup from each original folder name to its binary class name.
category_map = {
    **dict.fromkeys(('cardboard', 'paper'), 'degradable'),
    **dict.fromkeys(('glass', 'metal', 'plastic', 'trash'), 'non-degradable'),
}

# Spatial size every image is resized to before being fed to ResNet-50.
IMG_SIZE = (224, 224)

print("Data paths and categories defined.")

Data paths and categories defined.


In [16]:
# Load every image of every category folder, resize it to IMG_SIZE and pair it
# with its binary label (the index into `binary_categories`).
images = []  # Resized RGB image arrays, one per successfully loaded file
labels = []  # Binary label for the image at the same position in `images`

print(f"Loading images from: {DATA_DIR}...")

# Loop through each original category folder
for category in original_categories:
    path = os.path.join(DATA_DIR, category)
    # Translate the folder name into its binary class name and numeric label
    binary_label_name = category_map[category]
    binary_label = binary_categories.index(binary_label_name)  # 0 or 1

    print(f"Processing category: {category} (Binary label: {binary_label_name})")

    # os.listdir() returns entries in arbitrary, filesystem-dependent order.
    # Sorting makes the sample order deterministic, so the fixed random_state
    # used by the later train/val/test split actually yields reproducible splits.
    for img_name in sorted(os.listdir(path)):
        img_path = os.path.join(path, img_name)

        # cv2.imread does not raise on unreadable or non-image files (e.g.
        # hidden OS metadata files); it returns None. Report and skip those.
        img = cv2.imread(img_path)
        if img is None:
            print(f"Error loading image {img_path}: file could not be read as an image")
            continue

        try:
            # OpenCV decodes to BGR channel order, whereas ResNet-50's
            # preprocess_input expects RGB input (it performs the RGB->BGR
            # flip itself). Convert here so the channels are not swapped twice.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # cv2.resize takes (width, height); IMG_SIZE is square so order is moot.
            img = cv2.resize(img, IMG_SIZE)
        except cv2.error as e:
            # Best effort: a single bad image must not abort the whole load.
            print(f"Error loading image {img_path}: {e}")
            continue

        images.append(img)
        labels.append(binary_label)

# Convert the lists to NumPy arrays
images = np.array(images)
labels = np.array(labels)

print("Image loading and resizing complete.")
print(f"Total images loaded: {len(images)}")
print(f"Shape of images array: {images.shape}")
print(f"Shape of labels array: {labels.shape}")

Loading images from: ./TrashNet Dataset...
Processing category: cardboard (Binary label: degradable)
Processing category: glass (Binary label: non-degradable)
Processing category: metal (Binary label: non-degradable)
Processing category: paper (Binary label: degradable)
Processing category: plastic (Binary label: non-degradable)
Processing category: trash (Binary label: non-degradable)
Image loading and resizing complete.
Total images loaded: 2527
Shape of images array: (2527, 224, 224, 3)
Shape of labels array: (2527,)


In [17]:
# Normalize the images using ResNet-50's preprocess_input function.
#
# This cell rebinds `images`, so running it a second time would normalize
# already-normalized data. The raw images are integer-typed; preprocess_input
# returns floating-point data. Checking the dtype therefore makes the cell
# idempotent: the first run normalizes, any re-run is a no-op.
if np.issubdtype(images.dtype, np.integer):
    images = preprocess_input(images)
else:
    print("Images are already floating-point (normalized earlier); skipping preprocess_input.")

print("Image normalization complete.")
print(f"Shape of normalized images array: {images.shape}")

Image normalization complete.
Shape of normalized images array: (2527, 224, 224, 3)


In [18]:
# Stage 1 - hold out 10% of all samples as the test set.
# Stratifying on the labels keeps the degradable / non-degradable ratio of the
# test set equal to that of the full dataset.
X_train_val, X_test, y_train_val, y_test = train_test_split(
    images,
    labels,
    test_size=0.1,
    stratify=labels,
    random_state=42,
)
print(f"Shape of training+validation images: {X_train_val.shape}")
print(f"Shape of testing images: {X_test.shape}")
print(f"Shape of training+validation labels: {y_train_val.shape}")
print(f"Shape of testing labels: {y_test.shape}")

# Stage 2 - carve the validation set out of the remaining 90%.
# The target is 70% train / 20% validation of the *whole* dataset, so the
# validation fraction of the remaining pool is 0.2 / 0.9. Stratified as above.
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val,
    y_train_val,
    test_size=(0.2 / 0.9),
    stratify=y_train_val,
    random_state=42,
)

print("\nData splitting complete.")
print(f"Shape of training images: {X_train.shape}")
print(f"Shape of validation images: {X_val.shape}")
print(f"Shape of testing images: {X_test.shape}")
print(f"Shape of training labels: {y_train.shape}")
print(f"Shape of validation labels: {y_val.shape}")
print(f"Shape of testing labels: {y_test.shape}")

Shape of training+validation images: (2274, 224, 224, 3)
Shape of testing images: (253, 224, 224, 3)
Shape of training+validation labels: (2274,)
Shape of testing labels: (253,)

Data splitting complete.
Shape of training images: (1768, 224, 224, 3)
Shape of validation images: (506, 224, 224, 3)
Shape of testing images: (253, 224, 224, 3)
Shape of training labels: (1768,)
Shape of validation labels: (506,)
Shape of testing labels: (253,)


In [19]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
import tensorflow as tf # Import tensorflow

# Build the ResNet-50 convolutional base with ImageNet weights.
# - include_top=False drops the original ImageNet classification layer.
# - No `pooling` argument is passed, so the base outputs the feature map of the
#   last convolutional block; pooling is added separately when the custom
#   classification head is built.
# - input_shape is the image height/width from IMG_SIZE plus 3 colour channels.
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(*IMG_SIZE, 3),
)

print("ResNet-50 base model loaded.")

ResNet-50 base model loaded.


In [20]:
# Build the binary-classification head on top of the ResNet-50 base.
# Collapse the base's output feature map with global average pooling.
pooled_features = GlobalAveragePooling2D()(base_model.output)
# Fully connected layer; 128 units is a starting point and can be tuned.
hidden_features = Dense(128, activation='relu')(pooled_features)
# Dropout with rate 0.5 for regularization.
x = Dropout(0.5)(hidden_features)
# Single sigmoid unit: one output value in [0, 1] for the two-class problem.
predictions = Dense(1, activation='sigmoid')(x)

# Assemble base + head into the final model
model = Model(inputs=base_model.input, outputs=predictions)

print("Custom classification layers added.")
model.summary()  # Show the full architecture

Custom classification layers added.


In [21]:
# First training phase: only the newly added head should learn, so mark every
# layer of the ResNet-50 base as non-trainable.
for base_layer in base_model.layers:
    base_layer.trainable = False

print("ResNet-50 base layers frozen.")

# Print the summary again; the trainable parameter count should now be lower.
model.summary()

ResNet-50 base layers frozen.


In [22]:
# Configure the model for training:
# - Adam optimizer with a learning rate of 1e-4
# - binary cross-entropy loss, matching the single sigmoid output
# - accuracy reported as the monitoring metric
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(
    optimizer=adam_optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

print("Model compiled successfully.")

Model compiled successfully.


In [23]:
import tensorflow as tf

# Report which hardware TensorFlow can see before starting the training run.
gpu_devices = tf.config.list_physical_devices('GPU')

if not gpu_devices:
    print("No GPU devices found. TensorFlow is using the CPU.")
else:
    print(f"Num GPUs Available: {len(gpu_devices)}")
    print("GPU Device(s):")
    for device in gpu_devices:
        print(f"  {device.name} -- Type: {device.device_type}")
    print("\nTensorFlow is configured to use the GPU.")

# Per the original author's note, False is the expected result on a Mac.
print("Is built with CUDA: ", tf.test.is_built_with_cuda())

No GPU devices found. TensorFlow is using the CPU.
Is built with CUDA:  False


In [None]:
# Fit the model (first phase: the ResNet-50 base layers are frozen).
# - x / y: training images and their binary labels
# - validation_data: evaluated at the end of every epoch
# - batch_size: samples per gradient update
# - epochs: full passes over the training set
# The returned `history` object records loss and accuracy per epoch for both
# the training and the validation data.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=32,
    epochs=30,
)

print("Model training complete.")

Epoch 1/30
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 949ms/step - accuracy: 0.6931 - loss: 0.6417 - val_accuracy: 0.9328 - val_loss: 0.1830
Epoch 2/30
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 1s/step - accuracy: 0.9167 - loss: 0.2158 - val_accuracy: 0.9486 - val_loss: 0.1346
Epoch 3/30
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 1s/step - accuracy: 0.9466 - loss: 0.1604 - val_accuracy: 0.9565 - val_loss: 0.1132
Epoch 4/30
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 1s/step - accuracy: 0.9479 - loss: 0.1473 - val_accuracy: 0.9644 - val_loss: 0.1034
Epoch 5/30
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 1s/step - accuracy: 0.9626 - loss: 0.1096 - val_accuracy: 0.9585 - val_loss: 0.0947
Epoch 6/30
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 1s/step - accuracy: 0.9577 - loss: 0.1068 - val_accuracy: 0.9664 - val_loss: 0.0958
Epoch 7/30
[1m56/56[0m [32m━━━━━━━