# Required Dependencies


In [4]:
# TensorFlow and Keras imports
import warnings

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

# Scikit-learn imports
from sklearn.metrics import classification_report, confusion_matrix

# Other imports
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import Image  # For displaying images in Jupyter notebooks
import glob




ModuleNotFoundError: No module named 'sklearn'

# Why waste classification?

In [None]:
from IPython.display import Image

# Show the PNG at the given path inline; width/height are the display size in pixels.
# NOTE(review): 'D:Content\...' has no backslash after the drive letter, so on Windows
# it is resolved relative to the current directory of drive D: - confirm this is intended.
Image(r"D:Content\share-of-global-mismanaged-plastic-waste.png", height=700, width=900)


In [None]:
from IPython.display import Image

# Show the PNG at the given path inline; width/height are the display size in pixels.
# NOTE(review): 'D:Content\...' has no backslash after the drive letter, so on Windows
# it is resolved relative to the current directory of drive D: - confirm this is intended.
Image(r"D:Content\stats e waste.png", height=700, width=900)

In [None]:
from IPython.display import Image

# Show the PNG at the given path inline; width/height are the display size in pixels.
# NOTE(review): 'D:Content\...' has no backslash after the drive letter, so on Windows
# it is resolved relative to the current directory of drive D: - confirm this is intended.
Image(r"D:Content\waste crisis.png", height=700, width=900)

In [None]:
from IPython.display import Image

# Show the PNG at the given path inline; width/height are the display size in pixels.
# NOTE(review): 'D:Content\...' has no backslash after the drive letter, so on Windows
# it is resolved relative to the current directory of drive D: - confirm this is intended.
Image(r"D:Content\sorting challenges.png", height=700, width=900)

# Waste Sorting is Risky and Time-Intensive

In [None]:
from IPython.display import Image

# Show the PNG at the given path inline; width/height are the display size in pixels.
# NOTE(review): 'D:Content\...' has no backslash after the drive letter, so on Windows
# it is resolved relative to the current directory of drive D: - confirm this is intended.
Image(r"D:Content\wastesorting.png", height=700, width=900)

# Purpose:   
**Efficient Deep Learning Model That Classifies Images as Organic or Recyclable**
# Data Structure
Data source: https://www.kaggle.com/datasets/wasifmahmood01/custom-waste-classification-dataset/data

In [None]:
from IPython.display import Image

# Show the PNG at the given path inline; width/height are the display size in pixels.
# NOTE(review): 'D:Content\...' has no backslash after the drive letter, so on Windows
# it is resolved relative to the current directory of drive D: - confirm this is intended.
Image(r"D:Content\Dataset (4).png", height=700, width=900)


In [None]:
# Root folder of the dataset; it is expected to contain 'train' and 'test' sub-folders.
# This now points at the same location that the data generators and the prediction
# cells further down use (it previously pointed at a different "pictures" folder).
BASE_DIR = r"C:\Users\Hp\Videos\back up\wastes"  # Change this to the path where your dataset is located

# Derive the split folders from the root so the paths cannot drift apart.
train_dir = os.path.join(BASE_DIR, 'train')  # Path to the training data
test_dir = os.path.join(BASE_DIR, 'test')    # Path to the test data


# Data Visualization

In [None]:
# Split folders to inspect. Both live under the same dataset root that the data
# generators and the prediction cells use.
train_dir = r"C:\Users\Hp\Videos\back up\wastes\train"  # Replace with your actual training folder path
test_dir = r"C:\Users\Hp\Videos\back up\wastes\test"  # Replace with your actual test folder path
IMG_SIZE = (224, 224)

# File extensions counted as images. The dataset mixes '.jpg' and '.jpeg' files,
# so matching '*.jpg' alone under-counts the samples.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp')
SAMPLES_PER_CLASS = 5


def show_class_samples(split_dir, split_name, img_size=IMG_SIZE, n_samples=SAMPLES_PER_CLASS):
    """Print the image count of every class folder and plot a few random samples.

    Args:
        split_dir: Folder containing one sub-folder per class.
        split_name: Word used in the printed summary (e.g. 'training' or 'test').
        img_size: (height, width) each preview image is resized to.
        n_samples: Maximum number of preview images shown per class.

    Returns:
        The sorted list of class (sub-folder) names found in ``split_dir``.
    """
    # Only sub-folders are classes; stray files in the split folder are ignored.
    class_names = sorted(
        entry for entry in os.listdir(split_dir)
        if os.path.isdir(os.path.join(split_dir, entry))
    )

    for class_name in class_names:
        class_dir = os.path.join(split_dir, class_name)
        class_images = sorted(
            os.path.join(class_dir, file_name)
            for file_name in os.listdir(class_dir)
            if file_name.lower().endswith(IMAGE_EXTENSIONS)
        )
        print(f"Number of {split_name} samples in class '{class_name}': {len(class_images)}")

        if not class_images:
            continue

        # Sample without replacement so the same picture is not shown twice.
        chosen_paths = np.random.choice(
            class_images, size=min(n_samples, len(class_images)), replace=False
        )

        fig, axes = plt.subplots(1, n_samples, figsize=(10, 5), squeeze=False)
        for ax in axes[0]:
            ax.axis('off')  # also blanks panels left unused by small classes
        for ax, img_path in zip(axes[0], chosen_paths):
            img = load_img(img_path, target_size=img_size)  # Load and resize image
            ax.imshow(img_to_array(img) / 255.0)  # imshow expects floats in [0, 1]
            ax.set_title(class_name)
        plt.show()

    return class_names


# Visualize and count the number of images in each class for both splits
class_names = show_class_samples(train_dir, 'training')
class_names = show_class_samples(test_dir, 'test')




# **Data Augmentation**

Data Augmentation is a technique that helps to increase the diversity of your training data without actually collecting new data. It artificially expands the size of the training dataset by applying random transformations to the images. This helps the model generalize better and reduces overfitting.

---

## **Here are the transformations we will apply to the training data:**

### 1. **Rotation:** 
   - Random rotation of images by a certain degree.

### 2. **Shifting:** 
   - Random horizontal and vertical shifts.

### 3. **Shear:** 
   - Shear transformations that make the images slanted.

### 4. **Zoom:** 
   - Zooming into the image by a random factor.

### 5. **Flipping:** 
   - Horizontal flipping of the images.

### 6. **Rescaling:** 
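   - Normalize the image pixel values to [0, 1] so every input is on the same small numeric scale. Note that Keras' own MobileNetV2 `preprocess_input` scales pixels to [-1, 1]; this notebook uses [0, 1] consistently for both training and prediction.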

---

## **Data Augmentation for Training Set:**

The training data will undergo multiple random transformations to increase the diversity of the dataset.

### 1. **Rotation Range:** 
   - Rotate images by up to 30 degrees.

### 2. **Width and Height Shift:** 
   - Shift the images horizontally and vertically by up to 20%.

### 3. **Zoom Range:** 
   - Zoom in and out by up to 20%.

### 4. **Shear Range:** 
   - Apply shear transformations (slanting) up to 20%.

### 5. **Horizontal Flip:** 
   - Randomly flip images horizontally.

---

In [None]:
# Shared input settings for both generators
IMG_SIZE = (224, 224)  # Every image is resized to 224x224
BATCH_SIZE = 32  # Images per batch for training and testing
BASE_DIR = r"C:\Users\Hp\Videos\back up\wastes"  # Dataset root holding 'train' and 'test'

# Random transformations applied to training images only
_augmentation_options = dict(
    rotation_range=30,       # rotate by up to 30 degrees
    width_shift_range=0.2,   # horizontal shift, up to 20% of the width
    height_shift_range=0.2,  # vertical shift, up to 20% of the height
    shear_range=0.2,         # shear (slant) intensity
    zoom_range=0.2,          # zoom in/out by up to 20%
    horizontal_flip=True,    # random left-right flips
    fill_mode='nearest',     # how pixels exposed by a transform are filled
)

# Both generators scale pixel values into [0, 1]; only the training one augments.
train_datagen = ImageDataGenerator(rescale=1.0/255, **_augmentation_options)
test_datagen = ImageDataGenerator(rescale=1.0/255)

# Options common to both directory iterators ('categorical' -> one-hot labels)
_flow_options = dict(
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
)

# Batches of augmented training images read from <BASE_DIR>/train
train_generator = train_datagen.flow_from_directory(
    directory=os.path.join(BASE_DIR , 'train'),
    **_flow_options,
)

# Batches of test images read from <BASE_DIR>/test; shuffling is disabled so that
# predictions stay aligned with `test_generator.classes` during evaluation.
test_generator = test_datagen.flow_from_directory(
    directory=os.path.join(BASE_DIR , 'test'),
    shuffle=False,
    **_flow_options,
)


In [None]:
# Mapping of class name -> integer index, as assigned by the training generator
class_labels = train_generator.class_indices

# Show the name -> index mapping
print("Class Labels and Encoded Values:", class_labels, sep="\n")

# Inverse mapping (index -> class name), handy for decoding predictions
reversed_labels = dict(zip(class_labels.values(), class_labels.keys()))
print("\nReversed Encoded Labels:", reversed_labels, sep="\n")


In [None]:
# Import necessary callbacks
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Where the best model seen during training is written
filepath = './final_model_weights.keras'

# Metric both callbacks watch. The model is compiled with metrics=['accuracy'] only,
# so the training logs contain 'val_accuracy' but no 'val_auc'. Monitoring a key
# that is missing from the logs means early stopping never triggers and the
# checkpoint is never written, so monitor the metric that is actually reported.
MONITORED_METRIC = 'val_accuracy'

# Early stopping: end training once validation accuracy has not improved for 5 epochs
earlystopping = EarlyStopping(monitor=MONITORED_METRIC,
                              mode='max',         # higher accuracy is better
                              patience=5,         # epochs without improvement before stopping
                              verbose=1)          # print a message when stopping

# Model checkpoint: keep only the model with the best validation accuracy so far
checkpoint = ModelCheckpoint(filepath,
                             monitor=MONITORED_METRIC,
                             mode='max',           # higher accuracy is better
                             save_best_only=True,  # overwrite only on improvement
                             verbose=1)            # print a message when saving

# List of callbacks to use during training
callback_list = [earlystopping, checkpoint]


# Model Development

In [None]:


# MobileNetV2 backbone with ImageNet weights and without its classification head,
# taking 224x224 RGB images as input.
base_model = MobileNetV2(
    weights="imagenet",
    include_top=False,
    input_shape=(224, 224, 3),
)


In [None]:
# Mark every layer of the backbone as non-trainable so that training
# leaves the pretrained weights untouched.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = False


In [None]:
# Import necessary modules
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, BatchNormalization
from tensorflow.keras.applications import MobileNetV2

# Pretrained MobileNetV2 backbone without the top (fully connected) layers
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False  # Freeze the backbone; only the new head is trained

# Size the output layer from the data instead of hard-coding it: with
# class_mode='categorical' the generator yields one one-hot column per class
# folder, and the softmax layer must have exactly that many units.
num_classes = len(train_generator.class_indices)

# Classification head stacked on the frozen backbone
model = Sequential([
    base_model,
    Dropout(0.2),           # regularise the backbone features
    Flatten(),              # feature maps -> flat vector for the dense layers
    BatchNormalization(),

    Dense(512, activation="relu", kernel_initializer='he_uniform'),
    BatchNormalization(),
    Dropout(0.2),

    Dense(256, activation="relu", kernel_initializer='he_uniform'),
    BatchNormalization(),
    Dropout(0.2),

    Dense(128, activation="relu", kernel_initializer='he_uniform'),
    Dropout(0.2),

    # One softmax probability per waste class
    Dense(num_classes, activation="softmax"),
])


In [None]:
# Compiling the Model
import re
import warnings

# `filterwarnings` compiles `message` as a regular expression that must match the
# start of the warning text. The literal text contains regex metacharacters
# ("(**kwargs)" is not even a valid pattern), so it has to be escaped.
warnings.filterwarnings(
    "ignore",
    category=UserWarning,
    message=re.escape("Your `PyDataset` class should call `super().__init__(**kwargs)`"),
)

model.compile(optimizer=Adam(learning_rate=0.0001),  # Adam optimizer with a small learning rate
              loss='categorical_crossentropy',  # Matches the one-hot labels from the generators
              metrics=['accuracy'])  # Metrics to track during training

# Training the Model
# NOTE(review): the test generator doubles as validation data here, so early
# stopping and checkpointing are driven by the same data the model is evaluated on.
history = model.fit(
    train_generator,  # The training data generator
    epochs=15,  # Number of epochs
    validation_data=test_generator,  # Validation data generator
    callbacks=callback_list  # List of callbacks (early stopping and checkpoint)
)

# Saving the Final Model
# Both file names are kept: the prediction cells load the second one.
# This saves the weights as they are after the last epoch that ran.
model.save("mobilenetv2_waste_classification_final.h5")
print("Final Model Saved as mobilenetv2_waste_classification_final.h5")
model.save("mobilenetv2_waste_classification.h5")
print("Model saved as mobilenetv2_waste_classification.h5")

# Evaluating the Model on Test Data (once; the result does not change between calls)
test_loss, test_accuracy = model.evaluate(test_generator)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Plot training performance: accuracy and loss live on different scales, so each
# gets its own panel, with the validation curve next to the training curve.
fig, (ax_accuracy, ax_loss) = plt.subplots(1, 2, figsize=(12, 5))
for ax, metric, label in ((ax_accuracy, 'accuracy', 'Accuracy'), (ax_loss, 'loss', 'Loss')):
    ax.plot(history.history[metric], label=f'Train {label}')
    validation_key = f'val_{metric}'
    if validation_key in history.history:
        ax.plot(history.history[validation_key], label=f'Validation {label}')
    ax.set(title=label, xlabel='Epoch', ylabel=label)
    ax.legend()
fig.suptitle('Training Performance')
plt.show()


In [None]:
from sklearn.metrics import classification_report
import numpy as np

# Restart the test iterator so predictions begin at the first sample
test_generator.reset()

# Class probabilities for every test image, reduced to the most likely class index
y_pred = model.predict(test_generator)
y_pred_classes = y_pred.argmax(axis=1)

# Ground-truth class indices and the class names in index order
y_true = test_generator.classes
class_labels = list(test_generator.class_indices)

# Per-class precision / recall / F1 summary
report_text = classification_report(y_true, y_pred_classes, target_names=class_labels)
print("\nClassification Report:\n", report_text)

In [None]:
# Libraries used for the confusion matrix
import numpy as np
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Predicted class index for every test image
test_predictions = model.predict(test_generator, verbose=1)
predicted_labels = test_predictions.argmax(axis=1)

# Ground-truth class indices in the order the generator yields them
true_labels = test_generator.classes

# Rows are true classes, columns are predicted classes
cm = confusion_matrix(true_labels, predicted_labels)

# Draw the matrix as an annotated heatmap with class names on both axes
plt.figure(figsize=(10, 8))
tick_names = test_generator.class_indices.keys()
heatmap_ax = sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    cbar=False,
    xticklabels=tick_names,
    yticklabels=tick_names,
)
heatmap_ax.set_title('Confusion Matrix')
heatmap_ax.set_xlabel('Predicted Labels')
heatmap_ax.set_ylabel('True Labels')
plt.show()



# Model prediction

In [None]:


import os

# Load the trained model once and reuse it for every prediction
model = load_model('mobilenetv2_waste_classification.h5')

# Set image size (same as the model input size)
IMG_SIZE = (224, 224)

# Class labels come from the folder names of the training set.
# Replace this path with the actual path to your 'train' directory.
train_dir = r"C:\Users\Hp\Videos\back up\wastes\train"

# Only sub-folders are classes. Stray files in the folder must be skipped,
# otherwise the sorted positions no longer line up with the class indices
# used during training.
class_labels = sorted(
    entry for entry in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, entry))
)


def predict_waste_class(img_path, model, class_labels, img_size=IMG_SIZE):
    """Classify one image file, print the result and show the image.

    Args:
        img_path: Path of the image to classify.
        model: Loaded Keras model used for the prediction.
        class_labels: Class names, ordered by the class index used in training.
        img_size: (height, width) the image is resized to before prediction.

    Returns:
        The predicted class name.

    Raises:
        ValueError: If the number of model outputs differs from the number of
            class labels, since the index -> name lookup would then be wrong.
    """
    # Same preprocessing as the generators: resize, add a batch axis, scale to [0, 1]
    img = image.load_img(img_path, target_size=img_size)
    img_array = np.expand_dims(image.img_to_array(img), axis=0) / 255.0

    predictions = model.predict(img_array)
    if predictions.shape[-1] != len(class_labels):
        raise ValueError(
            f"Model outputs {predictions.shape[-1]} classes but "
            f"{len(class_labels)} class folders were found in the training directory"
        )

    # Index of the highest predicted probability -> class name
    predicted_class_index = int(np.argmax(predictions, axis=1)[0])
    predicted_class = class_labels[predicted_class_index]

    # Output the prediction
    print(f"Predicted class: {predicted_class}")

    # Display the image
    plt.imshow(img)
    plt.title(f"Predicted: {predicted_class}")
    plt.axis('off')
    plt.show()

    return predicted_class


# Images to classify; replace with the paths of the images you want to predict.
# Note that the third sample is taken from the training split.
sample_image_paths = [
    r"C:\Users\Hp\Videos\back up\wastes\test\E-waste\E-waste (216).jpg",
    r"C:\Users\Hp\Videos\back up\wastes\test\organic waste\Organic waste (5).jpeg",
    r"C:\Users\Hp\Videos\back up\wastes\train\automobile wastes\automobile waste (20).jpg",
    r"C:\Users\Hp\Videos\back up\wastes\test\paper waste\IMG20250118011726.jpg",
]

for img_path in sample_image_paths:
    predicted_class = predict_waste_class(img_path, model, class_labels)
