# Import Data

In [1]:
import os
import shutil
import requests
import zipfile
import random

# Seed the RNG so the train/val/test split is the same on every run.
split_seed = 42
random.seed(split_seed)

# Download the dataset (skipped when the archive is already on disk)
url = "https://github.com/garythung/trashnet/raw/master/data/dataset-resized.zip"
archive_path = "dataset-resized.zip"

if not os.path.exists(archive_path):
    # Stream to a temporary file and rename at the end, so an interrupted
    # download never leaves a truncated archive under the final name.
    partial_path = archive_path + ".part"
    with requests.get(url, stream=True, timeout=60) as response:
        # Fail loudly on 4xx/5xx instead of saving an error page as a zip.
        response.raise_for_status()
        with open(partial_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=1 << 20):
                f.write(chunk)
    os.replace(partial_path, archive_path)

# Extract the dataset
with zipfile.ZipFile(archive_path, "r") as z:
    z.extractall("trashnet_dataset")

# Define the categories and directory structure
categories = ["cardboard", "glass", "metal", "paper", "plastic", "trash"]
base_dir = "trashnet_dataset/dataset-resized"
train_dir = "trashnet_dataset/train"
val_dir = "trashnet_dataset/val"
test_dir = "trashnet_dataset/test"

# Recreate the split directories from scratch. Copying on top of a previous
# split would let the same image end up in more than one of train/val/test.
for split_dir in (train_dir, val_dir, test_dir):
    if os.path.isdir(split_dir):
        shutil.rmtree(split_dir)
    os.makedirs(split_dir, exist_ok=True)

# Split ratio (whatever is left after train and val goes to test)
train_ratio = 0.8
val_ratio = 0.1

# Split the dataset
for category in categories:
    category_dir = os.path.join(base_dir, category)

    # os.listdir order is arbitrary, so sort before shuffling to make the
    # seeded shuffle deterministic. Hidden entries and sub-directories are
    # skipped because they are not images.
    images = sorted(
        name
        for name in os.listdir(category_dir)
        if not name.startswith(".")
        and os.path.isfile(os.path.join(category_dir, name))
    )
    random.shuffle(images)

    train_size = int(len(images) * train_ratio)
    val_size = int(len(images) * val_ratio)

    train_images = images[:train_size]
    val_images = images[train_size:train_size + val_size]
    test_images = images[train_size + val_size:]

    # Copy each subset into <split_dir>/<category>/
    for split_dir, split_images in (
        (train_dir, train_images),
        (val_dir, val_images),
        (test_dir, test_images),
    ):
        target_dir = os.path.join(split_dir, category)
        os.makedirs(target_dir, exist_ok=True)
        for image in split_images:
            shutil.copy(os.path.join(category_dir, image), os.path.join(target_dir, image))


# Preprocessing

In [3]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define the image size and batch size
image_size = (224, 224)
batch_size = 32

# Seed for training-batch shuffling and random augmentation
generator_seed = 42

# Create an ImageDataGenerator for the training set with data augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Create an ImageDataGenerator for the validation and testing sets without data augmentation
val_test_datagen = ImageDataGenerator(rescale=1./255)


def make_directory_generator(datagen, directory, shuffle, seed=None):
    """Build a one-hot-labelled batch iterator over the class sub-folders of `directory`.

    Args:
        datagen: The ImageDataGenerator that preprocesses the images.
        directory: Folder containing one sub-folder per class.
        shuffle: Whether the iterator shuffles the sample order.
        seed: Optional seed passed through to `flow_from_directory`.

    Returns:
        The iterator returned by `datagen.flow_from_directory`, using the
        notebook-level `image_size` and `batch_size`.
    """
    return datagen.flow_from_directory(
        directory,
        target_size=image_size,
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=shuffle,
        seed=seed,
    )


# Load and preprocess the training set (shuffled, with a fixed seed)
train_generator = make_directory_generator(
    train_datagen, "trashnet_dataset/train", shuffle=True, seed=generator_seed
)

# Load and preprocess the validation set.
# shuffle=False keeps the batch order aligned with `val_generator.classes`
# and `val_generator.filenames`, which is needed to compare predictions
# against labels.
val_generator = make_directory_generator(
    val_test_datagen, "trashnet_dataset/val", shuffle=False
)

# Load and preprocess the testing set (fixed order, same reason as above)
test_generator = make_directory_generator(
    val_test_datagen, "trashnet_dataset/test", shuffle=False
)


2023-04-15 16:58:40.866327: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Found 2019 images belonging to 6 classes.
Found 251 images belonging to 6 classes.
Found 257 images belonging to 6 classes.


# Train the Model

In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Take the input shape and the number of output units from the data pipeline
# rather than repeating literals, so changing `image_size` or the set of
# class folders cannot silently mismatch the model.
input_shape = image_size + (3,)  # three colour channels, the flow_from_directory default
num_classes = train_generator.num_classes

# Define the CNN model: three conv/pool stages followed by a dense classifier
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model (categorical cross-entropy matches the one-hot labels
# produced by class_mode='categorical')
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model. steps_per_epoch / validation_steps are omitted because
# Keras already runs len(generator) batches per epoch for these iterators.
history = model.fit(
    train_generator,
    epochs=25,
    validation_data=val_generator,
)

# Save the model
model.save('trash_classifier.h5')

Epoch 1/25


2023-04-15 17:03:26.244538: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]




2023-04-15 17:04:42.296294: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [5]:
# Score the trained model on the held-out test split, one full pass over it.
# evaluate() returns the loss followed by the compiled metrics (accuracy here).
test_steps = len(test_generator)
test_metrics = model.evaluate(test_generator, steps=test_steps)
test_loss, test_accuracy = test_metrics

for report_line in (
    f"Test loss: {test_loss:.4f}",
    f"Test accuracy: {test_accuracy:.4f}",
):
    print(report_line)


2023-04-15 17:36:36.613724: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype int32
	 [[{{node Placeholder/_0}}]]


Test loss: 0.8220
Test accuracy: 0.7354


# Collecting Trash Can Images

In [7]:
import os
import subprocess

def download_open_images_dataset(class_name, limit, dataset_dir):
    """Run the `oidv6 downloader` command-line tool for a single class.

    Args:
        class_name: Value passed to `--classes`.
        limit: Value passed to `--limit`; converted to a string.
        dataset_dir: Value passed to `--download_folder`.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status (because of `check=True`).
    """
    # NOTE(review): `--dataset train` and `--download_folder` may not mean
    # what they appear to; confirm against the oidv6 CLI documentation that
    # images really land in `dataset_dir`.
    command = ["oidv6", "downloader"]
    command += ["--dataset", "train"]
    command += ["--classes", class_name]
    command += ["--type_csv", "train"]
    command += ["--limit", str(limit)]
    command += ["--multiclasses", "1"]
    command += ["--download_folder", dataset_dir]
    command.append("--yes")  # answer confirmation prompts automatically

    subprocess.run(command, check=True)

if __name__ == "__main__":
    # Download settings: Open Images class, image cap, and output folder
    class_name, limit, dataset_dir = "Waste container", 1000, "trash_cans_dataset"

    # Make sure the output folder exists before the downloader runs
    os.makedirs(dataset_dir, exist_ok=True)

    # Fetch the images from the Open Images dataset
    download_open_images_dataset(
        class_name=class_name,
        limit=limit,
        dataset_dir=dataset_dir,
    )


[H[2J--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------


        Automatic download ... 24 out of 24 (100%)                             
        Automatic download ... 4411031 out of 4411031 (100%)                   
        Automatic download ... 1000 out of 1000 (100%)                         
    Formation of labels ... 1000 out of 1000 (100%)                            


--------------------------------------------------------------------------------
[1m[94mOIDv6 - Downloading single or multiple classes from the Open Images V6 Dataset ...[0m
[2023-04-15 18:50:30] Start: 
	Author: Dmitry Ryumin
	Email: dl_03.03.1991@mail.ru
	Maintainer: Dmitry Ryumin
	Version: 1.0.5
[2023-04-15 18:50:30] Checking command line arguments for validity ...
[2023-04-15 18:50:30] Creating Directories for Metadata ...
[2023-04-15 18:50:30] Downloading "waste container" ...
    File "class-descriptions-boxable.csv" not found ...
    File "oidv6-train-annotations-bbox.csv" not found ...
    Extracting data from "oidv6-train-annotations-bbox.csv" ...
    Total "train" images 1020 ... of which will be loaded 1000 ...
