In [None]:
import os

import git
import torch

# Source repository for the TrashBox images and the local checkout directory.
REPO_URL = "https://github.com/nikhilvenkatkumsetty/TrashBox.git"
DATASET_DIR = "TrashBox"

# Reuse an existing checkout; clone only when the directory is absent.
if os.path.exists(DATASET_DIR):
    print("Dataset already exists.")
else:
    print("Cloning dataset...")
    git.Repo.clone_from(REPO_URL, DATASET_DIR)
    print("Dataset cloned successfully!")


Cloning dataset...
Dataset cloned successfully!


In [None]:
def list_dataset_contents(dataset_dir, include_hidden=False):
    """Map each top-level directory of ``dataset_dir`` to its sorted entries.

    Args:
        dataset_dir: Path of the directory to inspect.
        include_hidden: When False (default), names starting with "." are
            skipped at both levels so that housekeeping entries such as
            ``.git`` are not reported as dataset categories. Pass True to
            list every entry.

    Returns:
        dict: ``{directory_name: sorted list of names inside it}``. Plain
        files directly under ``dataset_dir`` are ignored. Keys are inserted
        in sorted order.

    Raises:
        OSError: If ``dataset_dir`` (or a sub-directory) cannot be listed.
    """
    def _visible(names):
        # Optionally filter out dot-prefixed entries, then sort.
        if include_hidden:
            return sorted(names)
        return sorted(name for name in names if not name.startswith("."))

    dataset_structure = {}
    for category in _visible(os.listdir(dataset_dir)):
        category_path = os.path.join(dataset_dir, category)
        if os.path.isdir(category_path):
            dataset_structure[category] = _visible(os.listdir(category_path))
    return dataset_structure

# Summarize the checkout: each top-level directory and the names it contains.
dataset_structure = list_dataset_contents(dataset_dir=DATASET_DIR)
print("Dataset Structure:", dataset_structure)


Dataset Structure: {'.git': ['HEAD', 'branches', 'config', 'description', 'hooks', 'index', 'info', 'logs', 'objects', 'packed-refs', 'refs'], 'TrashBox_train_dataset_subfolders': ['cardboard', 'e-waste', 'glass', 'medical', 'metal', 'paper', 'plastic'], 'TrashBox_train_set': ['cardboard', 'e-waste', 'glass', 'medical', 'metal', 'paper', 'plastic']}


In [None]:
from PIL import Image

def is_valid_image(file_path):
    """Return True if PIL can open and verify the file at ``file_path``.

    Args:
        file_path: Path of the file to check.

    Returns:
        bool: True when ``Image.open`` succeeds and ``verify()`` raises
        nothing; False for any ordinary error (missing file, unreadable or
        unrecognized content, failed verification).
    """
    try:
        # The context manager closes the file handle even when verify() fails.
        with Image.open(file_path) as img:
            img.verify()  # Check if the file is an actual image
        return True
    except Exception:
        # Deliberately broad, but unlike a bare ``except`` this still lets
        # KeyboardInterrupt / SystemExit propagate.
        return False

def preprocess_images(dataset_dir):
    """Re-encode every image under ``dataset_dir`` as RGB JPEG, in place.

    Walks the directory tree recursively. Files rejected by
    ``is_valid_image`` are deleted. Files that pass are decoded, converted
    to RGB and written back to the same path as JPEG data (the file name,
    including its extension, is not changed).

    The re-encoded image is first written to ``<path>.tmp`` and then swapped
    in with ``os.replace``, so a failure while saving leaves the original
    file untouched instead of truncated.

    Args:
        dataset_dir: Root directory to process.
    """
    for root, _, files in os.walk(dataset_dir):
        for file in files:
            file_path = os.path.join(root, file)

            if not is_valid_image(file_path):
                print(f"Removing non-image file: {file_path}")
                os.remove(file_path)
                continue

            temp_path = file_path + ".tmp"
            try:
                # convert() decodes the whole image, so the source handle can
                # be closed before anything is written back.
                with Image.open(file_path) as img:
                    rgb_image = img.convert("RGB")
                rgb_image.save(temp_path, "JPEG")
                os.replace(temp_path, file_path)
            except Exception:
                # Best effort: report and move on, but do not swallow
                # KeyboardInterrupt the way a bare ``except`` would.
                print(f"Skipping corrupt file: {file_path}")
                if os.path.exists(temp_path):
                    os.remove(temp_path)

# Preprocess the images under TrashBox_train_set.
# NOTE(review): the training cells load from TrashBox_train_dataset_subfolders,
# not from this directory — confirm this is the intended target.
preprocess_images(dataset_dir="TrashBox/TrashBox_train_set")
print("Image preprocessing complete.")


Removing non-image file: TrashBox/TrashBox_train_set/paper/paper 2273.jpg




Removing non-image file: TrashBox/TrashBox_train_set/e-waste/e-waste 1719.jpg
Removing non-image file: TrashBox/TrashBox_train_set/cardboard/cardboard 1075.jpg
Image preprocessing complete.


In [None]:
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, random_split

# Define image transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images for CNN
    transforms.ToTensor(),  # Convert images to tensors
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize
])

# Load dataset
# NOTE: ImageFolder records the file list at construction time. A later cell
# deletes invalid images from this directory, so the dataset has to be
# rebuilt after cleaning before it is used for training.
DATASET_PATH = "TrashBox/TrashBox_train_dataset_subfolders"
dataset = datasets.ImageFolder(root=DATASET_PATH, transform=transform)

# Split dataset (80% train, 20% test). A dedicated, seeded generator makes the
# partition identical on every run instead of depending on global RNG state.
SPLIT_SEED = 42
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Print class names
class_names = dataset.classes
print("Classes:", class_names)
print(f"Total images: {len(dataset)} | Training images: {train_size} | Testing images: {test_size}")


Classes: ['cardboard', 'e-waste', 'glass', 'medical', 'metal', 'paper', 'plastic']
Total images: 14279 | Training images: 11423 | Testing images: 2856


In [None]:
import torch.nn as nn
import torch.nn.functional as F

# Define CNN model
class WasteClassifierCNN(nn.Module):
    """Small CNN classifier: three conv + max-pool stages, then two linear layers.

    Each convolution uses a 3x3 kernel with padding 1 (spatial size kept) and
    is followed by a 2x2 max-pool (spatial size halved). ``fc1`` expects
    ``64 * 28 * 28`` features, which corresponds to 3-channel 224x224 inputs
    (224 -> 112 -> 56 -> 28).

    Args:
        num_classes: Number of output logits. When omitted, falls back to
            ``len(class_names)``, a notebook-level variable that must
            already be defined.
    """

    def __init__(self, num_classes=None):
        super().__init__()
        if num_classes is None:
            # Backward-compatible default for existing ``WasteClassifierCNN()`` calls.
            num_classes = len(class_names)
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 28 * 28, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw class logits of shape ``(batch, num_classes)``."""
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv3(x))
        x = F.max_pool2d(x, 2)
        # Tensor method instead of torch.flatten: this cell imports only
        # torch.nn / torch.nn.functional, so the bare name ``torch`` may not
        # be bound when forward() runs.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Instantiate model.
# The constructor is called without arguments here; the class definition reads
# the notebook-level `class_names` to size the output layer, so the data
# loading cell must have been run first.
model = WasteClassifierCNN()
print("CNN model initialized.")


CNN model initialized.


In [None]:
from PIL import UnidentifiedImageError

# Validate images before training
def clean_dataset(dataset_path):
    """Standardize every image under ``dataset_path`` to RGB JPEG, in place.

    Walks the tree recursively. A file that PIL cannot open or decode is
    reported and deleted. Every other file is rewritten at the same path as
    JPEG data (the file name is not changed).

    The image is fully decoded and the source handle closed before anything
    is written, and the new data goes to ``<path>.tmp`` first and is swapped
    in with ``os.replace``. A failure while saving therefore keeps the
    original file instead of deleting or truncating a valid image.

    Args:
        dataset_path: Root directory to clean.
    """
    for root, _, files in os.walk(dataset_path):
        for file in files:
            file_path = os.path.join(root, file)

            try:
                with Image.open(file_path) as img:
                    rgb_image = img.convert("RGB")  # forces a full decode
            except (UnidentifiedImageError, OSError):
                print(f"Removing invalid image: {file_path}")
                os.remove(file_path)
                continue

            temp_path = file_path + ".tmp"
            try:
                rgb_image.save(temp_path, "JPEG")  # Standardize format
                os.replace(temp_path, file_path)
            except OSError:
                print(f"Could not rewrite image, keeping original: {file_path}")
                if os.path.exists(temp_path):
                    os.remove(temp_path)

# Clean the directory the training cells load from, with progress messages.
print("Cleaning dataset...")
clean_dataset(dataset_path=DATASET_PATH)
print("Dataset cleaned successfully!")


Cleaning dataset...




Removing invalid image: TrashBox/TrashBox_train_dataset_subfolders/medical/gloves/gloves 332.jpg
Removing invalid image: TrashBox/TrashBox_train_dataset_subfolders/cardboard/cardboard 596.jpg
Removing invalid image: TrashBox/TrashBox_train_dataset_subfolders/cardboard/cardboard 2226.jpg
Dataset cleaned successfully!


In [None]:
import os
from PIL import Image

# Root of the image tree validated by this cell (same path the training cells
# pass to ImageFolder).
DATASET_PATH = "TrashBox/TrashBox_train_dataset_subfolders"

def is_valid_image(file_path):
    """Return True if PIL can open and verify the file at ``file_path``.

    Args:
        file_path: Path of the file to check.

    Returns:
        bool: True when ``Image.open`` succeeds and ``verify()`` raises
        nothing; False for any ordinary error (missing file, unreadable or
        unrecognized content, failed verification).
    """
    try:
        # The context manager closes the file handle even when verify() fails.
        with Image.open(file_path) as img:
            img.verify()
        return True
    except Exception:
        # Deliberately broad, but unlike a bare ``except`` this still lets
        # KeyboardInterrupt / SystemExit propagate.
        return False

def clean_dataset(dataset_path):
    """Delete every file under ``dataset_path`` that fails ``is_valid_image``.

    Prints a start and end banner, plus one line per file that is removed or
    that has disappeared between being listed and being deleted.
    """
    print("Cleaning dataset...")
    for folder, _subdirs, filenames in os.walk(dataset_path):
        for filename in filenames:
            candidate = os.path.join(folder, filename)
            if is_valid_image(candidate):
                continue  # valid images are left untouched
            if not os.path.exists(candidate):
                print(f"File not found, skipping: {candidate}")
                continue
            os.remove(candidate)
            print(f"Removing invalid image: {candidate}")
    print("Dataset cleaned successfully!")

# Remove any file under DATASET_PATH that does not pass image verification.
clean_dataset(dataset_path=DATASET_PATH)


Cleaning dataset...
Dataset cleaned successfully!


In [None]:
import time
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, random_split

# Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load dataset
DATASET_PATH = "TrashBox/TrashBox_train_dataset_subfolders"
dataset = datasets.ImageFolder(root=DATASET_PATH, transform=transform)

# Split dataset into training and testing sets (80-20 split). A dedicated,
# seeded generator makes the partition identical on every run, so reported
# test accuracy is comparable between runs.
SPLIT_SEED = 42
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

class_names = dataset.classes
print(f"Classes: {class_names}")
print(f"Total images: {len(dataset)} | Training images: {train_size} | Testing images: {test_size}")

# Define CNN model
class WasteClassifierCNN(nn.Module):
    """Three conv/ReLU/max-pool stages followed by a two-layer linear head.

    ``fc1`` takes ``64 * 28 * 28`` input features, so the network expects
    inputs whose spatial size is 28x28 after the three 2x2 poolings.
    """

    def __init__(self, num_classes):
        super().__init__()
        # Layers keep their original names and creation order so state_dict
        # keys and parameter initialization are unchanged.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(in_features=64 * 28 * 28, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=num_classes)

    def forward(self, x):
        """Compute class logits for a batch of images."""
        for conv in (self.conv1, self.conv2, self.conv3):
            x = F.max_pool2d(F.relu(conv(x)), 2)
        features = torch.flatten(x, start_dim=1)
        hidden = F.relu(self.fc1(features))
        return self.fc2(hidden)

# Pick the GPU when one is available, otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build the classifier with one output per dataset class and move it to the device.
model = WasteClassifierCNN(num_classes=len(class_names))
model = model.to(device)
print("CNN model initialized.")

# Cross-entropy loss and Adam (learning rate 0.001) over all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

# Training function with execution time estimation
def train_cnn_model(model, train_loader, epochs=10, criterion=None,
                    optimizer=None, save_path="cnn_waste_model.pth"):
    """Train ``model`` on ``train_loader`` and save its ``state_dict``.

    After each epoch, prints the mean batch loss, the epoch duration and a
    remaining-time estimate (last epoch duration times epochs left).

    Args:
        model: The ``nn.Module`` to train. Batches are moved to the device of
            the model's first parameter (CPU if it has none).
        train_loader: Iterable yielding ``(images, labels)`` batches.
        epochs: Number of passes over ``train_loader``.
        criterion: Loss callable. Defaults to ``nn.CrossEntropyLoss()``.
        optimizer: Optimizer for ``model``'s parameters. Defaults to
            ``optim.Adam(model.parameters(), lr=0.001)``, so the optimizer is
            always bound to the model actually passed in.
        save_path: Where the trained ``state_dict`` is written.

    Returns:
        None.
    """
    if criterion is None:
        criterion = nn.CrossEntropyLoss()
    if optimizer is None:
        optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Follow the model's own placement instead of a notebook-level global.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    print("Starting training...")
    start_time = time.time()

    for epoch in range(epochs):
        epoch_start = time.time()
        total_loss = 0.0
        num_batches = 0
        model.train()

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1

        epoch_time = time.time() - epoch_start
        remaining_time = epoch_time * (epochs - epoch - 1)
        # Counting batches avoids a ZeroDivisionError on an empty loader and
        # does not require the loader to implement __len__.
        mean_loss = total_loss / num_batches if num_batches else float("nan")

        print(f"Epoch {epoch+1}/{epochs}, Loss: {mean_loss:.4f}, "
              f"Time: {epoch_time:.2f}s, Estimated Remaining: {remaining_time:.2f}s")

    total_time = time.time() - start_time
    print(f"Training completed in {total_time:.2f} seconds.")
    torch.save(model.state_dict(), save_path)
    print("Model training complete and saved.")

# Train with the default number of epochs; the weights are saved when training ends.
train_cnn_model(model=model, train_loader=train_loader)


Classes: ['cardboard', 'e-waste', 'glass', 'medical', 'metal', 'paper', 'plastic']
Total images: 14276 | Training images: 11420 | Testing images: 2856
CNN model initialized.
Starting training...
Epoch 1/10, Loss: 1.8642, Time: 136.55s, Estimated Remaining: 1228.92s
Epoch 2/10, Loss: 1.6133, Time: 135.50s, Estimated Remaining: 1084.01s
Epoch 3/10, Loss: 1.3969, Time: 136.14s, Estimated Remaining: 952.95s
Epoch 4/10, Loss: 1.0099, Time: 138.71s, Estimated Remaining: 832.26s
Epoch 5/10, Loss: 0.5769, Time: 138.54s, Estimated Remaining: 692.69s
Epoch 6/10, Loss: 0.3178, Time: 138.79s, Estimated Remaining: 555.16s
Epoch 7/10, Loss: 0.2241, Time: 137.75s, Estimated Remaining: 413.26s
Epoch 8/10, Loss: 0.1478, Time: 137.28s, Estimated Remaining: 274.56s
Epoch 9/10, Loss: 0.1250, Time: 138.38s, Estimated Remaining: 138.38s
Epoch 10/10, Loss: 0.1127, Time: 137.53s, Estimated Remaining: 0.00s
Training completed in 1375.17 seconds.
Model training complete and saved.


In [None]:
import torch

# Evaluation function
def evaluate_model(model, test_loader):
    """Compute, print and return top-1 accuracy of ``model`` on ``test_loader``.

    The model is moved to CUDA when available (otherwise CPU) and switched
    to evaluation mode as a side effect.

    Args:
        model: The ``nn.Module`` to evaluate.
        test_loader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        float: Accuracy as a percentage in [0, 100], or NaN when the loader
        yields no samples.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()  # Set model to evaluation mode

    correct = 0
    total = 0

    with torch.no_grad():  # No need to track gradients during evaluation
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Guard against an empty loader instead of raising ZeroDivisionError.
    accuracy = 100 * correct / total if total else float("nan")
    print(f"Test Accuracy: {accuracy:.2f}%")
    return accuracy

# Rebuild the architecture, restore the saved weights, then score on the test split.
num_classes = len(class_names)  # output size must match the saved checkpoint
model = WasteClassifierCNN(num_classes=num_classes)

# Map the checkpoint onto whichever device is available on this machine.
map_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
saved_weights = torch.load('cnn_waste_model.pth', map_location=map_device, weights_only=True)
model.load_state_dict(saved_weights)

evaluate_model(model, test_loader)


Test Accuracy: 42.05%


In [None]:
from google.colab import files

# Download the trained model weights file written by the training cell.
# Uses google.colab.files, so this cell only works inside Google Colab.
files.download('cnn_waste_model.pth')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>