In [None]:
import os
import git

REPO_URL = "https://github.com/nikhilvenkatkumsetty/TrashBox.git"
DATASET_DIR = "TrashBox"

if not os.path.exists(DATASET_DIR):
    git.Repo.clone_from(REPO_URL, DATASET_DIR)
    print("Repository cloned successfully!")
else:
    print("Repository already exists.")


Repository cloned successfully!


In [None]:
def list_dataset_contents(dataset_dir):
    """List dataset structure with categories and subcategories."""
    dataset_structure = {}
    for category in sorted(os.listdir(dataset_dir)):
        category_path = os.path.join(dataset_dir, category)
        if os.path.isdir(category_path):
            dataset_structure[category] = sorted(os.listdir(category_path))
    return dataset_structure

dataset_structure = list_dataset_contents(DATASET_DIR)
print("Dataset Structure:", dataset_structure)


Dataset Structure: {'.git': ['HEAD', 'branches', 'config', 'description', 'hooks', 'index', 'info', 'logs', 'objects', 'packed-refs', 'refs'], 'TrashBox_train_dataset_subfolders': ['cardboard', 'e-waste', 'glass', 'medical', 'metal', 'paper', 'plastic'], 'TrashBox_train_set': ['cardboard', 'e-waste', 'glass', 'medical', 'metal', 'paper', 'plastic']}


In [None]:
import os
import git
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets, models
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from PIL import Image

# Step 1: Clone dataset if not already present
REPO_URL = "https://github.com/nikhilvenkatkumsetty/TrashBox.git"
DATASET_PATH = "TrashBox/TrashBox_train_set"

if not os.path.exists("TrashBox"):
    print("Cloning dataset...")
    git.Repo.clone_from(REPO_URL, "TrashBox")
    print("Dataset cloned successfully!")

# Step 2: Check for valid images
def is_valid_image(file_path):
    """Check if a file is a valid image."""
    try:
        img = Image.open(file_path)
        img.verify()
        return True
    except:
        return False

# Convert images to standard format
for root, _, files in os.walk(DATASET_PATH):
    for file in files:
        file_path = os.path.join(root, file)
        if is_valid_image(file_path):
            try:
                img = Image.open(file_path).convert("RGB")
                img.save(file_path, "JPEG")  # Save as JPEG
            except:
                print(f"Skipping corrupt file: {file_path}")
        else:
            print(f"Removing non-image file: {file_path}")
            os.remove(file_path)

# Step 3: Define transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Step 4: Load dataset
dataset = datasets.ImageFolder(root=DATASET_PATH, transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
class_names = dataset.classes
print("Classes:", class_names)

# Step 5: Load pre-trained ResNet-18 model
model = models.resnet18(pretrained=True)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, len(class_names))

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Move to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Step 6: Training loop
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(dataloader):.4f}")

# Step 7: Save trained model
torch.save(model.state_dict(), "waste_classifier.pth")
print("Model training complete and saved!")


Removing non-image file: TrashBox/TrashBox_train_set/paper/paper 2273.jpg




Removing non-image file: TrashBox/TrashBox_train_set/e-waste/e-waste 1719.jpg
Removing non-image file: TrashBox/TrashBox_train_set/cardboard/cardboard 1075.jpg
Classes: ['cardboard', 'e-waste', 'glass', 'medical', 'metal', 'paper', 'plastic']


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 168MB/s]


Epoch [1/5], Loss: 1.1440
Epoch [2/5], Loss: 0.8070
Epoch [3/5], Loss: 0.6502
Epoch [4/5], Loss: 0.5276
Epoch [5/5], Loss: 0.4240
Model training complete and saved!


In [None]:
import os
import shutil
from sklearn.model_selection import train_test_split

# Define the directory paths
train_dir = 'TrashBox/TrashBox_train_set'  # Path to your training data
test_dir = 'TrashBox/TrashBox_test_set'   # Path to your test data

# Create test directory if it doesn't exist
if not os.path.exists(test_dir):
    os.makedirs(test_dir)

# List classes (folders) in the training directory
classes = os.listdir(train_dir)

# Split each class folder into train and test sets
for class_name in classes:
    class_path = os.path.join(train_dir, class_name)

    # List all image files in the class folder
    images = os.listdir(class_path)

    # Split the images into train and test (80% train, 20% test)
    train_images, test_images = train_test_split(images, test_size=0.2, random_state=42)

    # Create test folder for the class if it doesn't exist
    test_class_path = os.path.join(test_dir, class_name)
    if not os.path.exists(test_class_path):
        os.makedirs(test_class_path)

    # Move images to the test folder
    for img in test_images:
        shutil.move(os.path.join(class_path, img), os.path.join(test_class_path, img))

print("Dataset split completed! Test set created at:", test_dir)


Dataset split completed! Test set created at: TrashBox/TrashBox_test_set


In [None]:
# Path to your test dataset
test_dir = 'TrashBox/TrashBox_test_set'

# Define the transformations for the test dataset
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to match model input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load the test dataset
test_dataset = datasets.ImageFolder(root=test_dir, transform=test_transform)

# Create the DataLoader for the test dataset
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

# Evaluate the model
model.eval()  # Set the model to evaluation mode
correct = 0
total = 0

with torch.no_grad():  # No need to compute gradients for evaluation
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Accuracy of the model on the test images: {accuracy:.2f}%")




Accuracy of the model on the test images: 87.93%


In [None]:
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Define transformations for the training dataset
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to 224x224 (required by most models)
    transforms.ToTensor(),          # Convert images to PyTorch tensors
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize with ImageNet values
])

# Define the path to the training dataset
train_dir = 'TrashBox/TrashBox_train_set'

# Load the training dataset
train_dataset = datasets.ImageFolder(root=train_dir, transform=train_transform)

# Create the DataLoader for the training dataset
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)

# Now, you can continue with the training loop


In [None]:
# Continue training for more epochs (let's say, 5 more epochs)
num_epochs = 5  # Set the number of additional epochs
for epoch in range(6, 6 + num_epochs):  # Start from epoch 6 if you have already completed 5 epochs
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Track running loss
        running_loss += loss.item()

    # Print statistics for every epoch
    print(f"Epoch [{epoch}/{6 + num_epochs}], Loss: {running_loss / len(train_loader):.4f}")

# Save the updated model
torch.save(model.state_dict(), 'trashbox_model_updated.pth')


Epoch [6/11], Loss: 0.3324
Epoch [7/11], Loss: 0.2247
Epoch [8/11], Loss: 0.1945
Epoch [9/11], Loss: 0.1653
Epoch [10/11], Loss: 0.1635


In [None]:
from google.colab import drive
drive.mount('/content/drive')
# Define the path to save the model on Google Drive
model_save_path = '/content/drive/MyDrive/trashbox_model.pth'

# Save the model's state_dict to Google Drive
torch.save(model.state_dict(), model_save_path)

print(f"Model saved to: {model_save_path}")


Mounted at /content/drive
Model saved to: /content/drive/MyDrive/trashbox_model.pth


In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
from google.colab import drive, files
from torchvision import transforms
from PIL import Image
import io

# Mount Google Drive
drive.mount('/content/drive')

# Load the pre-trained ResNet model
model = models.resnet18(pretrained=True)

# Modify the final fully connected layer for your number of classes (7 categories in this case)
model.fc = nn.Linear(model.fc.in_features, 7)  # Adjust this for your number of categories

# Load the trained model weights (make sure the path is correct)
model.load_state_dict(torch.load('/content/drive/MyDrive/trashbox_model.pth'))  # Adjust the path
model.eval()

# Define image transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Categories (replace with your actual categories)
categories = ["Cardboard", "E-Waste", "Glass", "Medical", "Metal", "Paper", "Plastic"]

# Function to classify uploaded image
def classify_uploaded_image():
    uploaded = files.upload()  # Upload an image file

    for filename in uploaded.keys():
        # Open the uploaded image
        img = Image.open(io.BytesIO(uploaded[filename]))

        # Apply the transformations
        img_tensor = transform(img).unsqueeze(0)

        # Classify the image
        with torch.no_grad():
            outputs = model(img_tensor)
            _, predicted = torch.max(outputs, 1)
            predicted_class = categories[predicted.item()]

        # Show result
        print(f"Predicted Category: {predicted_class}")
        img.show()  # Display the uploaded image

# Step 5: Classify the uploaded image
classify_uploaded_image()


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 77.8MB/s]
  model.load_state_dict(torch.load('/content/drive/MyDrive/trashbox_model.pth'))  # Adjust the path


Saving 360_F_498362712_7sJRmv7sOsfCtqieE0wtIjUpdUBvF4PY.jpg to 360_F_498362712_7sJRmv7sOsfCtqieE0wtIjUpdUBvF4PY.jpg
Predicted Category: Plastic


In [None]:
from google.colab import files
files.download('/content/drive/MyDrive/trashbox_model.pth')

FileNotFoundError: Cannot find file: /content/drive/MyDrive/trashbox_model.pth