In [2]:
import os
import zipfile
import requests
import json
from pathlib import Path
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from PIL import Image

# Load Croissant metadata
croissant_path = "garbage-classification-metadata (1).json"
with open(croissant_path, "r") as f:
    metadata = json.load(f)

# Extract dataset URL
dataset_url = metadata["distribution"][0]["contentUrl"]
dataset_zip = "garbage_classification.zip"
extract_path = "garbage_classification"

# Download dataset if not exists
if not os.path.exists(dataset_zip):
    print("Downloading dataset...")
    response = requests.get(dataset_url, stream=True)
    with open(dataset_zip, "wb") as f:
        for chunk in response.iter_content(chunk_size=1024):
            f.write(chunk)
    print("Download complete!")

# Extract dataset if not already extracted
if not os.path.exists(extract_path):
    print("Extracting dataset...")
    with zipfile.ZipFile(dataset_zip, 'r') as zip_ref:
        zip_ref.extractall(extract_path)
    print("Extraction complete!")

# Define dataset transformation
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

# Load dataset with PyTorch DataLoader
train_dataset = datasets.ImageFolder(root=extract_path, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

print(f"Loaded {len(train_dataset)} images into DataLoader!")


Downloading dataset...
Download complete!
Extracting dataset...
Extraction complete!
Loaded 15515 images into DataLoader!


In [3]:
import os
import zipfile
import requests
import json
import torch
import torch.nn as nn
import torch.optim as optim
from pathlib import Path
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from PIL import Image

# Load Croissant metadata
croissant_path = "garbage-classification-metadata (1).json"
with open(croissant_path, "r") as f:
    metadata = json.load(f)

# Extract dataset URL
dataset_url = metadata["distribution"][0]["contentUrl"]
dataset_zip = "garbage_classification.zip"
extract_path = "garbage_classification"

# Download dataset if not exists
if not os.path.exists(dataset_zip):
    print("Downloading dataset...")
    response = requests.get(dataset_url, stream=True)
    with open(dataset_zip, "wb") as f:
        for chunk in response.iter_content(chunk_size=1024):
            f.write(chunk)
    print("Download complete!")

# Extract dataset if not already extracted
if not os.path.exists(extract_path):
    print("Extracting dataset...")
    with zipfile.ZipFile(dataset_zip, 'r') as zip_ref:
        zip_ref.extractall(extract_path)
    print("Extraction complete!")

# Define dataset transformation
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

# Load dataset with PyTorch DataLoader
train_dataset = datasets.ImageFolder(root=extract_path, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

print(f"Loaded {len(train_dataset)} images into DataLoader!")

# Define CNN Model
class CNNModel(nn.Module):
    def __init__(self, num_classes=12):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 56 * 56, 128)
        self.fc2 = nn.Linear(128, num_classes)
    
    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialize model, loss, and optimizer
model = CNNModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

print("CNN Model initialized!")


Loaded 15515 images into DataLoader!
CNN Model initialized!


In [4]:
import os
import zipfile
import requests
import json
import torch
import torch.nn as nn
import torch.optim as optim
from pathlib import Path
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from PIL import Image

# Load Croissant metadata
croissant_path = "garbage-classification-metadata (1).json"
with open(croissant_path, "r") as f:
    metadata = json.load(f)

# Extract dataset URL
dataset_url = metadata["distribution"][0]["contentUrl"]
dataset_zip = "garbage_classification.zip"
extract_path = "garbage_classification"

# Download dataset if not exists
if not os.path.exists(dataset_zip):
    print("Downloading dataset...")
    response = requests.get(dataset_url, stream=True)
    with open(dataset_zip, "wb") as f:
        for chunk in response.iter_content(chunk_size=1024):
            f.write(chunk)
    print("Download complete!")

# Extract dataset if not already extracted
if not os.path.exists(extract_path):
    print("Extracting dataset...")
    with zipfile.ZipFile(dataset_zip, 'r') as zip_ref:
        zip_ref.extractall(extract_path)
    print("Extraction complete!")

# Define dataset transformation
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

# Load dataset with PyTorch DataLoader
train_dataset = datasets.ImageFolder(root=extract_path, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

print(f"Loaded {len(train_dataset)} images into DataLoader!")

# Define CNN Model
class CNNModel(nn.Module):
    def __init__(self, num_classes=12):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 56 * 56, 128)
        self.fc2 = nn.Linear(128, num_classes)
    
    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Initialize model, loss, and optimizer
model = CNNModel()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

print("CNN Model initialized!")

# Training Loop
def train(model, train_loader, criterion, optimizer, epochs=10):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}")

# Train the model
train(model, train_loader, criterion, optimizer, epochs=10)
print("Training complete!")


Loaded 15515 images into DataLoader!
CNN Model initialized!
Epoch 1/10, Loss: 0.0052
Epoch 2/10, Loss: 0.0000
Epoch 3/10, Loss: 0.0000
Epoch 4/10, Loss: 0.0000
Epoch 5/10, Loss: 0.0000
Epoch 6/10, Loss: 0.0000
Epoch 7/10, Loss: 0.0000
Epoch 8/10, Loss: 0.0000
Epoch 9/10, Loss: 0.0000
Epoch 10/10, Loss: 0.0000
Training complete!


In [6]:
# What is the accuracy of the model on the training set?
def evaluate(model, train_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in train_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    return accuracy 

print(f"Training Accuracy: {evaluate(model, train_loader):.2f}%")

Training Accuracy: 100.00%
