In [28]:
# ai model to use object detection to detect is object is glass, plastic, metal or e waste using pytorch and torchvision and resnet50
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torch.utils.data as data
import os

In [29]:
# use cuda if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device


device(type='cpu')

In [30]:
# directory of the dataset is Garbage classification and the dataset is divided into respective folders of glass, plastic, metal,paper and e-waste

# define the transforms for the dataset
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# load the dataset
dataset = torchvision.datasets.ImageFolder(root='Garbage classification', transform=transform)

In [31]:
# no of classes in the dataset
num_classes = len(dataset.classes)
class_names = dataset.classes
class_names


['e-waste', 'glass', 'metal', 'paper', 'plastic']

In [38]:
# no need to split the dataset

# define the dataloader
dataloader = data.DataLoader(dataset, batch_size=128, shuffle=True)

# make a model which can identify the diff classes of garbage and dont use resnet50 pretrained model
model = nn.Sequential(
    nn.Conv2d(3, 32, 3, 1, 1),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),
    nn.Conv2d(32, 64, 3, 1, 1),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),
    nn.Conv2d(64, 128, 3, 1, 1),
    nn.ReLU(),
    nn.MaxPool2d(2, 2),
    nn.Flatten(),
    nn.Linear(128 * 28 * 28, 512),
    nn.ReLU(),
    nn.Linear(512, num_classes)
)
# change the output layer
model.fc = nn.Linear(2048, num_classes)

# define the loss function
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

# define the optimizer
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

In [39]:
# train the model
num_epochs = 100
for epoch in range(num_epochs):
    for inputs, labels in dataloader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    print(f'Epoch [{epoch}/{num_epochs}], Loss: {loss.item()}')

# save the model
torch.save(model, 'garbage_classification.pth')
print('Model trained and saved')

RuntimeError: mat1 and mat2 shapes cannot be multiplied (128x5 and 2048x5)

In [None]:
# test the model
model = torch.load('garbage_classification.pth')
model.eval()

# load the test dataset with just 32 images
test_dataset = torchvision.datasets.ImageFolder(root='Garbage classification', transform=transform)

# define the test dataloader
test_dataloader = data.DataLoader(test_dataset, batch_size=32, shuffle=True)

# test the model
correct = 0
total = 0
for inputs, labels in test_dataloader:
    outputs = model(inputs)
    _, predicted = torch.max(outputs, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

print(f'Accuracy: {correct / total * 100}%')
print('Test completed')


In [None]:
# use model to predict the class of the garbage in camera 
import cv2
import numpy as np

# load the model
model = torch.load('garbage_classification.pth')
model.eval()

# load the labels
class_names = dataset.classes

# define the transforms
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# use the camera
cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        continue

    # convert the frame to RGB
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # convert the frame to tensor
    inputs = transform(frame)
    inputs = inputs.unsqueeze(0)

    # get the prediction
    outputs = model(inputs)
    _, predicted = torch.max(outputs, 1)

    # get the class name
    class_name = class_names[predicted]

    # put the class name on the frame
    cv2.putText(frame, class_name, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    # convert the frame back to BGR
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

    # display the frame
    cv2.imshow('frame', frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
print('Camera closed')