In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
from PIL import Image
from tempfile import TemporaryDirectory
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from tqdm import tqdm
from torchvision.io import read_image
from sklearn.model_selection import train_test_split

In [10]:
from google.colab import drive
# Mount the Google Drive of whoever is running the notebook at /content/drive.
# Nishanth and Chad: add a shortcut to the shared `dataset-resized` folder in
# your own "My Drive" so that the path below resolves (the folder lives in the
# directory that was shared with you).
drive.mount('/content/drive')
import os

# Path to the dataset folder (one sub-folder per class).
folder_path = '/content/drive/My Drive/dataset-resized' # All images are 512 x 384
# Alternative per-user locations; uncomment the one matching your Drive layout.
#folder_path = '/content/drive/My Drive/24-782: ML and AI for Engineers - Project/4- Code/dataset-resized'  #Chad path
# folder_path = '/content/drive/My Drive/Courses/24-782: ML and AI for Engineers - Project/4- Code/dataset-resized' #Nishanth

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [11]:
# Collect image paths and integer labels. A category folder that cannot be read
# is reported and skipped instead of aborting the whole scan.
def prepare_data(data_dir):
    """Scan ``data_dir`` for class sub-folders and list their images.

    Each of the six category folders ('Trash', 'Plastic', 'Paper', 'Metal',
    'Glass', 'Cardboard') is expected directly under ``data_dir``. A file is
    kept when its name ends with '.jpg' or '.png' (case-sensitive).

    Args:
        data_dir: Path of the directory that contains the category folders.

    Returns:
        A tuple ``(image_paths, labels)`` of two equally long lists.
        ``labels[i]`` is the index of the category of ``image_paths[i]`` in
        the category list above (0 for Trash, 1 for Plastic, ...). Paths are
        ordered by category and, within a category, by file name.
    """
    categories = ['Trash', 'Plastic', 'Paper', 'Metal', 'Glass', 'Cardboard']
    image_extensions = ('.jpg', '.png')
    image_paths = []
    labels = []  # Numerical labels: 0 for Trash, 1 for Plastic, etc.

    for label, category in enumerate(categories):
        category_dir = os.path.join(data_dir, category)
        try:
            # os.listdir returns entries in arbitrary order. Sorting makes the
            # result (and any seeded split built on top of it) reproducible.
            file_names = sorted(os.listdir(category_dir))
        except OSError as e:
            # Missing/unreadable folder: report it and carry on with the rest.
            print(f"Failed to process category {category}: {e}")
            continue

        for file in file_names:
            if file.endswith(image_extensions):
                image_paths.append(os.path.join(category_dir, file))
                labels.append(label)

    return image_paths, labels

In [12]:
# Build the full list of image paths and their integer class labels from the
# dataset folder; these two lists feed the train/val/test split.
image_paths, labels = prepare_data(folder_path)

In [13]:
# class CustomDataset(Dataset):
#     def __init__(self, image_paths, labels, transform=None):
#         self.image_paths = image_paths
#         self.labels = labels
#         self.transform = transform

#     def __len__(self):
#         return len(self.image_paths)

#     def __getitem__(self, index):
#         image_path = self.image_paths[index]
#         image = read_image(image_path)  # Loads the image as a uint8 tensor (C, H, W) with values in [0, 255], not [0, 1]

#         if self.transform:
#             image = self.transform(image)

#         label = torch.tensor(self.labels[index], dtype=torch.long)
#         return image, label

In [14]:
class CustomDataset(Dataset):
    """Map-style dataset that loads images from disk on demand.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of integer class labels, aligned with ``image_paths``.
        transform: Optional callable applied to the loaded RGB PIL image.

    Raises:
        ValueError: If ``image_paths`` and ``labels`` differ in length.
    """

    def __init__(self, image_paths, labels, transform=None):
        # A length mismatch would otherwise surface much later as an
        # IndexError, or go unnoticed if there are surplus labels.
        if len(image_paths) != len(labels):
            raise ValueError(
                f"image_paths and labels must have the same length, "
                f"got {len(image_paths)} and {len(labels)}"
            )
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_paths)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at ``index``.

        ``image`` is the RGB PIL image after ``transform`` (if any);
        ``label`` is a scalar ``torch.long`` tensor.
        """
        image_path = self.image_paths[index]
        # Open inside a context manager so the underlying file is always
        # closed; convert() returns an independent, fully loaded RGB image.
        with Image.open(image_path) as source_image:
            image = source_image.convert("RGB")

        if self.transform is not None:
            image = self.transform(image)

        label = torch.tensor(self.labels[index], dtype=torch.long)
        return image, label


In [15]:
# Stage 1: hold out 15% of all images as the test set. The split is stratified
# on the complete label list so every class keeps its share.
trainval_paths, test_paths, trainval_labels, test_labels = train_test_split(
    image_paths,
    labels,
    test_size=0.15,
    random_state=0,
    stratify=labels,
)

# Stage 2: carve the validation set (17.5% of what is left) out of the
# remainder, this time stratifying on the remaining labels.
train_paths, val_paths, train_labels, val_labels = train_test_split(
    trainval_paths,
    trainval_labels,
    test_size=0.175,
    random_state=0,
    stratify=trainval_labels,
)

In [16]:
# Pre-processing pipelines. Both end with ToTensor + Normalize using the
# ImageNet channel statistics; only the training pipeline adds augmentation.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]


def _to_normalized_tensor():
    """Return the closing steps common to both pipelines."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ]


# Training: resize, random 224 crop, then light geometric/colour augmentation.
training_transform = transforms.Compose([
    transforms.Resize(size=256, antialias=True),
    transforms.RandomCrop(size=224),
    transforms.RandomRotation(degrees=20),
    transforms.RandomHorizontalFlip(p=0.1),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
    transforms.RandomAdjustSharpness(sharpness_factor=2, p=0.1),
    *_to_normalized_tensor(),
])

# Validation / test: deterministic resize and centre crop, no augmentation.
transform = transforms.Compose([
    transforms.Resize(size=256, antialias=True),
    transforms.CenterCrop(size=224),
    *_to_normalized_tensor(),
])


In [17]:
# Wrap each split in a CustomDataset. Only the training split gets the
# augmenting pipeline; validation and test share the deterministic one.
train_dataset = CustomDataset(train_paths, train_labels, transform=training_transform)
val_dataset = CustomDataset(val_paths, val_labels, transform=transform)
test_dataset = CustomDataset(test_paths, test_labels, transform=transform)

In [18]:
# Data loaders for the three splits.

batch_size = 32

# Shuffle only the training data. Evaluation gains nothing from shuffling, and
# a fixed order keeps the batch composition (including the short final batch)
# identical from run to run, so validation/test numbers are repeatable.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, num_workers=1, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, num_workers=1, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, num_workers=1, shuffle=False)

In [19]:
# ResNet-50 pre-trained on ImageNet, used as a frozen feature extractor.
model = torchvision.models.resnet50(weights='IMAGENET1K_V1')

# Freeze every pre-trained weight; only the head attached below is trained.
for pretrained_param in model.parameters():
    pretrained_param.requires_grad_(False)

in_feat = model.fc.in_features  # width of the features entering the head (2048)
classes = 6  # number of output classes

# Replace the original classifier with a small two-layer head. Freshly created
# layers have requires_grad=True, so these are the trainable parameters.
head_layers = [
    nn.Linear(in_feat, 512),
    nn.ReLU(),
    nn.Linear(512, classes),
]
model.fc = nn.Sequential(*head_layers)


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 152MB/s]


In [20]:
# Display the module tree to inspect the architecture, including the new `fc` head.
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [21]:
# Sanity check: number of features feeding the classifier head (2048 for this model).
print(in_feat)

2048


In [None]:
# Training: fit the model for N_epochs and record per-epoch loss/accuracy on
# the training and validation sets.

# Hyper-parameters
lr = 1e-5
step_size = 20  # NOTE: with N_epochs < step_size the LR never decays in this run
gamma = 0.9
N_epochs = 5

loss_fun = nn.CrossEntropyLoss()  # multi-class classification on raw logits
optimizer = optim.Adam(params=model.parameters(), lr=lr)
lr_schedule = lr_scheduler.StepLR(optimizer=optimizer, step_size=step_size, gamma=gamma)

# Per-epoch history, consumed by the plotting cell.
training_losses = []
training_acces = []
val_losses = []
val_acces = []

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
loss_fun.to(device)  # no-op for a parameter-free loss; matters only if class weights are added


def _run_epoch(model, dataloader, loss_fun, device, optimizer=None):
    """Run one full pass over ``dataloader`` and return ``(loss, accuracy)``.

    The model is trained when ``optimizer`` is given; otherwise it is put in
    eval mode and run without gradients.

    Both metrics are averaged per *sample*, not per batch, so a smaller final
    batch does not carry extra weight.

    Args:
        model: Network producing class logits of shape (batch, classes).
        dataloader: Iterable yielding ``(inputs, integer_labels)`` batches.
        loss_fun: Criterion returning the mean loss over a batch.
        device: Device the batches are moved to.
        optimizer: Optimizer to step after each batch, or ``None`` to evaluate.

    Returns:
        Tuple ``(mean loss per sample, fraction of correct predictions)``.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    is_training = optimizer is not None
    # NOTE(review): train mode also lets BatchNorm layers update their running
    # statistics even though their weights are frozen; confirm this is wanted.
    model.train(is_training)

    loss_sum = 0.0
    correct_count = 0
    sample_count = 0

    with torch.set_grad_enabled(is_training):
        for x, y in dataloader:
            x, y = x.to(device), y.to(device)

            pred = model(x)
            loss = loss_fun(pred, y.long())

            if is_training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            n_in_batch = y.size(0)
            # loss is a batch mean; scale it back to a sum before accumulating.
            loss_sum += loss.item() * n_in_batch
            correct_count += (pred.argmax(dim=1) == y).sum().item()
            sample_count += n_in_batch

    if sample_count == 0:
        raise ValueError("dataloader yielded no samples")

    return loss_sum / sample_count, correct_count / sample_count


for epoch in tqdm(range(N_epochs)):
    # Training phase
    train_loss, train_acc = _run_epoch(model, train_dataloader, loss_fun, device, optimizer)
    training_losses.append(train_loss)
    training_acces.append(train_acc)

    # Validation phase
    val_loss, val_acc = _run_epoch(model, val_dataloader, loss_fun, device)
    val_losses.append(val_loss)
    val_acces.append(val_acc)

    lr_schedule.step()  # scheduler advances once per epoch, not per batch

    print(f"Epoch {epoch+1}/{N_epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")


  0%|          | 0/5 [00:00<?, ?it/s]

In [None]:
# Final evaluation on the held-out test set, using the criterion (`loss_fun`)
# created in the training cell.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)  # Ensure model is on the correct device
model.eval()  # Evaluation mode: fixed BatchNorm statistics

# Accumulate per-sample totals rather than averaging per-batch means, so the
# smaller final batch is not over-weighted in the reported numbers.
test_loss_sum = 0.0
test_correct = 0
test_seen = 0
with torch.no_grad():  # No gradients needed for the testing phase
    for x, y in test_dataloader:
        x, y = x.to(device), y.to(device)  # Move data to the same device as the model

        pred = model(x)
        loss = loss_fun(pred, y.long())  # batch-mean loss

        n_in_batch = y.size(0)
        test_loss_sum += loss.item() * n_in_batch
        test_correct += (pred.argmax(dim=1) == y).sum().item()
        test_seen += n_in_batch

if test_seen == 0:
    raise RuntimeError("test_dataloader yielded no samples")

test_loss = test_loss_sum / test_seen
test_acc = test_correct / test_seen

print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")

In [None]:
# Plot the per-epoch training history: losses on the left axis, accuracies on
# a twin right axis that shares the epoch axis.
epochs = np.arange(1, N_epochs + 1)

# One colour per curve.
color_loss = 'blue'
color_val_loss = 'orange'
color_acc = 'green'
color_val_acc = 'red'

# (values, legend label, marker, colour) for each curve, grouped by axis.
_loss_curves = (
    (training_losses, 'Training Loss', 'o', color_loss),
    (val_losses, 'Validation Loss', 'x', color_val_loss),
)
_acc_curves = (
    (training_acces, 'Training Accuracy', 'o', color_acc),
    (val_acces, 'Validation Accuracy', 'x', color_val_acc),
)

fig, ax1 = plt.subplots()

# Left axis: losses.
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss', color=color_loss)
for _values, _legend_text, _marker, _colour in _loss_curves:
    ax1.plot(epochs, _values, label=_legend_text, marker=_marker, color=_colour)
ax1.tick_params(axis='y', labelcolor=color_loss)

# Right axis: accuracies.
ax2 = ax1.twinx()
ax2.set_ylabel('Accuracy', color=color_acc)
for _values, _legend_text, _marker, _colour in _acc_curves:
    ax2.plot(epochs, _values, label=_legend_text, marker=_marker, color=_colour)
ax2.tick_params(axis='y', labelcolor=color_acc)

# One legend per axis, placed so they do not overlap.
ax1.legend(loc='upper left')
ax2.legend(loc='lower left')

fig.tight_layout()  # keeps the right-hand y-label from being clipped
plt.title('Training & Validation Loss and Accuracy')
plt.show()