In [1]:
import matplotlib.pyplot as plt
import numpy as np
import torchvision
import tensorflow as tf
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, TensorDataset
from torchvision.io import read_image, ImageReadMode
from pathlib import Path
from sklearn.preprocessing import LabelEncoder
from PIL import Image, ImageDraw, ImageFont
from torchvision import transforms
import random
import os
import zipfile

from sklearn.model_selection import train_test_split
from torchvision.models import resnet50, ResNet50_Weights

In [2]:
from google.colab import drive

In [3]:
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
!ls '/content/drive/MyDrive/Team 5/img/test_img'

banana.png  box_A.png  box_B.png  box_C.png  monkey.png


In [5]:
categories = ["banana", "monkey", "box_A", "box_B", "box_C", "box_D"]

In [6]:
cat_to_label = {"banana": 0, "monkey": 1, "box_A": 2, "box_B": 3, "box_C": 4, "box_D": 5}

In [7]:
root_dir =  '/content/drive/MyDrive/Team 5/img/train_img'

In [8]:
cat_label = []
path_label = []
images = []

for label_folder in os.listdir(root_dir):
    label_path = os.path.join(root_dir, label_folder)
    if os.path.isdir(label_path):
      for file_name in os.listdir(label_path):
        file_path = os.path.join(label_path, file_name)
        if file_name.lower().endswith(('.png')):
            img = Image.open(file_path).convert("RGB")
            resize_transform = transforms.Resize((32, 32))
            img_resized = resize_transform(img)

            to_tensor = transforms.ToTensor()
            img_tensor = to_tensor(img_resized)

            images.append(img_tensor)
            cat_label.append(label_folder)
            path_label.append(file_path)


In [9]:
cat_label

['monkey',
 'monkey',
 'monkey',
 'monkey',
 'monkey',
 'monkey',
 'monkey',
 'monkey',
 'box_C',
 'box_C',
 'box_C',
 'box_C',
 'box_C',
 'box_C',
 'box_C',
 'box_B',
 'box_B',
 'box_B',
 'box_B',
 'box_B',
 'box_B',
 'box_B',
 'box_B',
 'box_A',
 'box_A',
 'box_A',
 'box_A',
 'box_A',
 'box_A',
 'box_A',
 'box_A',
 'box_D',
 'box_D',
 'banana',
 'banana',
 'banana',
 'banana',
 'banana',
 'banana',
 'banana',
 'banana',
 'banana',
 'banana',
 'banana',
 'banana',
 'banana',
 'banana',
 'banana',
 'banana',
 'banana',
 'banana']

In [10]:
path_label

['/content/drive/MyDrive/Team 5/img/train_img/monkey/ROI_27.png',
 '/content/drive/MyDrive/Team 5/img/train_img/monkey/ROI_6.png',
 '/content/drive/MyDrive/Team 5/img/train_img/monkey/ROI_13.png',
 '/content/drive/MyDrive/Team 5/img/train_img/monkey/ROI_32.png',
 '/content/drive/MyDrive/Team 5/img/train_img/monkey/ROI_39.png',
 '/content/drive/MyDrive/Team 5/img/train_img/monkey/ROI_50.png',
 '/content/drive/MyDrive/Team 5/img/train_img/monkey/ROI_20.png',
 '/content/drive/MyDrive/Team 5/img/train_img/monkey/ROI_46.png',
 '/content/drive/MyDrive/Team 5/img/train_img/box_C/ROI_30.png',
 '/content/drive/MyDrive/Team 5/img/train_img/box_C/ROI_42.png',
 '/content/drive/MyDrive/Team 5/img/train_img/box_C/ROI_34.png',
 '/content/drive/MyDrive/Team 5/img/train_img/box_C/ROI_1.png',
 '/content/drive/MyDrive/Team 5/img/train_img/box_C/ROI_23.png',
 '/content/drive/MyDrive/Team 5/img/train_img/box_C/ROI_12.png',
 '/content/drive/MyDrive/Team 5/img/train_img/box_C/ROI_16.png',
 '/content/drive/My

In [11]:
len(images)

51

In [12]:
images = torch.stack(images)

In [13]:
model = nn.Sequential(
    nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),

    nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),

    nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),

    nn.Flatten(),

    nn.Linear(128 * 4* 4, 128),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(128, 6)
)

In [14]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [15]:
X_train, X_test, y_train, y_test = train_test_split(images, cat_label, test_size=0.25, random_state=51)

In [16]:
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=51)

In [17]:
label_encoder = LabelEncoder()
y_train_int = label_encoder.fit_transform(y_train)
y_test_int = label_encoder.transform(y_test)

In [18]:
X_train_tensor = X_train.float()
y_train_tensor = torch.tensor(y_train_int, dtype=torch.long)

X_test_tensor = X_test.float()
y_test_tensor = torch.tensor(y_test_int, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

batch_size = 2
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [19]:
X_train_tensor.shape

torch.Size([28, 3, 32, 32])

In [20]:
X_test_tensor.shape

torch.Size([13, 3, 32, 32])

In [21]:
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_loader)
    epoch_accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%")

Epoch [1/100], Loss: 1.7770, Accuracy: 14.29%
Epoch [2/100], Loss: 1.6276, Accuracy: 32.14%
Epoch [3/100], Loss: 1.4712, Accuracy: 35.71%
Epoch [4/100], Loss: 1.3138, Accuracy: 46.43%
Epoch [5/100], Loss: 1.1812, Accuracy: 50.00%
Epoch [6/100], Loss: 1.0384, Accuracy: 57.14%
Epoch [7/100], Loss: 0.9978, Accuracy: 57.14%
Epoch [8/100], Loss: 0.8719, Accuracy: 57.14%
Epoch [9/100], Loss: 0.7277, Accuracy: 64.29%
Epoch [10/100], Loss: 0.7005, Accuracy: 71.43%
Epoch [11/100], Loss: 0.8077, Accuracy: 64.29%
Epoch [12/100], Loss: 0.7817, Accuracy: 75.00%
Epoch [13/100], Loss: 0.7484, Accuracy: 64.29%
Epoch [14/100], Loss: 0.6846, Accuracy: 67.86%
Epoch [15/100], Loss: 0.6478, Accuracy: 71.43%
Epoch [16/100], Loss: 0.6548, Accuracy: 67.86%
Epoch [17/100], Loss: 0.6882, Accuracy: 64.29%
Epoch [18/100], Loss: 0.5944, Accuracy: 78.57%
Epoch [19/100], Loss: 0.5471, Accuracy: 82.14%
Epoch [20/100], Loss: 0.4902, Accuracy: 82.14%
Epoch [21/100], Loss: 0.3809, Accuracy: 92.86%
Epoch [22/100], Loss: 

In [22]:
model.eval()
val_correct = 0
val_total = 0
with torch.no_grad():
    for val_inputs, val_labels in test_loader:
        val_outputs = model(val_inputs)
        _, val_predicted = torch.max(val_outputs, 1)
        val_total += val_labels.size(0)
        val_correct += (val_predicted == val_labels).sum().item()

val_accuracy = 100 * val_correct / val_total
print(f"Test Accuracy: {val_accuracy:.2f}%")


Test Accuracy: 100.00%


In [23]:
torch.save(model.state_dict(), 'cnn.pth')