In [1]:
import matplotlib.pyplot as plt
import numpy as np
import torchvision
import tensorflow as tf
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, TensorDataset
from torchvision.io import read_image, ImageReadMode
from pathlib import Path
from sklearn.preprocessing import LabelEncoder
from PIL import Image, ImageDraw, ImageFont
from torchvision import transforms
import random
import os
import zipfile

from sklearn.model_selection import train_test_split
from torchvision.models import resnet50, ResNet50_Weights

In [2]:
from google.colab import drive

In [3]:
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
!ls '/content/drive/MyDrive/Team 5/img/cnn_imgs'

banana	boxA  boxB  boxC  boxD	boxE  monkey


In [5]:
categories = ["boxA", "banana", "monkey", "boxB", "boxC", "boxD", "boxE"]

In [6]:
cat_to_label = {c: i for i,c in enumerate(categories)}

In [7]:
root_dir =  '/content/drive/MyDrive/Team 5/img/cnn_imgs'

In [8]:
for folder in categories:
    folder_path = os.path.join(root_dir, folder)

    if os.path.exists(folder_path):
        num_files = len(os.listdir(folder_path))
        print(f"Folder: {folder}, Number of files: {num_files}")

Folder: boxA, Number of files: 72
Folder: banana, Number of files: 111
Folder: monkey, Number of files: 99
Folder: boxB, Number of files: 53
Folder: boxC, Number of files: 45
Folder: boxD, Number of files: 27
Folder: boxE, Number of files: 10


In [9]:
cat_label = []
path_label = []
images = []

for label_folder in os.listdir(root_dir):
  print(label_folder)
  label_path = os.path.join(root_dir, label_folder)
  if os.path.isdir(label_path):
    for file_name in os.listdir(label_path):
      file_path = os.path.join(label_path, file_name)
      if file_name.lower().endswith(('.png')):
          img = Image.open(file_path).convert("RGB")
          resize_transform = transforms.Resize((32, 32))
          img_resized = resize_transform(img)

          to_tensor = transforms.ToTensor()
          img_tensor = to_tensor(img_resized)

          images.append(img_tensor)
          cat_label.append(label_folder)
          path_label.append(file_path)


boxA
banana
monkey
boxB
boxC
boxD
boxE


In [10]:
from collections import Counter
label_counts = Counter(cat_label)

# Print the count of each label
for label in categories:
    print(f"{label}: {label_counts[label]}")

boxA: 72
banana: 111
monkey: 99
boxB: 53
boxC: 45
boxD: 27
boxE: 10


In [13]:
len(images)

417

In [14]:
images = torch.stack(images)

In [15]:
model = nn.Sequential(
    nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),

    nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),

    nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2),

    nn.Flatten(),

    nn.Linear(128 * 4* 4, 128),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(128, 7)
)

In [16]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [17]:
X_train, X_test, y_train, y_test = train_test_split(images, cat_label, test_size=0.25, random_state=51)

In [18]:
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=51)

In [19]:
label_encoder = LabelEncoder()
y_train_int = label_encoder.fit_transform(y_train)
y_test_int = label_encoder.transform(y_test)

In [30]:
import joblib
joblib.dump(label_encoder, 'label_encoder.pkl')

['label_encoder.pkl']

In [21]:
X_train_tensor = X_train.float()
y_train_tensor = torch.tensor(y_train_int, dtype=torch.long)

X_test_tensor = X_test.float()
y_test_tensor = torch.tensor(y_test_int, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

batch_size = 2
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [22]:
X_train_tensor.shape

torch.Size([234, 3, 32, 32])

In [23]:
X_test_tensor.shape

torch.Size([105, 3, 32, 32])

In [24]:
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    epoch_loss = running_loss / len(train_loader)
    epoch_accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%")

Epoch [1/100], Loss: 1.4813, Accuracy: 37.18%
Epoch [2/100], Loss: 0.9187, Accuracy: 64.10%
Epoch [3/100], Loss: 0.7462, Accuracy: 71.79%
Epoch [4/100], Loss: 0.5876, Accuracy: 79.49%
Epoch [5/100], Loss: 0.4658, Accuracy: 85.90%
Epoch [6/100], Loss: 0.3422, Accuracy: 91.45%
Epoch [7/100], Loss: 0.2072, Accuracy: 93.16%
Epoch [8/100], Loss: 0.2413, Accuracy: 93.59%
Epoch [9/100], Loss: 0.3002, Accuracy: 92.31%
Epoch [10/100], Loss: 0.1907, Accuracy: 96.58%
Epoch [11/100], Loss: 0.1251, Accuracy: 97.01%
Epoch [12/100], Loss: 0.1414, Accuracy: 97.01%
Epoch [13/100], Loss: 0.0988, Accuracy: 97.86%
Epoch [14/100], Loss: 0.1185, Accuracy: 97.01%
Epoch [15/100], Loss: 0.1495, Accuracy: 95.73%
Epoch [16/100], Loss: 0.0834, Accuracy: 97.86%
Epoch [17/100], Loss: 0.1057, Accuracy: 97.01%
Epoch [18/100], Loss: 0.0832, Accuracy: 98.72%
Epoch [19/100], Loss: 0.0701, Accuracy: 97.86%
Epoch [20/100], Loss: 0.0611, Accuracy: 99.15%
Epoch [21/100], Loss: 0.0770, Accuracy: 99.15%
Epoch [22/100], Loss: 

In [27]:
model.eval()
val_correct = 0
val_total = 0
with torch.no_grad():
    for val_inputs, val_labels in test_loader:
        val_outputs = model(val_inputs)
        _, val_predicted = torch.max(val_outputs, 1)
        val_total += val_labels.size(0)
        val_correct += (val_predicted == val_labels).sum().item()

val_accuracy = 100 * val_correct / val_total
print(f"Test Accuracy: {val_accuracy:.2f}%")


Test Accuracy: 100.00%


In [26]:
torch.save(model.state_dict(), 'cnn.pth')