In [1]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import random

class ImageClassificationDataset(Dataset):
    def __init__(self, positive_dir, negative_dir, transform=None):
        self.positive_dir = positive_dir
        self.negative_dir = negative_dir
        self.transform = transform
        self.positive_files = [(os.path.join(positive_dir, f), 1) for f in os.listdir(positive_dir) if f.endswith('.png')]
        self.negative_files = [(os.path.join(negative_dir, f), 0) for f in os.listdir(negative_dir) if f.endswith('.png')]
        self.all_files = self.positive_files + self.negative_files

    def __len__(self):
        return len(self.all_files)

    def __getitem__(self, idx):
        file_path, label = self.all_files[idx]
        image = Image.open(file_path).convert("RGB")
        
        if self.transform:
            image = self.transform(image)
        
        return image, label, os.path.basename(file_path)

transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor()
])

positive_dir = "data/train_images/Train/Positive"
negative_dir = "data/train_images/Train/Negative"

# use an equal number of positive and negative samples for training
positive_files = [f for f in os.listdir(positive_dir) if f.endswith('.png')]
negative_files = [f for f in os.listdir(negative_dir) if f.endswith('.png')]
n = min(len(positive_files), len(negative_files))
positive_files = positive_files[:n]
negative_files = negative_files[:n]

# dataset = ImageClassificationDataset(positive_dir, negative_dir, transform)
# dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
# suffle split train and test data 85/15
random.shuffle(positive_files)
random.shuffle(negative_files)
split = int(0.85 * n)
train_positive_files = positive_files[:split]
train_negative_files = negative_files[:split]
test_positive_files = positive_files[split:]

train_dataset = ImageClassificationDataset(positive_dir, negative_dir, transform)
train_dataloader = DataLoader(train_dataset, batch_size=256, shuffle=True)

test_dataset = ImageClassificationDataset(positive_dir, negative_dir, transform)
test_dataloader = DataLoader(test_dataset, batch_size=256, shuffle=True)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import torch.nn as nn
import torch.nn.functional as F

class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(64 * 32 * 32, 128)
        self.fc2 = nn.Linear(128, 2)  # Output: 2 classes (positive and negative)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * 32 * 32)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = SimpleCNN()

In [4]:
import torch.optim as optim

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels, _ in train_dataloader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_dataloader):.4f}")

print("Training complete.")

Epoch [1/10], Loss: 1.1291
Epoch [2/10], Loss: 0.6111
Epoch [3/10], Loss: 0.5313
Epoch [4/10], Loss: 0.4594
Epoch [5/10], Loss: 0.4118
Epoch [6/10], Loss: 0.3987
Epoch [7/10], Loss: 0.3747
Epoch [8/10], Loss: 0.3518
Epoch [9/10], Loss: 0.3302
Epoch [10/10], Loss: 0.3132
Training complete.


In [5]:
#store the model in "data/train_images/Train/models/"
model_dir = "data/train_images/Train/models"
os.makedirs(model_dir, exist_ok=True)
model_path = os.path.join(model_dir, "model.pth")
torch.save(model.state_dict(), model_path)
print(f"Model saved at {model_path}")

# Load the model
model = SimpleCNN()
model.load_state_dict(torch.load(model_path))
model.eval()

Model saved at data/train_images/Train/models/model.pth


SimpleCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=65536, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=2, bias=True)
)

In [13]:
# Function to evaluate the model and output file names
def evaluate_model(model, test_dataloader):
    model.eval()
    all_predictions = []
    all_labels = []
    all_filenames = []

    with torch.no_grad():
        for images, labels, filenames in test_dataloader:
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            all_predictions.extend(predicted.numpy())
            all_labels.extend(labels.numpy())
            all_filenames.extend(filenames)

    return all_predictions, all_labels, all_filenames

# # Evaluate the model
# test_dataset = ImageClassificationDataset(positive_dir, negative_dir, transform)
# test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

predictions, labels, filenames = evaluate_model(model, test_dataloader)


positive_dir = "data/train_images/Train/Positive"
# Print the results
idx = 0
for filename, prediction, label in zip(filenames, predictions, labels):
    print(f"File: {filename}, Prediction: {prediction}, Label: {label}")
    # print the original image
    try:
        image = Image.open(os.path.join(positive_dir, filename))
        image.show()
    except:
        print("Image not found")
    #image = Image.open(os.path.join(positive_dir, filename))
    # image.show()
    idx += 1
    if idx == 20:
        break

# print accuracy
correct = sum([1 if p == l else 0 for p, l in zip(predictions, labels)])
accuracy = correct / len(labels)
print(f"Accuracy: {accuracy:.2f}")

File: mask_20241202-164658-012_x_-36_y_-41_rotation_254.png, Prediction: 0, Label: 0
Image not found
File: mask_20241202-164026-417_x_-21_y_-26_rotation_315.png, Prediction: 0, Label: 0
Image not found
File: mask_20241202-165147-095_153_156_130.png, Prediction: 1, Label: 1
File: mask_20241202-164717-213_x_-15_y_-3_rotation_247.png, Prediction: 0, Label: 1
File: mask_20241126-144235-798_x_-64_y_-164_rotation_351.png, Prediction: 0, Label: 0
Image not found
File: mask_20241203-160605-270_x_-29_y_-8_rotation_4.png, Prediction: 0, Label: 0
Image not found
File: mask_20241126-144545-253_x_-169_y_-2_rotation_261.png, Prediction: 0, Label: 0
Image not found
File: mask_20241203-171531-988_205_78_175.png, Prediction: 1, Label: 1
File: mask_20241203-084731-030_x_-18_y_-50_rotation_112.png, Prediction: 0, Label: 0
Image not found
File: mask_20241126-142546-042_x_-21_y_-1_rotation_356.png, Prediction: 0, Label: 0
Image not found
File: mask_20241202-165348-133_175_206_45.png, Prediction: 1, Label: 

Opening in existing browser session.
Opening in existing browser session.
Opening in existing browser session.
Opening in existing browser session.
Opening in existing browser session.
Opening in existing browser session.
Opening in existing browser session.


[105475:105475:0104/183219.954770:ERROR:object_proxy.cc(576)] Failed to call method: org.freedesktop.ScreenSaver.GetActive: object_path= /org/freedesktop/ScreenSaver: org.freedesktop.DBus.Error.NotSupported: This method is not part of the idle inhibition specification: https://specifications.freedesktop.org/idle-inhibit-spec/latest/
Created TensorFlow Lite XNNPACK delegate for CPU.
Attempting to use a delegate that only supports static-sized tensors with a graph that has dynamic-sized tensors (tensor#141 is a dynamic-sized tensor).
