In [10]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import Dataset
from PIL import Image
import os
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Define transformations to apply to images
transform = transforms.Compose([
    transforms.Resize((100, 100)),  # Resize images to a fixed size
    transforms.ToTensor(),           # Convert images to PyTorch tensors
])

class CustomDataset(Dataset):
    def __init__(self, root, transform=None):
        self.root = root
        self.transform = transform
        self.img_paths = []
        self.labels = []

        # Load images from the 'eyes' folder and assign label 0
        eyes_folder = os.path.join(root, "eyes")
        eyes_images = os.listdir(eyes_folder)
        self.img_paths.extend([os.path.join(eyes_folder, img) for img in eyes_images])
        self.labels.extend([0] * len(eyes_images))

        # Load images from the 'no_eyes' folder and assign label 1
        no_eyes_folder = os.path.join(root, "no_eyes")
        no_eyes_images = os.listdir(no_eyes_folder)
        self.img_paths.extend([os.path.join(no_eyes_folder, img) for img in no_eyes_images])
        self.labels.extend([1] * len(no_eyes_images))

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        img_path = self.img_paths[idx]
        label = self.labels[idx]

        img = Image.open(img_path).convert("RGB")
        if self.transform:
            img = self.transform(img)

        return img, label

# Define paths to the root folder containing 'eyes' and 'no_eyes' subfolders
root_folder = ""

# Create a custom dataset
dataset = CustomDataset(root_folder, transform=transform)

# Check the number of samples in the dataset
print("Total number of samples:", len(dataset))

# Check the label of the first sample
print("Label of the first sample:", dataset[900][1])

# Check the shape of the first image tensor
print("Shape of the first image tensor:", dataset[0][0].shape)


Total number of samples: 1847
Label of the first sample: 1
Shape of the first image tensor: torch.Size([3, 100, 100])


In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Define the CNN architecture
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(64 * 25 * 25, 128)
        self.fc2 = nn.Linear(128, 2)  # Output layer with 2 classes: eye or no_eye

    def forward(self, x):
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        x = torch.flatten(x, 1)
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Define transformations to apply to images
transform = transforms.Compose([
    transforms.Resize((100, 100)),  # Resize images to a fixed size
    transforms.ToTensor(),           # Convert images to PyTorch tensors
])

# Define paths to the root folder containing 'eyes' and 'no_eyes' subfolders
root_folder = ""

# Create a custom dataset
dataset = CustomDataset(root_folder, transform=transform)

# Split the dataset into training and validation sets
train_size = int(1 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=1)

# Initialize the model
model = CNN().to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [14]:
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)  # Move data to GPU
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}")


Epoch [1/10], Loss: 0.1250
Epoch [2/10], Loss: 0.0258
Epoch [3/10], Loss: 0.0333
Epoch [4/10], Loss: 0.0140
Epoch [5/10], Loss: 0.0509
Epoch [6/10], Loss: 0.0437
Epoch [7/10], Loss: 0.0006
Epoch [8/10], Loss: 0.0001
Epoch [9/10], Loss: 0.0000
Epoch [10/10], Loss: 0.0000


In [21]:
image_path = "../val_frames/10.jpg"
image = Image.open(image_path)

# Preprocess the image
input_image = transform(image).unsqueeze(0)
with torch.no_grad():
    model.eval()  # Set the model to evaluation mode
    output = model(input_image.to(device))

# Print the output
print("Model Output:")
print(output)
if(output[0][0] > output[0][1]):
    print("eyes detected")
else:
    print("no eyes")

Model Output:
tensor([[ 11.4857, -18.0972]], device='cuda:0')
eyes detected


In [None]:
# Save the trained model
torch.save(model.state_dict(), "eye_det_model.pth")
