In [15]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
import cv2

In [16]:
class Net_4_layers(nn.Module):
    """Small CNN: two conv/pool stages followed by four fully connected layers.

    The first convolution takes a single input channel, and ``fc1`` expects
    ``16 * 6 * 6`` features, which corresponds to 36x36 inputs
    (36 -> conv5 -> 32 -> pool -> 16 -> conv5 -> 12 -> pool -> 6).
    The final layer emits 2 raw scores (logits) per sample; no softmax is applied.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 6 * 6, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 16)
        self.fc4 = nn.Linear(16, 2)

    def forward(self, x):
        """Run the network on ``x`` and return a tensor with 2 logits per row.

        ``x`` is passed through both conv/pool stages, flattened to rows of
        ``fc1.in_features`` values, and then through the four linear layers.
        """
        # `torch.nn.functional` is reached through the already-imported `nn`
        # so the class does not depend on a separate `F` alias being in scope.
        relu = nn.functional.relu

        x = self.pool(relu(self.conv1(x)))
        x = self.pool(relu(self.conv2(x)))
        # Flatten the feature maps into rows sized for the first linear layer.
        x = x.view(-1, self.fc1.in_features)
        x = relu(self.fc1(x))
        x = relu(self.fc2(x))
        x = relu(self.fc3(x))
        # No activation on the output layer: callers receive raw logits.
        return self.fc4(x)

In [17]:
# Build the network and restore its trained weights from the checkpoint file.
# The model must be instantiated here: nothing earlier in the notebook creates
# `model_1`, so a fresh kernel would otherwise fail with a NameError.
# NOTE(review): torch.load unpickles the file — only load checkpoints from a
# trusted source.
model_1 = Net_4_layers()
# map_location='cpu' lets a checkpoint saved on a GPU machine load on CPU.
model_1.load_state_dict(torch.load('models/4-layer-net.pth', map_location='cpu'))
# Switch to inference mode; as the last expression this also displays the model.
model_1.eval()

Net_4_layers(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=16, bias=True)
  (fc4): Linear(in_features=16, out_features=2, bias=True)
)

In [None]:
# Sliding-window geometry used by the detection loop.
step_size = 20  # stride between consecutive window positions
window_size = (100, 100)  # window extent; [0] is used along x, [1] along y


In [None]:
# Load the target image
image_path = 'target_image.jpg'  # Replace with your image path
# Image.open is lazy and keeps the file handle open; the context manager closes
# it, and convert() forces the pixels into memory as a fresh 3-channel RGB
# image regardless of the file's original mode (palette, RGBA, grayscale, ...).
with Image.open(image_path) as image_file:
    image = image_file.convert('RGB')

In [None]:
# Convert the image into the tensor layout the classifier consumes.
# - Grayscale(1): the network's first convolution takes a single input channel.
# - No whole-image Resize: the sliding window indexes the tensor with
#   coordinates taken from the image size, so both must share one pixel grid.
# TODO: if the model was trained on normalized inputs, append the same
#       single-channel transforms.Normalize(mean=[...], std=[...]) here.
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
])

# Convert the image to a PyTorch tensor and add the batch dimension -> (1, 1, H, W)
image_tensor = transform(image).unsqueeze(0)

# Get the dimensions of the target image (PIL reports size as (width, height))
img_width, img_height = image.size

# Guard against the tensor and the image coordinates drifting apart again.
assert tuple(image_tensor.shape[-2:]) == (img_height, img_width)

In [None]:
# Sliding-window detection: classify every window of the image tensor and keep
# the windows whose "face" probability exceeds the threshold.

# Bounding boxes (left, top, right, bottom) of detected faces, in image pixels
face_boxes = []

# Define a confidence threshold (set once, not on every iteration)
confidence_threshold = 0.9  # Adjust as needed

# NOTE(review): assumes output index 1 is the "face" class — confirm against
# the label mapping used during training.
FACE_CLASS_INDEX = 1

# Spatial size the classifier expects: its first linear layer takes 16 * 6 * 6
# features, i.e. 36 -> conv5 -> 32 -> pool -> 16 -> conv5 -> 12 -> pool -> 6.
MODEL_INPUT_SIZE = (36, 36)

win_w, win_h = window_size

# Slide over the tensor's own grid, then map boxes back to image pixels. The
# scale factors are 1.0 when the tensor was not resized, and keep the boxes
# correct when it was.
tensor_h, tensor_w = image_tensor.shape[-2:]
scale_x = img_width / tensor_w
scale_y = img_height / tensor_h

# The classifier's first convolution fixes the channel count; collapse a
# multi-channel tensor to a single channel by averaging when they differ.
detector_input = image_tensor
if detector_input.shape[1] != model_1.conv1.in_channels:
    detector_input = detector_input.mean(dim=1, keepdim=True)

# Perform sliding window detection (+ 1 so the last full window is included)
with torch.no_grad():
    for y in range(0, tensor_h - win_h + 1, step_size):
        for x in range(0, tensor_w - win_w + 1, step_size):
            # Extract the sub-image using the sliding window
            sub_image = detector_input[:, :, y:y + win_h, x:x + win_w]

            # Resize the window to the classifier's input resolution
            sub_image = nn.functional.interpolate(
                sub_image, size=MODEL_INPUT_SIZE, mode='bilinear', align_corners=False
            )

            # The model returns 2 logits per window; softmax turns them into
            # probabilities so a 0-1 threshold is meaningful.
            logits = model_1(sub_image)
            confidence = torch.softmax(logits, dim=1)[0, FACE_CLASS_INDEX].item()

            if confidence > confidence_threshold:
                # Calculate the coordinates of the bounding box in image pixels
                left = int(round(x * scale_x))
                top = int(round(y * scale_y))
                right = int(round((x + win_w) * scale_x))
                bottom = int(round((y + win_h) * scale_y))
                face_boxes.append((left, top, right, bottom))

# Optionally, apply non-maximum suppression to remove duplicate boxes

In [None]:
# Visualize the detected faces on the original image using OpenCV
image_cv = cv2.imread(image_path)
# cv2.imread signals failure by returning None instead of raising; fail here
# with a clear message rather than inside cv2.rectangle.
if image_cv is None:
    raise FileNotFoundError(f"Could not read image for visualization: {image_path!r}")

for left, top, right, bottom in face_boxes:
    # OpenCV requires integer pixel coordinates for the rectangle corners.
    cv2.rectangle(image_cv, (int(left), int(top)), (int(right), int(bottom)), (0, 255, 0), 2)

# Display or save the image with bounding boxes
# (cv2.imshow needs an OpenCV build with GUI support and a display.)
cv2.imshow('Detected Faces', image_cv)
cv2.waitKey(0)
cv2.destroyAllWindows()