In [1]:
!pip install torch torchvision


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [2]:
import torch
from torch.utils.data import DataLoader
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.datasets import ImageFolder
from torchvision import transforms
import torchvision.transforms as T
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

In [None]:


# Load Faster R-CNN (ResNet-50 FPN backbone) with its default pre-trained
# weights. `weights="DEFAULT"` replaces the `pretrained=True` flag, which is
# deprecated in torchvision >= 0.13 and emits a warning on every call.
model = fasterrcnn_resnet50_fpn(weights="DEFAULT")

# Number of classes the new head predicts: 1 (face) + 1 (background)
num_classes = 2

# Input width of the existing box classifier; the replacement head must match it.
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Swap the stock box predictor for a fresh one sized for `num_classes`.
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)


In [None]:
import torch
import os
import pandas as pd
from PIL import Image
import torchvision.transforms as T

# Preprocessing pipeline: a single step that converts the loaded image to a tensor.
transform = T.Compose([T.ToTensor()])

# Custom Dataset class for face detection (CSV + folder)
class CustomDataset(torch.utils.data.Dataset):
    """Face-detection dataset backed by an annotation CSV and an image folder.

    The CSV is read with pandas and must contain the columns ``image_name``,
    ``x0``, ``y0``, ``x1`` and ``y1`` (one row per bounding box). Each distinct
    ``image_name`` is one sample.

    Args:
        csv_file: Path to the annotation CSV.
        image_dir: Directory containing the image files.
        transforms: Optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, csv_file, image_dir, transforms=None):
        self.df = pd.read_csv(csv_file)
        self.image_dir = image_dir
        self.transforms = transforms

        # One sample per distinct image; its rows are looked up through the groupby.
        self.image_names = self.df["image_name"].unique()
        self.image_to_annotations = self.df.groupby("image_name")

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the ``idx``-th distinct image.

        ``target`` is a dict with:
            * ``boxes``: float32 tensor of shape (N, 4) holding x0, y0, x1, y1.
            * ``labels``: int64 tensor of N ones (the single "face" class).
            * ``image_id``: tensor containing ``idx``.
        """
        image_name = self.image_names[idx]
        # Only the file name part of `image_name` is used to locate the image.
        image_path = os.path.join(self.image_dir, os.path.basename(image_name))

        # Load image
        img = Image.open(image_path).convert("RGB")

        # All annotation rows that belong to this image.
        records = self.image_to_annotations.get_group(image_name)

        # Slice the four coordinate columns at once instead of walking the rows
        # with iterrows(); this runs for every sample of every epoch.
        coords = records[["x0", "y0", "x1", "y1"]].to_numpy()
        boxes = torch.tensor(coords, dtype=torch.float32).reshape(-1, 4)
        labels = torch.ones((len(records),), dtype=torch.int64)  # Face class

        # Prepare target dictionary
        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([idx])
        }

        if self.transforms:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        """Number of distinct images listed in the CSV."""
        return len(self.image_names)


In [None]:
# Both splits share the annotation CSV and the transform; only the image folder differs.
# NOTE(review): each dataset indexes every image named in the CSV, so an image
# that is absent from one of the folders would presumably fail when accessed —
# confirm the folders match the CSV.
_split_kwargs = {
    "csv_file": "/content/drive/MyDrive/HumanFaceDetection/faces.csv",
    "transforms": transform,
}

train_dataset = CustomDataset(
    image_dir="/content/drive/MyDrive/HumanFaceDetection/train_images",
    **_split_kwargs,
)

val_dataset = CustomDataset(
    image_dir="/content/drive/MyDrive/HumanFaceDetection/val_images",
    **_split_kwargs,
)


In [None]:
import torch
from torch.utils.data import DataLoader, Subset
import torchvision.transforms as T
from sklearn.model_selection import train_test_split

def _collate_batch(batch):
    """Regroup a list of (image, target) samples into an (images, targets) pair of tuples."""
    return tuple(zip(*batch))


# Preprocessing: convert each image to a tensor.
transform = T.Compose([T.ToTensor()])

# Annotation CSV and image folder consumed by the CustomDataset class defined earlier.
csv_file = "/content/drive/MyDrive/HumanFaceDetection/faces.csv"
image_dir = "/content/drive/MyDrive/Human Faces (Object Detection)/images"

# One dataset over all annotated images; the split below works on its indices.
full_dataset = CustomDataset(csv_file=csv_file, image_dir=image_dir, transforms=transform)

# 80/20 train-validation split with a fixed seed.
indices = [*range(len(full_dataset))]
train_idx, val_idx = train_test_split(indices, test_size=0.2, random_state=42)

# Index-based views onto the full dataset.
train_dataset = Subset(full_dataset, train_idx)
valid_dataset = Subset(full_dataset, val_idx)

# Batches of 4; only the training loader shuffles.
train_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=_collate_batch,
)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=4,
    shuffle=False,
    collate_fn=_collate_batch,
)


In [4]:
import pandas as pd
import os
import cv2
import torch
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split
import torchvision.transforms as T
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from tqdm import tqdm
from torch.cuda.amp import autocast, GradScaler

# Dataset location: images live in an "images" subfolder next to faces.csv.
base_dir = "/content/drive/MyDrive/Human Faces (Object Detection)"
image_dir = os.path.join(base_dir, "images")
csv_path = os.path.join(base_dir, "faces.csv")

# Preprocessing: convert each image to a tensor.
transform = T.Compose([T.ToTensor()])

# Read the annotations, then keep only rows whose image file is present in the folder.
df = pd.read_csv(csv_path)
all_images = set(os.listdir(image_dir))
df = df.loc[df["image_name"].isin(all_images)].copy()

# Split by image name (80/20, fixed seed) so all boxes of one image stay in one split.
unique_images = df["image_name"].unique()
train_imgs, test_imgs = train_test_split(
    unique_images,
    test_size=0.2,
    random_state=42,
)

# Materialise one annotation frame per split.
train_df = df.loc[df["image_name"].isin(train_imgs)].copy()
test_df = df.loc[df["image_name"].isin(test_imgs)].copy()

# Define CustomDataset
class CustomDataset(torch.utils.data.Dataset):
    """Face-detection dataset backed by an annotation DataFrame and an image folder.

    The frame must contain the columns ``image_name``, ``x0``, ``y0``, ``x1``
    and ``y1`` (one row per bounding box). Each distinct ``image_name`` is one
    sample.

    Args:
        dataframe: Annotation table.
        image_dir: Directory containing the image files.
        transforms: Optional callable applied to the RGB image array.
    """

    def __init__(self, dataframe, image_dir, transforms=None):
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms
        # One sample per distinct image; its rows are looked up through the groupby.
        self.image_names = self.df["image_name"].unique()
        self.image_to_annotations = self.df.groupby("image_name")

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the ``idx``-th distinct image.

        ``target`` is a dict with:
            * ``boxes``: float32 tensor of shape (N, 4) holding x0, y0, x1, y1.
            * ``labels``: int64 tensor of N ones (the single "face" class).
            * ``image_id``: tensor containing ``idx``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        image_name = self.image_names[idx]
        image_path = os.path.join(self.image_dir, image_name)
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None;
            # fail here with the path rather than inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        records = self.image_to_annotations.get_group(image_name)

        # Slice the four coordinate columns at once instead of walking the rows
        # with iterrows(); this runs for every sample of every epoch.
        coords = records[["x0", "y0", "x1", "y1"]].to_numpy()
        boxes = torch.tensor(coords, dtype=torch.float32).reshape(-1, 4)
        labels = torch.ones((len(records),), dtype=torch.int64)  # face class

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([idx])
        }

        if self.transforms:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        """Number of distinct images in the annotation frame."""
        return len(self.image_names)

def _collate_batch(batch):
    """Regroup a list of (image, target) samples into an (images, targets) pair of tuples."""
    return tuple(zip(*batch))


# One dataset per split; both read from the same image folder with the same transform.
train_dataset = CustomDataset(train_df, image_dir, transforms=transform)
test_dataset = CustomDataset(test_df, image_dir, transforms=transform)

# Batches of 4; only the training loader shuffles.
train_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=_collate_batch,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=4,
    shuffle=False,
    collate_fn=_collate_batch,
)

# Load model
# `weights="DEFAULT"` replaces the `pretrained=True` flag, which is deprecated
# in torchvision >= 0.13 and emits a warning.
model = fasterrcnn_resnet50_fpn(weights="DEFAULT")
in_features = model.roi_heads.box_predictor.cls_score.in_features
# Replace the box predictor with a 2-class head (background + face).
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes=2)

# Set up training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Optimise only the parameters that require gradients.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(trainable_params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# Multiply the learning rate by 0.1 every 3 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# `torch.amp.GradScaler` replaces the deprecated `torch.cuda.amp.GradScaler()`
# (the FutureWarning it raised is visible in this cell's previous output).
# Loss scaling is switched off explicitly when not running on CUDA; the
# deprecated constructor disabled itself in that case, with a warning.
scaler = torch.amp.GradScaler(device.type, enabled=(device.type == "cuda"))

# Train the model
num_epochs = 10

# Mixed precision is used only on a CUDA device. On CPU the forward pass runs in
# full precision, which matches what the deprecated `torch.cuda.amp.autocast()`
# did there (it disabled itself, with a warning).
amp_enabled = device.type == "cuda"

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    print(f"\nEpoch {epoch+1}/{num_epochs}")
    for images, targets in tqdm(train_loader):
        # Move every image and every tensor of every target dict to the training device.
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()

        # `torch.autocast(device_type=...)` replaces the deprecated
        # `torch.cuda.amp.autocast()` context manager.
        with torch.autocast(device_type=device.type, enabled=amp_enabled):
            # In train mode the model is called with targets and returns a dict
            # of loss terms; the optimised loss is their sum.
            loss_dict = model(images, targets)
            loss = sum(loss_dict.values())

        # Scaled backward pass, optimizer step through the scaler, then scale update.
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        running_loss += loss.item()

    # Advance the learning-rate schedule once per epoch.
    lr_scheduler.step()
    avg_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}] - Avg Loss: {avg_loss:.4f}")

# Save model (weights only, as a state dict, relative to the working directory)
torch.save(model.state_dict(), "fasterrcnn_face_detector.pth")
print("\n✅ Model training complete and saved as 'fasterrcnn_face_detector.pth'")


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:01<00:00, 165MB/s]
  scaler = GradScaler()



Epoch 1/10


  with autocast():
100%|██████████| 441/441 [07:08<00:00,  1.03it/s]


Epoch [1/10] - Avg Loss: 0.1864

Epoch 2/10


100%|██████████| 441/441 [06:26<00:00,  1.14it/s]


Epoch [2/10] - Avg Loss: 0.1505

Epoch 3/10


100%|██████████| 441/441 [06:24<00:00,  1.15it/s]


Epoch [3/10] - Avg Loss: 0.1394

Epoch 4/10


100%|██████████| 441/441 [06:21<00:00,  1.16it/s]


Epoch [4/10] - Avg Loss: 0.1204

Epoch 5/10


100%|██████████| 441/441 [06:24<00:00,  1.15it/s]


Epoch [5/10] - Avg Loss: 0.1174

Epoch 6/10


100%|██████████| 441/441 [06:25<00:00,  1.14it/s]


Epoch [6/10] - Avg Loss: 0.1151

Epoch 7/10


100%|██████████| 441/441 [06:28<00:00,  1.13it/s]


Epoch [7/10] - Avg Loss: 0.1122

Epoch 8/10


100%|██████████| 441/441 [06:24<00:00,  1.15it/s]


Epoch [8/10] - Avg Loss: 0.1118

Epoch 9/10


100%|██████████| 441/441 [06:27<00:00,  1.14it/s]


Epoch [9/10] - Avg Loss: 0.1110

Epoch 10/10


100%|██████████| 441/441 [06:27<00:00,  1.14it/s]


Epoch [10/10] - Avg Loss: 0.1111

✅ Model training complete and saved as 'fasterrcnn_face_detector.pth'


In [1]:
# # Print classification-style report
# print("\n--- Evaluation Report ---")
# print(f"Precision: {metrics.box.p.mean():.3f}")
# print(f"Recall: {metrics.box.r.mean():.3f}")
# print(f"mAP50: {metrics.box.map50.mean():.3f}")
# print(f"mAP50-95: {metrics.box.map.mean():.3f}")
# print(f"Number of Classes: {metrics.box.nc}")
