## Step 1: Install Dependencies

In [1]:
%pip install -q torch torchvision opencv-python pillow numpy matplotlib tqdm -U

Note: you may need to restart the kernel to use updated packages.


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
gensim 4.3.0 requires FuzzyTM>=0.4.0, which is not installed.
tables 3.8.0 requires blosc2~=2.0.0, which is not installed.
tables 3.8.0 requires cython>=0.29.21, which is not installed.
camel-ai 0.2.18 requires numpy<2,>=1, but you have numpy 2.2.6 which is incompatible.
langchain 0.3.15 requires numpy<2,>=1.22.4; python_version < "3.12", but you have numpy 2.2.6 which is incompatible.
langchain-community 0.3.15 requires numpy<2,>=1.22.4; python_version < "3.12", but you have numpy 2.2.6 which is incompatible.
numba 0.60.0 requires numpy<2.1,>=1.22, but you have numpy 2.2.6 which is incompatible.
openvino 2024.1.0 requires numpy<2.0.0,>=1.16.6, but you have numpy 2.2.6 which is incompatible.
patchify 0.2.3 requires numpy<2,>=1, but you have numpy 2.2.6 which is incompatible.
streamlit 1.41.1 requires pillow<12,>=7

## Step 2: Import Libraries and Setup

In [2]:
import torch
import torchvision
from pathlib import Path
import cv2
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import time
import statistics

# Show the library versions and the compute device detected for this kernel.
print(
    f"PyTorch version: {torch.__version__}",
    f"Torchvision version: {torchvision.__version__}",
    f"Device: {torch.device('cuda' if torch.cuda.is_available() else 'cpu')}",
    sep="\n",
)

PyTorch version: 2.9.1+cpu
Torchvision version: 0.24.1+cpu
Device: cpu


## Step 3: Setup Dataset and Class Labels

In [3]:
# Dataset configuration: root folder and the data.yaml expected inside it
DATASETS_DIR = Path("./datasets")
OUT_DIR = DATASETS_DIR
CFG_PATH = OUT_DIR / "data.yaml"

# Safety equipment class names, keyed by the class id found in the label files
SAFETY_CLASSES = {
    0: 'Hardhat',
    1: 'Mask',
    2: 'NO-Hardhat',
    3: 'NO-Mask',
    4: 'NO-Safety Vest',
    5: 'Person',
    6: 'Safety Cone',
    7: 'Safety Vest',
    8: 'machinery',
    9: 'vehicle'
}

NUM_SAFETY_CLASSES = len(SAFETY_CLASSES)
NUM_CLASSES_WITH_BG = NUM_SAFETY_CLASSES + 1  # +1 for background

print(f"Safety Classes ({NUM_SAFETY_CLASSES}):")
for idx, name in SAFETY_CLASSES.items():
    print(f"  {idx}: {name}")
print(f"\nTotal classes for training: {NUM_CLASSES_WITH_BG} (including background)")

# Verify dataset structure
assert (CFG_PATH).exists(), f"Config file not found: {CFG_PATH}"
assert (OUT_DIR / "train" / "images").exists(), "Train images folder missing!"
assert (OUT_DIR / "valid" / "images").exists(), "Validation images folder missing!"
# Label folders are checked as well: the dataset class treats a missing label
# file as "no objects", so a missing folder would otherwise go unnoticed and
# training would silently run on empty targets.
assert (OUT_DIR / "train" / "labels").exists(), "Train labels folder missing!"
assert (OUT_DIR / "valid" / "labels").exists(), "Validation labels folder missing!"
print("\n✓ Dataset structure verified")

Safety Classes (10):
  0: Hardhat
  1: Mask
  2: NO-Hardhat
  3: NO-Mask
  4: NO-Safety Vest
  5: Person
  6: Safety Cone
  7: Safety Vest
  8: machinery
  9: vehicle

Total classes for training: 11 (including background)

✓ Dataset structure verified


## Step 4: Create Custom Dataset for Faster R-CNN

In [4]:
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
from PIL import Image
import os

class SafetyDetectionDataset(Dataset):
    """
    Detection dataset that pairs images with YOLO-format label files.

    Each item is ``(image_tensor, target)``. ``target`` is a dict with
    absolute-pixel ``boxes`` in ``[x1, y1, x2, y2]`` order, ``labels``
    shifted by +1 so that 0 stays reserved for background, plus
    ``image_id``, ``area`` and ``iscrowd``.

    Args:
        img_dir: Folder containing ``.jpg`` / ``.jpeg`` / ``.png`` images
            (extension match is case-insensitive).
        label_dir: Folder with one ``<image stem>.txt`` file per image. Each
            line is ``class_id x_center y_center width height`` with the
            coordinates expressed as fractions of the image size.
        transforms: Stored on the instance.
            NOTE(review): it is not applied in ``__getitem__`` -- confirm
            whether callers expect it to be.
    """

    # Extensions accepted as images (compared against the lower-cased file name)
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, img_dir, label_dir, transforms=None):
        self.img_dir = Path(img_dir)
        self.label_dir = Path(label_dir)
        self.transforms = transforms

        # Sorted so that index -> image mapping is stable between runs
        self.images = sorted(
            f for f in os.listdir(img_dir)
            if f.lower().endswith(self.IMAGE_EXTENSIONS)
        )

    def __len__(self):
        """Return the number of images found in ``img_dir``."""
        return len(self.images)

    def _label_path(self, img_name):
        """Return the label file path for ``img_name``.

        Only the final extension is swapped for ``.txt``; this covers
        ``.jpeg`` and upper-case extensions and leaves any ``.jpg`` / ``.png``
        occurring earlier in the file name untouched.
        """
        return self.label_dir / Path(img_name).with_suffix('.txt').name

    @staticmethod
    def _read_yolo_boxes(label_file, img_width, img_height):
        """Parse a YOLO label file into pixel-space boxes and shifted labels.

        Returns:
            ``(boxes, labels)`` as plain lists. ``boxes`` holds
            ``[x1, y1, x2, y2]`` clamped to the image; ``labels`` holds
            ``class_id + 1``. Both are empty when the file does not exist.
            Lines with fewer than five fields and boxes with no area after
            clamping are skipped.
        """
        boxes = []
        labels = []
        if not label_file.exists():
            return boxes, labels

        max_x = float(img_width)
        max_y = float(img_height)
        with open(label_file, 'r') as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) < 5:
                    continue

                class_id = int(parts[0])
                x_center = float(parts[1]) * img_width
                y_center = float(parts[2]) * img_height
                width = float(parts[3]) * img_width
                height = float(parts[4]) * img_height

                # Convert to xyxy format, clamped to the image bounds
                x1 = min(max(x_center - width / 2, 0.0), max_x)
                y1 = min(max(y_center - height / 2, 0.0), max_y)
                x2 = min(max(x_center + width / 2, 0.0), max_x)
                y2 = min(max(y_center + height / 2, 0.0), max_y)

                # torchvision detection models reject boxes without positive
                # width and height, so degenerate annotations are dropped here
                if x2 <= x1 or y2 <= y1:
                    continue

                boxes.append([x1, y1, x2, y2])
                labels.append(class_id + 1)  # +1 because 0 is reserved for background
        return boxes, labels

    def __getitem__(self, idx):
        """Load image ``idx`` and its annotations as ``(image_tensor, target)``."""
        img_name = self.images[idx]

        # The context manager releases the file handle once pixels are decoded
        with Image.open(self.img_dir / img_name) as raw_image:
            image = raw_image.convert('RGB')
        img_width, img_height = image.size

        boxes, labels = self._read_yolo_boxes(
            self._label_path(img_name), img_width, img_height
        )

        # Convert to tensor
        image_tensor = T.ToTensor()(image)

        if len(boxes) > 0:
            boxes = torch.as_tensor(boxes, dtype=torch.float32)
            labels = torch.as_tensor(labels, dtype=torch.int64)
        else:
            # Explicit (0, 4) / (0,) shapes for images without annotations
            boxes = torch.zeros((0, 4), dtype=torch.float32)
            labels = torch.zeros((0,), dtype=torch.int64)

        target = {
            'boxes': boxes,
            'labels': labels,
            'image_id': torch.tensor([idx]),
            # Column slicing also works on the empty (0, 4) tensor
            'area': (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
            'iscrowd': torch.zeros((len(boxes),), dtype=torch.int64)
        }

        return image_tensor, target

# Build the train / validation datasets from the split folders under OUT_DIR
train_dataset = SafetyDetectionDataset(
    img_dir=str(OUT_DIR / "train" / "images"),
    label_dir=str(OUT_DIR / "train" / "labels"),
)
val_dataset = SafetyDetectionDataset(
    img_dir=str(OUT_DIR / "valid" / "images"),
    label_dir=str(OUT_DIR / "valid" / "labels"),
)

# Report how many images each split contains
print(
    f"✓ Training dataset: {len(train_dataset)} images",
    f"✓ Validation dataset: {len(val_dataset)} images",
    sep="\n",
)

# Collate function used by both DataLoaders
def collate_fn(batch):
    """Regroup a list of samples field-by-field.

    A batch ``[(img0, tgt0), (img1, tgt1), ...]`` becomes
    ``((img0, img1, ...), (tgt0, tgt1, ...))``; nothing is stacked.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Batch size shared by both loaders and by the summary printed below
BATCH_SIZE = 4

# Create dataloaders. collate_fn keeps samples as tuples instead of stacking
# them, because each target holds a different number of boxes.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    collate_fn=collate_fn,
    num_workers=0
)

val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=collate_fn,
    num_workers=0
)

print("\n✓ DataLoaders created")
print(f"  Train batches: {len(train_loader)} (batch_size={BATCH_SIZE})")
print(f"  Val batches: {len(val_loader)} (batch_size={BATCH_SIZE})")

✓ Training dataset: 2605 images
✓ Validation dataset: 114 images

✓ DataLoaders created
  Train batches: 652 (batch_size=4)
  Val batches: 29 (batch_size=4)


## Step 5: Create and Configure Model

In [5]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights

# Load Faster R-CNN (ResNet50-FPN backbone) with COCO-pretrained weights.
# `weights=` replaces the deprecated `pretrained=True` flag; COCO_V1 is the
# checkpoint that flag used to load (91 COCO categories).
finetuned_model = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.COCO_V1)

# Get the number of input features for the classifier
in_features = finetuned_model.roi_heads.box_predictor.cls_score.in_features

# Replace the pre-trained head with a new one for safety classes
# num_classes = 10 safety classes + 1 background
finetuned_model.roi_heads.box_predictor = FastRCNNPredictor(in_features, NUM_CLASSES_WITH_BG)

# Freeze backbone layers for transfer learning
finetuned_model.backbone.requires_grad_(False)

# Freeze RPN
finetuned_model.rpn.requires_grad_(False)

# Only train the head
print("=" * 70)
print("MODEL CONFIGURATION FOR TRANSFER LEARNING")
print("=" * 70)
# The frozen weights come from the COCO detection checkpoint loaded above
print("\nFrozen layers (keeping COCO-pretrained weights):")
print("  ✓ Backbone (ResNet50)")
print("  ✓ RPN (Region Proposal Network)")
print("\nTrainable layers (will be fine-tuned):")
print("  • ROI Head (box classifier)")
print(f"  • Box Predictor (for {NUM_CLASSES_WITH_BG} classes: {NUM_SAFETY_CLASSES} safety + background)")

# Count parameters
total_params = sum(p.numel() for p in finetuned_model.parameters())
trainable_params = sum(p.numel() for p in finetuned_model.parameters() if p.requires_grad)

print(f"\nTotal parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"Frozen parameters: {total_params - trainable_params:,}")
print(f"Trainable ratio: {100 * trainable_params / total_params:.2f}%")

# Move model to device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
finetuned_model.to(device)
print(f"\n✓ Model moved to {device}")



MODEL CONFIGURATION FOR TRANSFER LEARNING

Frozen layers (keeping ImageNet weights):
  ✓ Backbone (ResNet50)
  ✓ RPN (Region Proposal Network)

Trainable layers (will be fine-tuned):
  • ROI Head (box classifier)
  • Box Predictor (for 11 classes: 10 safety + background)

Total parameters: 41,345,286
Trainable parameters: 13,952,055
Frozen parameters: 27,393,231
Trainable ratio: 33.75%

✓ Model moved to cpu


## Step 6: Training and Evaluation Functions

In [6]:
def train_epoch(model, train_loader, optimizer, device):
    """Run one optimisation pass over ``train_loader``.

    The model is switched to training mode, every batch is moved to
    ``device``, and the summed loss dictionary returned by the model is
    back-propagated. Returns the mean of the per-batch summed losses.
    """
    model.train()
    running_loss = 0

    for batch_images, batch_targets in tqdm(train_loader, desc="Training"):
        # Move the images and every tensor inside each target dict to the device
        batch_images = [image.to(device) for image in batch_images]
        batch_targets = [
            {key: value.to(device) for key, value in target.items()}
            for target in batch_targets
        ]

        # In training mode the model returns a dict of losses; add them up
        loss_dict = model(batch_images, batch_targets)
        batch_loss = sum(loss_dict.values())

        # Standard update step
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    return running_loss / len(train_loader)

@torch.no_grad()
def evaluate(model, val_loader, device):
    """Run inference over ``val_loader`` and collect the raw predictions.

    The model is put in eval mode. Returns a dict with keys ``'boxes'``,
    ``'scores'`` and ``'labels'``; each maps to a list holding one NumPy
    array per image. The targets yielded by the loader are not used.
    """
    model.eval()

    collected = {'boxes': [], 'scores': [], 'labels': []}

    for images, _targets in tqdm(val_loader, desc="Evaluating"):
        batch = [image.to(device) for image in images]

        for prediction in model(batch):
            for key, per_image in collected.items():
                per_image.append(prediction[key].cpu().numpy())

    return collected

print("✓ Training and evaluation functions defined")

✓ Training and evaluation functions defined


## Step 7: Fine-tune the Model

In [None]:
import torch.optim as optim

# Fine-tuning hyperparameters
NUM_EPOCHS = 10  # Start with 10 epochs, can increase if needed
LEARNING_RATE = 0.001  # Lower LR for fine-tuning
WARMUP_EPOCHS = 2

# Hand only the parameters that are still trainable to the optimizer
params_to_optimize = list(
    filter(lambda prm: prm.requires_grad, finetuned_model.parameters())
)
optimizer = optim.Adam(params_to_optimize, lr=LEARNING_RATE)

# Halve the learning rate every 5 epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# Configuration banner
print(
    "=" * 70,
    "FINE-TUNING CONFIGURATION",
    "=" * 70,
    f"Epochs: {NUM_EPOCHS}",
    f"Learning Rate: {LEARNING_RATE}",
    "Optimizer: Adam",
    f"Device: {device}",
    f"Train samples: {len(train_dataset)}",
    f"Val samples: {len(val_dataset)}",
    "=" * 70,
    sep="\n",
)

# Per-epoch mean training loss, filled in by the loop below
train_losses = []
print("\nStarting fine-tuning...\n")

for epoch in range(NUM_EPOCHS):
    print(f"Epoch {epoch + 1}/{NUM_EPOCHS}", "-" * 70, sep="\n")

    # One pass over the training data
    train_loss = train_epoch(finetuned_model, train_loader, optimizer, device)
    train_losses.append(train_loss)
    print(f"Train Loss: {train_loss:.4f}")

    # Validation predictions are collected on every second epoch
    if epoch % 2 == 1:
        print("Evaluating on validation set...")
        val_results = evaluate(finetuned_model, val_loader, device)
        print("Evaluation completed. Found predictions.")

    # Advance the schedule, then show the rate the next epoch will use
    scheduler.step()
    print(f"Learning Rate: {scheduler.get_last_lr()[0]:.6f}\n")

print("=" * 70, "FINE-TUNING COMPLETED!", "=" * 70, sep="\n")

FINE-TUNING CONFIGURATION
Epochs: 10
Learning Rate: 0.001
Optimizer: Adam
Device: cpu
Train samples: 2605
Val samples: 114

Starting fine-tuning...

Epoch 1/10
----------------------------------------------------------------------


Training:   0%|          | 0/652 [00:00<?, ?it/s]

## Step 8: Save and Visualize Results

In [None]:
# Create directory for fine-tuned models
finetuned_models_dir = Path("./resnet_finetuned_safety")
finetuned_models_dir.mkdir(parents=True, exist_ok=True)

# Number of epochs that actually finished; this differs from NUM_EPOCHS when
# the training cell was interrupted before completing.
epochs_completed = len(train_losses)

# Save the fine-tuned model
finetuned_model_path = finetuned_models_dir / "finetuned_resnet_safety.pt"
torch.save({
    'model_state_dict': finetuned_model.state_dict(),
    'num_classes': NUM_CLASSES_WITH_BG,
    'class_names': SAFETY_CLASSES,
    'epoch': epochs_completed
}, str(finetuned_model_path))

print(f"✓ Fine-tuned model saved to: {finetuned_model_path}")

if train_losses:
    # Plot training loss; the x-axis is sized from the recorded losses so a
    # partially completed run still plots instead of raising a shape mismatch
    plt.figure(figsize=(10, 5))
    plt.plot(range(1, epochs_completed + 1), train_losses, marker='o', linewidth=2, label='Training Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('ResNet50 Fine-tuning on Safety Dataset')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    initial_loss = train_losses[0]
    final_loss = train_losses[-1]
    print(f"\nTraining Summary:")
    print(f"  Initial Loss: {initial_loss:.4f}")
    print(f"  Final Loss: {final_loss:.4f}")
    # The relative reduction is undefined for a zero initial loss
    if initial_loss != 0:
        print(f"  Loss Reduction: {((initial_loss - final_loss) / initial_loss * 100):.2f}%")
else:
    print("\nNo completed training epochs recorded; skipping loss plot and summary.")

## Step 9: Test Fine-tuned Model

In [None]:
def test_finetuned_model(image_path, model, score_threshold=0.5):
    """Run the detector on one image and draw the confident detections.

    Args:
        image_path: Path of the image file to load.
        model: Detection model; it is switched to eval mode by this call.
        score_threshold: Only detections with a score strictly above this
            value are drawn and counted.

    Returns:
        ``(annotated_image, detected_count)`` where ``annotated_image`` is a
        PIL image with boxes and ``"<class>: <score>"`` captions drawn on it.
    """

    # Load image; the context manager releases the file handle after decoding
    with Image.open(image_path) as raw_image:
        image = raw_image.convert('RGB')

    # Use the device the model lives on instead of a notebook-level global
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    # torchvision detection models take a list of 3D [C, H, W] tensors, so no
    # batch dimension is added here (a 4D tensor inside the list is rejected)
    image_tensor = T.ToTensor()(image).to(model_device)

    # Inference
    model.eval()
    with torch.no_grad():
        predictions = model([image_tensor])

    # Draw detections
    img_array = np.array(image)
    img_height, img_width = img_array.shape[:2]
    boxes = predictions[0]['boxes'].cpu().numpy()
    scores = predictions[0]['scores'].cpu().numpy()
    labels = predictions[0]['labels'].cpu().numpy()

    detected_count = 0
    for box, score, label in zip(boxes, scores, labels):
        if score <= score_threshold:
            continue
        detected_count += 1

        # Plain Python ints for OpenCV, clipped to the image
        x1, y1, x2, y2 = (int(coord) for coord in box)
        x1, y1 = max(0, x1), max(0, y1)
        x2 = min(img_width, x2)
        y2 = min(img_height, y2)

        # Map label to class name (subtract 1 because 0 is background)
        class_id = int(label) - 1
        class_name = SAFETY_CLASSES.get(class_id, f"Unknown_{class_id}")

        # Draw box
        cv2.rectangle(img_array, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Draw label; keep the baseline low enough that the caption stays
        # visible for boxes touching the top edge
        text = f"{class_name}: {score:.2f}"
        text_y = max(y1 - 5, 15)
        cv2.putText(img_array, text, (x1, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

    return Image.fromarray(img_array), detected_count

# Test on sample validation images. The folder is derived from OUT_DIR like the
# datasets above, all image extensions the dataset accepts are considered, and
# the listing is sorted so the same three images are picked on every run.
valid_images_dir = OUT_DIR / "valid" / "images"
test_images = sorted(
    path for path in valid_images_dir.glob("*")
    if path.suffix.lower() in {'.jpg', '.jpeg', '.png'}
)[:3]

if test_images:
    print("Testing fine-tuned model on sample images...\n")
    for img_path in test_images:
        result_image, num_detected = test_finetuned_model(str(img_path), finetuned_model)
        print(f"{img_path.name}: {num_detected} safety objects detected")
        plt.figure(figsize=(10, 6))
        plt.imshow(result_image)
        plt.axis('off')
        plt.title(f"{img_path.name} - {num_detected} detections")
        plt.tight_layout()
        plt.show()
else:
    # Say so explicitly instead of finishing the cell with no output
    print(f"No validation images found in {valid_images_dir}")

## Summary

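✓ Fine-tuned a Faster R-CNN detector (ResNet50-FPN backbone, COCO-pretrained) on the safety equipment dataset  
✓ Transfer learning approach: froze the backbone and RPN, trained only the ROI head and box predictor  
✓ Model detects 10 safety classes (plus a background class)  
✓ Ready for quantization and OpenVINO deployment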