In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Summarise what is available under the input directory. The joined path used
# to be built and immediately discarded, so this cell produced no output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    if filenames:
        # One line per directory keeps the output short even when a
        # directory holds many files.
        print(f"{dirname}: {len(filenames)} file(s)")

        

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os

import wandb

# SECURITY: an API key used to be hard-coded in this call. Anything stored in
# a notebook is readable by everyone who can open it, so that key should be
# treated as leaked and revoked. The key is now read from the environment;
# when WANDB_API_KEY is unset, key=None lets wandb fall back to its own
# credential lookup (a previously saved login or an interactive prompt).
wandb.login(key=os.environ.get("WANDB_API_KEY"))

# Hyperparameters recorded with the run. They are informational only: the
# training cell defines the values that are actually used, so keep both in sync.
wandb.init(
    project="face-mask-detection",
    config={
        "learning_rate": 2e-4,  # matches the lr passed to AdamW in the training cell
        "weight_decay": 2e-4,   # matches the weight_decay of the AdamW param group
        "epochs": 80,
        "batch_size": 8,
        "optimizer": "AdamW",
    }
)


In [None]:
import os
import cv2
import time
import torch
import numpy as np
import torchvision
import matplotlib.pyplot as plt

from torch import nn, optim
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import transforms, models
from torchvision.models.detection.ssd import SSD300_VGG16_Weights
from torchvision.models.detection import ssd300_vgg16
from torchvision.ops import nms

# Run on the GPU when CUDA is available; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)



In [None]:
import os
import cv2
import torch
import numpy as np
import xml.etree.ElementTree as ET

from torch.utils.data import Dataset
from torchvision import transforms

class FaceMaskDataset(Dataset):
    """Face-mask detection dataset: PNG images plus Pascal VOC XML annotations.

    ``root`` must contain an ``images`` directory with ``.png`` files and an
    ``annotations`` directory holding one ``<image stem>.xml`` per image.

    Each item is a pair ``(image, target)``:

    * ``image``  - the picture as loaded by OpenCV, converted from BGR to RGB.
    * ``target`` - ``{'boxes': float32 tensor (N, 4), 'labels': int64 tensor (N,)}``
      with boxes given as ``[xmin, ymin, xmax, ymax]`` exactly as in the XML.

    NOTE(review): ``transform`` is stored on the instance but is not applied
    by ``__getitem__``; the training loop converts the raw image itself.
    Applying it here would change the item format, so that change has to be
    made together with the code that consumes the items.
    """

    def __init__(self, root, transform=None):
        """Index the PNG files found under ``root/images``.

        Args:
            root: dataset directory containing ``images`` and ``annotations``.
            transform: optional callable kept on ``self.transform`` (see the
                class note: it is currently not applied).
        """
        self.root = root
        self.transform = transform

        self.image_dir = os.path.join(root, "images")
        self.anno_dir  = os.path.join(root, "annotations")

        # Sorted so that an index always maps to the same file.
        self.image_files = sorted(
            [f for f in os.listdir(self.image_dir) if f.lower().endswith('.png')]
        )

        # Class name as written in the XML -> integer id (0 is background).
        self.name2label = {
            "background": 0,
            "with_mask": 1,
            "without_mask": 2,
            "mask_weared_incorrect": 3
        }

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load image ``idx`` and its annotation.

        Returns:
            ``(image, target)`` as described in the class docstring.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        img_filename = self.image_files[idx]
        img_path = os.path.join(self.image_dir, img_filename)

        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None; fail here with the
            # offending path instead of with an opaque error inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        xml_filename = os.path.splitext(img_filename)[0] + ".xml"
        anno_path = os.path.join(self.anno_dir, xml_filename)
        boxes, labels = self.parse_voc_xml(anno_path)

        return image, self._build_target(boxes, labels)

    @staticmethod
    def _build_target(boxes, labels):
        """Turn Python lists of boxes/labels into the target dict of tensors."""
        return {
            # reshape keeps the (N, 4) layout even when there are no boxes;
            # a bare empty list would otherwise become a tensor of shape (0,).
            'boxes':  torch.as_tensor(boxes,  dtype=torch.float32).reshape(-1, 4),
            'labels': torch.as_tensor(labels, dtype=torch.int64).reshape(-1)
        }

    def parse_voc_xml(self, xml_path):
        """Read the object boxes and class ids from one Pascal VOC XML file.

        Objects are skipped when they have no ``bndbox`` element or when their
        class name does not map to a foreground id in ``self.name2label``
        (otherwise they would enter training as boxes labelled "background").

        Returns:
            ``(boxes, labels)``: a list of ``[xmin, ymin, xmax, ymax]`` float
            lists and the list of matching integer class ids.
        """
        boxes  = []
        labels = []

        root = ET.parse(xml_path).getroot()

        for obj in root.findall("object"):
            # Tolerate a missing <name> and stray whitespace around the text.
            class_name = (obj.findtext("name") or "").strip()
            label_id = self.name2label.get(class_name, 0)

            bndbox = obj.find("bndbox")
            if label_id == 0 or bndbox is None:
                continue

            boxes.append(
                [float(bndbox.findtext(tag)) for tag in ("xmin", "ymin", "xmax", "ymax")]
            )
            labels.append(label_id)

        return boxes, labels


In [None]:
import albumentations as A
from albumentations.pytorch import ToTensorV2


# Augmentation pipeline intended for training samples: random 300x300 crop,
# horizontal flip, colour jitter and a small shift/scale/rotate, then
# conversion to a tensor. Boxes are given in Pascal VOC corner format and are
# dropped when the augmentation leaves less than 1 px^2 of area or less than
# 10% of the original box visible.
# NOTE(review): unlike val_transform, this pipeline has no A.Normalize step.
train_transform = A.Compose(
    [
        A.RandomResizedCrop(height=300, width=300, scale=(0.8, 1.0), p=1.0),
        A.HorizontalFlip(p=0.5),
        A.ColorJitter(
            hue=0.1,
            saturation=0.2,
            contrast=0.2,
            brightness=0.2,
            p=0.5,
        ),
        A.ShiftScaleRotate(
            rotate_limit=15,
            scale_limit=0.05,
            shift_limit=0.05,
            border_mode=0,  # 0 is the value of cv2.BORDER_CONSTANT
            p=0.5,
        ),
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(
        format='pascal_voc',  # or 'yolo', etc.
        label_fields=['labels'],
        min_visibility=0.1,
        min_area=1,
    ),
)


# Deterministic pipeline intended for validation samples: resize to 300x300,
# normalise each channel with the given mean/std (pixel values divided by 255
# first), and convert to a tensor. Boxes use Pascal VOC corner format.
val_transform = A.Compose(
    [
        A.Resize(height=300, width=300),
        A.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
            max_pixel_value=255.0,
        ),
        ToTensorV2(),
    ],
    bbox_params=A.BboxParams(label_fields=['labels'], format='pascal_voc'),
)

In [None]:
dataset_root = "/kaggle/input/face-mask-detection"

BATCH_SIZE = 8    # samples per batch for both loaders
SPLIT_SEED = 42   # fixes the train/validation split across runs


def detection_collate(batch):
    """Regroup a list of ``(image, target)`` samples into ``(images, targets)``.

    Detection samples differ in image size and number of boxes, so they cannot
    be stacked into a single tensor; the batch is kept as two parallel tuples.
    A named function (rather than a lambda) can also be pickled, which the
    DataLoader needs if worker processes are ever enabled.
    """
    return tuple(zip(*batch))


# NOTE(review): both splits below are views of this one dataset object, so the
# validation subset is constructed with train_transform as well.
full_dataset = FaceMaskDataset(root=dataset_root, transform=train_transform)

# 80/20 split; the validation part takes whatever the rounding leaves over.
train_size = int(0.8 * len(full_dataset))
val_size   = len(full_dataset) - train_size

train_dataset, val_dataset = random_split(
    full_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED)
)

print("Train samples:", len(train_dataset))
print("Val samples:", len(val_dataset))

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                          collate_fn=detection_collate)
val_loader   = DataLoader(val_dataset,   batch_size=BATCH_SIZE, shuffle=False,
                          collate_fn=detection_collate)


In [None]:
from torchvision.models.detection.ssd import SSD300_VGG16_Weights
from torchvision.models.detection import ssd300_vgg16

from torchvision.models.detection.ssd import SSDClassificationHead

num_classes = 4  # background + 3 face mask classes

model = ssd300_vgg16(weights=SSD300_VGG16_Weights.DEFAULT)

# Assigning a `num_classes` attribute on the existing classification head does
# not resize its convolution layers, so the network would keep predicting the
# label set of the pretrained checkpoint. Build a new classification head with
# the right number of outputs instead; the backbone and the box-regression
# head keep their pretrained weights.
pretrained_cls_head = model.head.classification_head
cls_in_channels = [conv.in_channels for conv in pretrained_cls_head.module_list]
anchors_per_location = model.anchor_generator.num_anchors_per_location()
model.head.classification_head = SSDClassificationHead(
    in_channels=cls_in_channels,
    num_anchors=anchors_per_location,
    num_classes=num_classes,
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Log gradients and parameters of the final model (new head included).
wandb.watch(model, log="all")
# Assigning the result keeps the cell from echoing the whole module repr.
model = model.to(device)


In [None]:
from torch.optim import AdamW
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.optim.lr_scheduler import StepLR


def _to_model_inputs(images, targets, device):
    """Convert one collated batch into the lists the detection model expects.

    Args:
        images: iterable of H x W x 3 image arrays as yielded by the dataset.
        targets: iterable of dicts of tensors with at least a ``'boxes'`` key.
        device: device the tensors are moved to.

    Returns:
        ``(image_tensors, target_dicts)``: float32 C x H x W tensors scaled to
        [0, 1], and the target dicts moved to ``device``.
    """
    # Scale to [0, 1]: torchvision detection models expect that range, and the
    # inference code in this notebook also divides by 255. Training on raw
    # 0-255 values would not match what the model sees at prediction time.
    image_tensors = [
        (torch.as_tensor(img, dtype=torch.float32).permute(2, 0, 1) / 255.0).to(device)
        for img in images
    ]

    target_dicts = []
    for target in targets:
        moved = {k: v.to(device) for k, v in target.items()}
        # Keep the (N, 4) layout even for an image without any box.
        moved['boxes'] = moved['boxes'].reshape(-1, 4)
        target_dicts.append(moved)

    return image_tensors, target_dicts


params = [
    {"params": [p for p in model.parameters() if p.requires_grad], "weight_decay": 2e-4}
]
optimizer = AdamW(
    params,
    lr=2e-4,
)
# Cut the learning rate by 10x after 5 epochs without improvement of the
# monitored value (the mean training loss, see scheduler.step below).
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.1)
num_epochs = 80

model.train()
for epoch in range(num_epochs):
    epoch_loss = 0.0

    for images, targets in train_loader:
        images, targets = _to_model_inputs(images, targets, device)

        optimizer.zero_grad()
        # In training mode the model returns a dict of loss terms.
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        losses.backward()
        optimizer.step()

        epoch_loss += losses.item()

    # max(..., 1) avoids a ZeroDivisionError if the loader yields no batches.
    avg_loss = epoch_loss / max(len(train_loader), 1)
    scheduler.step(avg_loss)

    wandb.log({
        "epoch": epoch + 1,
        "loss": avg_loss,
        "lr": optimizer.param_groups[0]["lr"],
    })
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}")

In [None]:
# Persist only the model's state dict (not the module object) to model.pt.
torch.save(obj=model.state_dict(), f="model.pt")

In [None]:

# Read the saved state dict back (mapped onto the active device), load it into
# the model, and switch the model to evaluation mode for inference.
model.load_state_dict(torch.load(f="model.pt", map_location=device))
model.to(device).eval()

# Class names indexed by label id.
label_names = [
    "background",
    "with_mask",
    "without_mask",
    "mask_weared_incorrect",
]


In [None]:
def predict_and_visualize(model, image_path, device, threshold=0.5, iou_threshold=0.5,
                          label_names=None):
    """Run the detector on one image file and plot the detections.

    The image is read with OpenCV, resized to 300x300, scaled to [0, 1] and
    passed to ``model`` as a batch of one. Detections scoring below
    ``threshold`` are discarded, the rest go through one NMS pass over all
    classes together, and the surviving boxes are rescaled to the original
    resolution, drawn on the image and shown with matplotlib.

    Args:
        model: callable detection model returning, for a batch, a list of
            dicts with ``'boxes'``, ``'scores'`` and ``'labels'`` tensors.
            Assumed to already be in evaluation mode; this function does not
            switch modes.
        image_path: path of the image file to process.
        device: device the input tensor is moved to.
        threshold: minimum score for a detection to be kept.
        iou_threshold: IoU threshold handed to ``nms``.
        label_names: optional sequence mapping label id -> class name used in
            the caption. When omitted, the numeric label id is shown.

    Returns:
        None. If the image cannot be read, a message is printed and the
        function returns without plotting.
    """
    import cv2
    import torch
    from torchvision.ops import nms
    import matplotlib.pyplot as plt

    # Colours are BGR because drawing happens on the image as loaded by OpenCV.
    class_colors = {
        1: (0, 255, 0),    # "with_mask"
        2: (0, 0, 255),    # "without_mask"
        3: (255, 0, 0),    # "mask_weared_incorrect"
    }
    # Used for any other label id, so an unexpected label can neither crash
    # the drawing code nor silently reuse the previous box's colour.
    fallback_color = (0, 255, 255)

    image = cv2.imread(image_path)
    if image is None:
        print(f"Failed to load image {image_path}")
        return
    orig_image = image.copy()
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    resized_image = cv2.resize(image_rgb, (300, 300))
    image_tensor = torch.as_tensor(resized_image, dtype=torch.float32).permute(2, 0, 1) / 255.0  # scale to [0, 1]
    image_tensor = image_tensor.unsqueeze(0).to(device)

    with torch.no_grad():
        predictions = model(image_tensor)[0]

    boxes = predictions['boxes']
    scores = predictions['scores']
    labels = predictions['labels']

    # Confidence filter first, then NMS on whatever is left.
    keep = scores >= threshold
    boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

    if len(boxes) > 0:
        keep_indices = nms(boxes, scores, iou_threshold)
        boxes, scores, labels = boxes[keep_indices], scores[keep_indices], labels[keep_indices]

    boxes = boxes.cpu().numpy()
    scores = scores.cpu().numpy()
    labels = labels.cpu().numpy()

    # Boxes refer to the 300x300 input; map them back to the original size.
    orig_height, orig_width = orig_image.shape[:2]
    scale_x = orig_width / 300
    scale_y = orig_height / 300

    for box, score, label in zip(boxes, scores, labels):
        x_min, y_min, x_max, y_max = box
        x_min = int(x_min * scale_x)
        y_min = int(y_min * scale_y)
        x_max = int(x_max * scale_x)
        y_max = int(y_max * scale_y)

        label_id = int(label)
        color = class_colors.get(label_id, fallback_color)

        cv2.rectangle(orig_image, (x_min, y_min), (x_max, y_max), color, 2)

        if label_names is not None and 0 <= label_id < len(label_names):
            caption = label_names[label_id]
        else:
            caption = str(label_id)
        label_text = f"{caption}: {float(score):.2f}"

        # Keep the caption inside the picture for boxes touching the top edge.
        text_y = max(y_min - 10, 15)
        cv2.putText(orig_image, label_text, (x_min, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    # matplotlib expects RGB, the drawn image is BGR.
    display_image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB)

    plt.figure(figsize=(12, 8))
    plt.imshow(display_image)
    plt.axis('off')
    plt.show()


In [None]:
# NOTE(review): this is the same images directory the dataset was built from,
# so most of these pictures were part of the training split; the plots are a
# visual sanity check, not a held-out evaluation.
test_images_dir = "/kaggle/input/face-mask-detection/images"

# Set to an integer to preview only the first N images; None processes all.
max_preview_images = None

# Sorted because os.listdir returns entries in arbitrary order; this makes the
# sequence of figures the same on every run.
test_image_paths = sorted(
    os.path.join(test_images_dir, fname)
    for fname in os.listdir(test_images_dir)
    if fname.lower().endswith(('.png', '.jpg', '.jpeg'))
)

for img_path in test_image_paths[:max_preview_images]:
    print(f"Processing {img_path}...")
    predict_and_visualize(model, img_path, device, threshold=0.5, iou_threshold=0.4)
