In [3]:
def create_model(num_classes):
    # load Faster RCNN pre-trained model
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights='DEFAULT')
    # get the number of input features
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # define a new head for the detector with required number of classes
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model

In [4]:
NUM_CLASSES = 4
DEVICE = 'cpu'
CLASSES = ['background', 'without_mask', 'with_mask', 'mask_weared_incorrect']

model_path = "F:\Sem-7\Project\Saved\model18.pth"

model = create_model(num_classes=NUM_CLASSES)
model = model.to(DEVICE)
model.load_state_dict(torch.load(model_path, map_location=DEVICE))
model.eval()

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

## Evaluating the model

In [5]:
import numpy as np

def calculate_iou(box1, box2):
    # Calculate Intersection over Union (IoU) between two bounding boxes
    
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])

    intersection_area = max(0, x2 - x1 + 1) * max(0, y2 - y1 + 1)
    box1_area = (box1[2] - box1[0] + 1) * (box1[3] - box1[1] + 1)
    box2_area = (box2[2] - box2[0] + 1) * (box2[3] - box2[1] + 1)

    iou = intersection_area / float(box1_area + box2_area - intersection_area)
#     print("iou", iou)
    return iou

def calculate_ap(precision, recall):
    # Calculate Average Precision (AP) using trapezoidal rule
    mrec = np.concatenate(([0.], recall, [1.]))
    mpre = np.concatenate(([0.], precision, [0.]))

    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

    indices = np.where(mrec[1:] != mrec[:-1])[0]
    ap = np.sum((mrec[indices + 1] - mrec[indices]) * mpre[indices + 1])
    return ap

def calculate_map(predictions, ground_truth, detection_threshold=0.5):
    # predictions and ground_truth are lists of dictionaries, each containing 'box', 'class', and 'confidence'

    classes = set([box['class'] for box in ground_truth])
    num_classes = len(classes)
#     print("num_classes",len(classes))

    total_ap = 0.0
    total_iou = 0.0

    for class_id in classes:
        true_positives = np.zeros(len(predictions))
        false_positives = np.zeros(len(predictions))
        num_gt = 0
        class_iou = 0.0

        for i, pred_box in enumerate(predictions):
#             print(pred_box, pred_box['class'], class_id)
#             print(pred_box['class'] == class_id)
            if pred_box['class'] == class_id and pred_box['confidence'] >= detection_threshold:
                num_gt += 1
                ious = [calculate_iou(pred_box['box'], gt_box['box']) for gt_box in ground_truth if gt_box['class'] == class_id]
                
                if any(iou >= 0.5 for iou in ious):
                    true_positives[i] = 1
                else:
                    false_positives[i] = 1

                class_iou += max(ious, default=0.0)

#         print(true_positives)
#         print(false_positives)
#         print(num_gt)
        cumulative_precision = np.cumsum(true_positives) / (np.cumsum(true_positives) + np.cumsum(false_positives))
        cumulative_recall = np.cumsum(true_positives) / num_gt

        ap = calculate_ap(cumulative_precision, cumulative_recall)

        if not np.isnan(ap):
            total_ap += ap
        total_iou += class_iou / max(num_gt, 1)  # Avoid division by zero

#     print("ap",ap,"total_ap",total_ap)
    mAP = total_ap / num_classes
    mIoU = total_iou / num_classes
    return mAP, mIoU

In [6]:
def evaluate_on_dataset(dataset):
    total_mAP = 0.0
    total_mIoU = 0.0
    num_images = len(dataset)

    for image_data in dataset:
        # Extract predictions and ground truth for each image
        predictions = image_data['predictions']
        ground_truth = image_data['ground_truth']

        # Call the calculate_map function for each image
        mAP, mIoU = calculate_map(predictions, ground_truth, detection_threshold=0.5)
        # print(mAP, mIoU)
        # Accumulate mAP and mIoU for all images
        total_mAP += mAP
        total_mIoU += mIoU
#         print("total_mAP", total_mAP, "total_mIoU", total_mIoU)

    print("num_images", num_images)
    # Calculate average mAP and mIoU across all images
    average_mAP = total_mAP / num_images
    average_mIoU = total_mIoU / num_images

    return average_mAP, average_mIoU

In [7]:
def test_model(images_dir,xml_dir, model, detection_threshold):

    all_imgs = os.listdir(images_dir)
    
    split = 0.15
    trainTest_split = int((1-split)*len(all_imgs))

    trainVal_df = all_imgs[:trainTest_split]
    test_imgs = all_imgs[trainTest_split:]
    print(len(test_imgs))
    
    width = 400
    height = 400
    
    dataset = []
    for img in test_imgs:
        image_data = {}
        # print(img)
        predictions = []
        
        image = cv2.imread(images_dir+img)
#         print(image.size)
        orig_image = image.copy()
        # BGR to RGB
        image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB).astype(np.float32)
        # make the pixel range between 0 and 1
        image /= 255.0
        # bring color channels to front
        image = np.transpose(image, (2, 0, 1)).astype(float)
        # convert to tensor
        image = torch.tensor(image, dtype=torch.float)
        # add batch dimension
        image = torch.unsqueeze(image, 0)
        with torch.no_grad():
            outputs = model(image)

        # load all detection to CPU for further operations
        outputs = [{k: v.to('cpu') for k, v in t.items()} for t in outputs]
#         print(outputs)
        # carry further only if there are detected boxes
        if len(outputs[0]['boxes']) != 0:
            boxes = outputs[0]['boxes'].data.numpy()
            scores = outputs[0]['scores'].data.numpy()
            labels = outputs[0]['labels'].data.numpy()
            
            for box,label,score in zip(boxes,labels,scores):
                t_dict = {}
                t_dict['box'] = list(box)
                t_dict['class'] = label
                t_dict['confidence'] = score
                predictions.append(t_dict)
#         print(predictions)
#         print()
        
        ground_truth = []
        annot_img_name = img[:-4]+".xml"
        annot_file_path = os.path.join(xml_dir, annot_img_name)
        
        boxes = []
        labels = []
        classes_dict = {'without_mask': 1, 'with_mask': 2, 'mask_weared_incorrect': 3}

        tree = et.parse(annot_file_path)
        root = tree.getroot()

        # get the height and width of the image
        for i in root.findall('size'):
            image_width = int(i.find('width').text)
            image_height = int(i.find('height').text)

        for member in root.findall('object'):
            # map the current object name to `classes` list to get...
            # ... the label index and append to `labels` list
            labels.append(member.find('name').text)

            # xmin = left corner x-coordinates
            xmin = int(member.find('bndbox').find('xmin').text)
            # xmax = right corner x-coordinates
            xmax = int(member.find('bndbox').find('xmax').text)
            # ymin = left corner y-coordinates
            ymin = int(member.find('bndbox').find('ymin').text)
            # ymax = right corner y-coordinates
            ymax = int(member.find('bndbox').find('ymax').text)


            boxes.append([xmin, ymin, xmax, ymax])

        
        for box,label in zip(boxes,labels):
            t_dict = {}
            t_dict['box'] = list(box)
            t_dict['class'] = classes_dict[label]
            ground_truth.append(t_dict)
            
        image_data['predictions'] = predictions
        image_data['ground_truth'] = ground_truth
        dataset.append(image_data)
            
#     print(dataset) 
    average_mAP, average_mIoU = evaluate_on_dataset(dataset)

    print(f"Average mAP: {average_mAP:.4f}, Average mIoU: {average_mIoU:.4f}")


In [8]:
images_dir = "F:/Sem-7/Project/dataset/images/"
xml_dir = "F:/Sem-7/Project/dataset/annotations/"
detection_threshold = 0.8
test_model(images_dir,xml_dir,model,detection_threshold)

128


  cumulative_precision = np.cumsum(true_positives) / (np.cumsum(true_positives) + np.cumsum(false_positives))
  cumulative_recall = np.cumsum(true_positives) / num_gt
  cumulative_recall = np.cumsum(true_positives) / num_gt


num_images 128
Average mAP: 0.8130, Average mIoU: 0.6980


## Inference

In [9]:
import tkinter as tk
from tkinter import filedialog
from tkinter import *
from PIL import ImageTk, Image

In [10]:
top=tk.Tk()
top.geometry('800x600')
top.title('Mask Detection')
top.configure(background='#427D9D')

label=Label(top,background='#CDCDCD', font=('arial',15,'bold'), text = "check")
sign_image = Label(top)
sign_image.pack(side=BOTTOM,expand=True)

In [11]:
def upload_image():
    try:
        file_path=filedialog.askopenfilename()
        uploaded=Image.open(file_path)
        uploaded.thumbnail(((top.winfo_width()/2),(top.winfo_height()/2)))
        im=ImageTk.PhotoImage(uploaded)
        sign_image.configure(image=im)
        sign_image.image=im
        label.configure(text='')
        show_classify_button(file_path)
    except:
        pass

def classify(file_path):
    detection_threshold = 0.7

    image = cv2.imread(file_path)
    orig_image = image.copy()
    # BGR to RGB
    image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB).astype(np.float32)
    # make the pixel range between 0 and 1
    image /= 255.0
    # bring color channels to front
    image = np.transpose(image, (2, 0, 1)).astype(float)
    # convert to tensor
    image = torch.tensor(image, dtype=torch.float)
    # add batch dimension
    image = torch.unsqueeze(image, 0)
    with torch.no_grad():
        outputs = model(image)

    # load all detection to CPU for further operations
    outputs = [{k: v.to('cpu') for k, v in t.items()} for t in outputs]
#     print(outputs)
    # carry further only if there are detected boxes
    if len(outputs[0]['boxes']) != 0:
        boxes = outputs[0]['boxes'].data.numpy()
        scores = outputs[0]['scores'].data.numpy()
        labels = outputs[0]['labels'].data.numpy()
#         print("labels before applyinng threshold")
#         print(labels)
        # filter out boxes according to `detection_threshold`/\\
        boxes = boxes[scores >= detection_threshold].astype(np.int32)
        labels = labels[scores >= detection_threshold]
#         print("labels after applyinng threshold")
#         print(labels)
        draw_boxes = boxes.copy()

        # draw the bounding boxes
        for box, label in zip(draw_boxes, labels):
            label = CLASSES[label]

            if label == "without_mask":
                color = (0, 0, 255)

            elif label == "with_mask":
                color = (0, 255, 0)

            elif label == "mask_weared_incorrect":
                color = (255, 0, 0)

            cv2.rectangle(orig_image,
                        (int(box[0]), int(box[1])),
                        (int(box[2]), int(box[3])),
                        color, 1)
            cv2.putText(orig_image, label,
                        (int(box[0]), int(box[1])-5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.4, color, 1)

        image_rgb = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(image_rgb, 'RGB')
        img.save("./check.png")
        uploaded=Image.open("./check.png")
        uploaded.thumbnail(((top.winfo_width()/2),(top.winfo_height()/2)))
        im=ImageTk.PhotoImage(uploaded)
        sign_image.configure(image=im)
        sign_image.image=im



def show_classify_button(file_path):
    classify_b=Button(top,text="Classify Image",command=lambda: classify(file_path),padx=10,pady=5)
    classify_b.configure(background='#164863', foreground='white',font=('arial',10,'bold'))
    classify_b.place(relx=0.79,rely=0.46)

In [12]:
upload=Button(top,text="Upload an image",command=upload_image,padx=10,pady=5)
upload.configure(background='#164863', foreground='white',font=('arial',10,'bold'))
upload.pack(side=BOTTOM,pady=20)

In [13]:
top.mainloop()