In [None]:
%load_ext autoreload
%autoreload 2

import os
import time
import glob
import torch
import torch.nn.functional as F
import pandas as pd
import numpy as np
import csv
import cv2
import xlwings as xw
import torchvision.transforms as transforms

#libraries for yolo
from pytorchyolo.models import load_model
from pytorchyolo.utils.transforms import Resize, DEFAULT_TRANSFORMS
from pytorchyolo.utils.utils import non_max_suppression
from pytorchyolo.utils.loss import compute_loss

In [None]:
from scripts.utils import detach_cpu, tensor_to_np_img, save_tensor_img, clone_detach

## Face Detector Models

In [None]:
from scripts.face_detectors import MediaPipe, YuNet, YoloFace

In [None]:
# One instance of each face detector wrapper, built in the order
# MediaPipe -> YuNet -> YoloFace. These module-level names are read later
# when the minimum epsilon is searched per detector.
mp, yn, yf = MediaPipe(), YuNet(), YoloFace()

## YOLOFace with FGSM

In [None]:
# Setup follows the PyTorch FGSM tutorial
# (https://pytorch.org/tutorials/beginner/fgsm_tutorial.html).

# Report whether a CUDA device is visible to PyTorch.
print("CUDA Available: ", torch.cuda.is_available())

# A dedicated YoloFace wrapper supplies both the device and the underlying
# network that gradients are taken through.
main_yf = YoloFace()
device = main_yf.device
model = main_yf.yf_face_detector

# Switch the network to evaluation mode (this is what disables Dropout-style
# layers, per the tutorial this cell is patterned after).
model.eval()

## Image Feature Extraction

In [None]:
from scripts import image_attributes
from scripts.image_attributes import extract_image_attributes

## Load Masks

In [None]:
def load_mask(filename, face_num, target_bbox):
    """Load the restored face mask for one detected face and fit it to a bounding box.

    Reads the module-level globals ``RESTORED_MASK_PATH`` (directory holding the
    mask images) and ``FACES_DF`` (per-face table with ``filename``, ``x1``..``y2``
    and ``x1_pad``..``y2_pad`` columns).

    Args:
        filename: File name of the source image; its extension is stripped to
            build the mask file name.
        face_num: Index of the face inside the image (part of the mask file name).
        target_bbox: ``(x1, y1, x2, y2)`` box the returned mask should fit.

    Returns:
        ``(mask, used_mask)``. When fewer than 10% of the target box's pixels are
        white (value 255) in the mask image, returns an all-ones tensor of shape
        ``(1, 3, target_h, target_w)`` and ``False``. Otherwise returns the
        closed, cropped mask tensor and ``True``.

    Raises:
        FileNotFoundError: The mask image is missing or cannot be decoded.
        LookupError: ``FACES_DF`` has no row for the mask file name.
    """
    # Get the mask name and open it as a single-channel (grayscale) image.
    filename = "restored_mask_" + os.path.splitext(filename)[0] + "_" + str(face_num) + "_image_final.png"
    mask_path = os.path.join(os.getcwd(), RESTORED_MASK_PATH, filename)
    mask = cv2.imread(mask_path, 0)
    if mask is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("Could not read mask image: " + mask_path)

    # Find the corresponding row of the mask in the FACES_DF to get the padding.
    # NOTE(review): the lookup uses the *mask* file name built above — this assumes
    # FACES_DF['filename'] stores mask file names; confirm against the CSV.
    matches = FACES_DF.loc[FACES_DF['filename'] == filename]
    if matches.empty:
        raise LookupError("No FACES_DF row found for mask " + filename)
    # Work on a single row so the arithmetic below yields scalars; if several
    # rows match, the first one is used.
    face_row = matches.iloc[0]
    padded_dim = (int(face_row["x2_pad"] - face_row["x1_pad"]), int(face_row["y2_pad"] - face_row["y1_pad"]))

    # Get the target dimensions (width, height) based on the target bounding box.
    target_dim = (int(target_bbox[2] - target_bbox[0]), int(target_bbox[3] - target_bbox[1]))

    # Count the white pixels in the mask; a mask without any white pixel counts as 0.
    num_white = int(np.count_nonzero(mask == 255))
    # Return a "clear" (all-ones) mask if the white area is less than 10% of the target area.
    if num_white < int(target_dim[0] * target_dim[1] * 0.1):
        return torch.ones((1, 3, target_dim[1], target_dim[0])), False

    # Apply a morphological closing to the mask with an elliptical kernel
    # half the mask's size. ksize is (width, height) == (cols, rows); each side is
    # kept at >= 1 so very small masks do not produce an empty kernel.
    kernel_size = (max(1, int(mask.shape[1] * 0.5)), max(1, int(mask.shape[0] * 0.5)))
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, kernel_size)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
    mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2RGB)
    # The transform expects an (image, boxes) pair; a dummy box array is supplied
    # and only the transformed image is kept, with a leading batch dimension added.
    mask = transforms.Compose([DEFAULT_TRANSFORMS])((mask, np.zeros((1, 5))))[0].unsqueeze(0)

    # Masks are always square and should be smaller than the padded dimensions and target dimensions
    # NOTE(review): the code below *crops* the mask by |padded - current| / 2 per
    # side, which only makes sense when the mask is at least as large as the
    # padded box — confirm the intended size relation.
    current_dim = max(mask.shape)
    diff_x, diff_y = abs(padded_dim[0] - current_dim) / 2, abs(padded_dim[1] - current_dim) / 2

    # Adjust the mask to be the same as the padded dimensions (odd differences
    # remove the extra pixel from the bottom/right side).
    if diff_y != 0:
        mask = mask[..., int(np.floor(diff_y)):-int(np.ceil(diff_y)), :]
    if diff_x != 0:
        mask = mask[..., int(np.floor(diff_x)):-int(np.ceil(diff_x))]

    # Padding on each side of the face box: [left, top, right, bottom].
    padding = [
        int(abs(face_row["x1"] - face_row["x1_pad"])),
        int(abs(face_row["y1"] - face_row["y1_pad"])),
        int(abs(face_row["x2"] - face_row["x2_pad"])),
        int(abs(face_row["y2"] - face_row["y2_pad"]))
    ]

    # Adjust the padding so that the unpadded size will match the target bounding box
    new_dim = padded_dim[0] - padding[0] - padding[2], padded_dim[1] - padding[1] - padding[3]
    diff_x, diff_y = (target_dim[0] - new_dim[0]) / 2, (target_dim[1] - new_dim[1]) / 2

    padding[0] -= int(np.floor(diff_x))
    padding[1] -= int(np.floor(diff_y))
    padding[2] -= int(np.ceil(diff_x))
    padding[3] -= int(np.ceil(diff_y))

    # F.pad takes (left, right, top, bottom) for the last two dims; negative
    # amounts crop, positive amounts pad with zeros.
    mask = F.pad(input=mask, pad=(-padding[0], -padding[2], -padding[1], -padding[3]), mode='constant', value=0)

    return mask, True

## FGSM

In [None]:
import scripts.fgsm as fgsm

In [None]:
def _find_min_e_per_detector(data, data_grad, mask, bbox, **find_min_e_kwargs):
    """Call ``fgsm.find_min_e`` once per face detector, in the order YuNet, MediaPipe, YoloFace.

    Every call receives fresh ``clone_detach`` copies of ``data`` and ``data_grad``.
    Extra keyword arguments (e.g. ``background=True``) are forwarded unchanged.

    Returns:
        Tuple ``(yn_result, mp_result, yf_result)``.
    """
    return tuple(
        fgsm.find_min_e(clone_detach(data), clone_detach(data_grad), detector, mask, bbox, **find_min_e_kwargs)
        for detector in (yn, mp, yf)
    )


def pipeline(model, device):
    """Run the FGSM minimum-epsilon study over every ``*.jpg`` in ``INPUT_PATH`` and save a CSV.

    For each image the YOLO model detects faces; for each selected face the loss
    gradient w.r.t. the input is computed and ``fgsm.find_min_e`` is evaluated for
    four perturbation regions (face mask, inverted mask, bounding box, enlarged
    bounding box) against the three detectors ``yn``, ``mp`` and ``yf``.

    Reads module-level settings: ``INPUT_PATH``, ``CSV_PATH``, ``FOLDER_NAME``,
    ``output_csv_tag``, ``use_refset``, ``REF_SET``, ``fgsm_loss_target``,
    ``save_noise`` and (when ``save_noise`` is set) ``NOISE_OUTS``.

    Args:
        model: YOLO network exposing ``yolo_layers`` and a ``gradient_mode`` flag.
        device: Torch device the input tensor and loss target are moved to.

    Raises:
        ValueError: ``fgsm_loss_target`` is neither ``"bbox"`` nor ``"conf"``.
    """
    # Fail early: an unknown target would otherwise leave `target` unbound (or
    # silently reuse the one from a previous face).
    if fgsm_loss_target not in ("bbox", "conf"):
        raise ValueError('fgsm_loss_target must be "bbox" or "conf", got ' + repr(fgsm_loss_target))

    # Global autograd setting; helps locate bad gradients at the cost of speed.
    torch.autograd.set_detect_anomaly(True)

    # One dict per processed face; turned into a DataFrame once at the end
    # instead of growing a DataFrame with pd.concat on every face.
    records = []

    # Loop over all examples in input path
    for path in glob.glob(os.path.join(INPUT_PATH, '*.jpg')):
        file_basename = os.path.basename(path)
        print(file_basename, end=" ")

        # None means "no reference set: process every detected face".
        face_indices = None
        if use_refset:
            # Get indices of faces in the image
            face_indices = set(REF_SET.loc[REF_SET['source_file'] == file_basename, "face_index"])
            # If there are no face indices then it means the image is not in the ref set
            if not face_indices:
                print("(skipped)")
                continue
        print("<- working on")

        # Detection pass: evaluation mode with gradient mode switched off.
        model.eval()
        model.gradient_mode = False
        for yolo_layer in model.yolo_layers:
            yolo_layer.gradient_mode = False

        # Read and transform the image from the path
        data = cv2.imread(path)
        # OpenCV arrays are (height, width, channels).
        source_h, source_w = data.shape[:2]
        # Fields shared by every face of this image; copied per face so values
        # from one face never leak into another row.
        image_fields = {'path': path, 'source_w': source_w, 'source_h': source_h}
        data = cv2.cvtColor(data, cv2.COLOR_BGR2RGB)
        data = transforms.Compose([DEFAULT_TRANSFORMS,Resize(416)])((data, np.zeros((1, 5))))[0].unsqueeze(0)
        # Move the input to the model's device before the first forward pass.
        data = data.to(device)

        with torch.no_grad():
            # Forward pass the data through the model and call non max suppression
            # (conf_thres and iou_thres = 0.5). This non_max_suppression returns a pair.
            nms, nms_output = non_max_suppression(model(data), 0.5, 0.5)

        # nms_output[0]: only one image is fed at a time, so only the first
        # entry is used. An int entry is treated as "no detections".
        face_list = []
        if type(nms_output[0]) is not int:
            face_list = nms_output[0]

        # Set requires_grad attribute of tensor. Important for attack
        data.requires_grad = True

        # Attack pass: gradient mode switched on for the model and its YOLO layers.
        model.gradient_mode = True
        for yolo_layer in model.yolo_layers:
            yolo_layer.gradient_mode = True

        output = model(data)

        # loop through each of the faces in the image
        for face_index, face_row in enumerate(face_list):
            if face_indices is not None and face_index not in face_indices:
                continue

            row = dict(image_fields)
            row['face_index'] = face_index
            print("Face", face_index)

            row['obj_score'] = face_row[4].item()
            row['class_score'] = face_row[5].item()
            # The first four values are used as box centre (x, y) and size (w, h).
            x, y, w, h = face_row[0], face_row[1], face_row[2], face_row[3]

            normal_x, normal_y, normal_w, normal_h = x / 415, y / 415, w / 415, h / 415

            if fgsm_loss_target == "bbox":
                target = torch.tensor([[face_row[4].item(), face_row[5].item(), 0, 0, 0, 0]])
            else:  # "conf" (validated at the top of the function)
                target = torch.tensor([[0.0, 0, normal_x, normal_y, normal_w, normal_h]])

            target = target.to(device)
            loss, loss_components = compute_loss(output, target, model)

            # cropped image with bounding box
            # getting (x1, y1) upper left, (x2, y2) lower right, clamped to [0, 415]
            x1 = max(int(np.floor((x - w / 2).detach().cpu().numpy())), 0)
            y1 = max(int(np.floor((y - h / 2).detach().cpu().numpy())), 0)
            x2 = min(int(np.ceil((x + w / 2).detach().cpu().numpy())), 415)
            y2 = min(int(np.ceil((y + h / 2).detach().cpu().numpy())), 415)

            row['x1'], row['y1'], row['x2'], row['y2'] = x1, y1, x2, y2

            cropped_image = detach_cpu(data)[:, :, y1:y2, x1:x2] #keep batch and channel dims, crop spatially
            cropped_image = tensor_to_np_img(cropped_image)

            # Zero all existing gradients
            model.zero_grad()
            data.grad = None

            # Calculate gradients of model in backward pass. The graph is retained
            # because `output` is reused for the remaining faces of this image.
            loss.backward(retain_graph=True) #TODO: Amos - check if this is correct

            # Collect datagrad
            data_grad = data.grad.data
            if save_noise:
                save_tensor_img(data_grad, os.path.join(NOISE_OUTS, fgsm_loss_target + "_" + str(face_index) + "_" + file_basename))

            # Get mask
            bbox = (x1, y1, x2, y2)
            mask, used_mask = load_mask(file_basename, face_index, bbox)
            row['used_mask'] = used_mask

            row = extract_image_attributes(row, path, face_index, cropped_image * tensor_to_np_img(mask[0]), "mask")
            row = extract_image_attributes(row, path, face_index, cropped_image, "bbox")

            # Full-image masks: zero everywhere except inside the face box.
            whole_mask = np.zeros(data.shape)
            whole_mask[..., y1:y2, x1:x2] = mask

            # Complement of the face mask inside the box (or the same mask when
            # no real mask was used).
            inverted_mask = np.zeros(data.shape)
            inverted_mask[..., y1:y2, x1:x2] = (1 - whole_mask[..., y1:y2, x1:x2]) if used_mask else whole_mask[..., y1:y2, x1:x2]

            bbox_mask = np.zeros(data.shape)
            bbox_mask[..., y1:y2, x1:x2] = 1

            # Enlarged box: extends a full width/height from the centre on each side.
            large_x1 = max(int(np.floor((x - w).detach().cpu().numpy())), 0)
            large_y1 = max(int(np.floor((y - h).detach().cpu().numpy())), 0)
            large_x2 = min(int(np.ceil((x + w).detach().cpu().numpy())), 415)
            large_y2 = min(int(np.ceil((y + h).detach().cpu().numpy())), 415)

            large_bbox_mask = np.zeros(data.shape)
            large_bbox_mask[..., large_y1:large_y2, large_x1:large_x2] = 1

            print("Calculating min epsilon for models...")

            yn_min_e_face, mp_min_e_face, yf_min_e_face = _find_min_e_per_detector(data, data_grad, whole_mask, bbox)

            print("yn min face:", yn_min_e_face, "mp min face:", mp_min_e_face, "yf min face:", yf_min_e_face)
            row['e_face_yn'], row['e_face_mp'], row['e_face_yf'] = yn_min_e_face, mp_min_e_face, yf_min_e_face

            # Without a real mask the background and bbox regions coincide with
            # the face region, so the face results are reused.
            if used_mask:
                yn_min_e_bg, mp_min_e_bg, yf_min_e_bg = _find_min_e_per_detector(data, data_grad, inverted_mask, bbox, background=True)
            else:
                yn_min_e_bg, mp_min_e_bg, yf_min_e_bg = yn_min_e_face, mp_min_e_face, yf_min_e_face

            print("yn min bg:", yn_min_e_bg, "mp min bg:", mp_min_e_bg, "yf min bg:", yf_min_e_bg, "used mask" if used_mask else "did not use mask")
            row['e_bg_yn'], row['e_bg_mp'], row['e_bg_yf'] = yn_min_e_bg, mp_min_e_bg, yf_min_e_bg

            if used_mask:
                yn_min_e_bbox, mp_min_e_bbox, yf_min_e_bbox = _find_min_e_per_detector(data, data_grad, bbox_mask, bbox)
            else:
                yn_min_e_bbox, mp_min_e_bbox, yf_min_e_bbox = yn_min_e_face, mp_min_e_face, yf_min_e_face

            print("yn min bbox:", yn_min_e_bbox, "mp min bbox:", mp_min_e_bbox, "yf min bbox:", yf_min_e_bbox)
            row['e_bbox_yn'], row['e_bbox_mp'], row['e_bbox_yf'] = yn_min_e_bbox, mp_min_e_bbox, yf_min_e_bbox

            yn_min_e_lbbox, mp_min_e_lbbox, yf_min_e_lbbox = _find_min_e_per_detector(data, data_grad, large_bbox_mask, bbox)

            print("yn min lbbox:", yn_min_e_lbbox, "mp min lbbox:", mp_min_e_lbbox, "yf min lbbox:", yf_min_e_lbbox)
            row['e_lbbox_yn'], row['e_lbbox_mp'], row['e_lbbox_yf'] = yn_min_e_lbbox, mp_min_e_lbbox, yf_min_e_lbbox
            records.append(row)

    # Build the dataset once and save it; the file name carries a Unix timestamp.
    df = pd.DataFrame(records)
    df.to_csv(os.path.join(CSV_PATH, FOLDER_NAME + output_csv_tag + str(int(time.time())) + '.csv'), index=False)  #save to csv

In [None]:
# Settings

# Switch off all three optional image dumps of the attribute extractor.
image_attributes.save_color_images = \
    image_attributes.save_lbp_images = \
    image_attributes.save_gradient_images = False

# Which part of the loss target drives the attack gradient.
fgsm_loss_target = "conf" # or "bbox"

# Tag inserted into the output CSV file name.
output_csv_tag = "no_random"

# save_noise: write the raw input gradient of each face to disk.
# use_refset: only process faces listed in the reference set.
save_noise, use_refset = False, True

In [None]:
# Folders (sub-directories of ./images) to process, and the names of the
# folders that could not be processed.
folders = ["1--Handshaking"]
fails = []
OUTPUT_FOLDER = os.path.join(os.getcwd(), "outputs")

# Reference set used to select images/faces. The first CSV column is read as
# the index. (A former `REF_SET.reset_index()` call discarded its result and
# therefore had no effect; it has been dropped.)
REF_SET = pd.read_csv(os.path.join(os.getcwd(), "csv", "reference_dataset.csv"), index_col=0)

for FOLDER_NAME in folders:
    # Per-folder paths; these module-level names are read by pipeline() and load_mask().
    INPUT_PATH = os.path.join(os.getcwd(), 'images', FOLDER_NAME)
    FOLDER_PATH = os.path.join(OUTPUT_FOLDER, FOLDER_NAME)
    CSV_PATH = os.path.join(FOLDER_PATH, FOLDER_NAME + '_CSV')
    RESTORED_MASK_PATH = os.path.join(FOLDER_PATH, FOLDER_NAME + '_restored_mask')

    if save_noise:
        NOISE_OUTS = os.path.join(FOLDER_PATH, FOLDER_NAME + "_noise_outs")
        # Idempotent: safe on re-runs and creates missing parents.
        os.makedirs(NOISE_OUTS, exist_ok=True)

    # Load the first "dataset_pixels" CSV found for this folder. A missing CSV
    # directory is handled like a missing CSV file.
    FACES_DF = None
    csv_files = os.listdir(CSV_PATH) if os.path.isdir(CSV_PATH) else []
    for file in csv_files:
        if "dataset_pixels" in file and file.endswith(".csv"):
            FACES_DF = pd.read_csv(os.path.join(os.getcwd(), CSV_PATH, file))
            # Negative coordinates are clamped to 0.
            cols_to_clip = ["x1", "y1", "x2", "y2", "x1_pad", "y1_pad", "x2_pad", "y2_pad"]
            FACES_DF.loc[:, cols_to_clip] = FACES_DF.loc[:, cols_to_clip].clip(lower = 0)
            break
    else:
        # for/else: reached only when no matching CSV was found.
        # append() records the folder name as ONE entry (`fails += name` would
        # add each character separately).
        fails.append(FOLDER_NAME)
        print("An exception occurred: dataset_pixels csv not found for", FOLDER_NAME)
        continue

    print("Working on", FOLDER_NAME, "folder")
    # Exceptions from pipeline() are deliberately not caught so that bugs
    # surface with a full traceback.
    pipeline(model, device)

##### ___