In [None]:
import numpy as np
import pandas as pd
import torch
import os
import random
from shutil import copyfile
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torchvision
import torch.optim as optim
import torchvision.models as models
from tqdm import tqdm
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torchvision import datasets
from torch.utils.data import random_split
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, f1_score, auc, precision_score, recall_score
import matplotlib.pyplot as plt
import torchvision.models as models
from PIL import Image
import cv2
from torchvision.utils import save_image
from true_classify import *
from Utils import *
from anonymization_methods import *
from datasets import *
from torchvision.transforms.functional import to_pil_image
from collections import Counter
import matplotlib.image as mpimg
import time
import matplotlib.pyplot as plt
import xlrd
import openpyxl
import copy
import gc

from tqdm import tqdm

In [None]:
# Pick the device PyTorch will run on: CUDA when it is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# Define the paths of the data, the model and the outputs, and derive the
# number of classes in the dataset from the sub-folders of source_path.
source_path = 'Path of the Data to be anonimyzed'


# One sub-folder per class. os.listdir returns entries in arbitrary order, so
# the names are sorted to make the index -> folder mapping deterministic.
# NOTE(review): class_names is later indexed with the labels produced by the
# test loader; sorted order matches torchvision's ImageFolder convention —
# confirm that create_test_loader is ImageFolder-based.
class_names = sorted(
    folder for folder in os.listdir(source_path)
    if os.path.isdir(os.path.join(source_path, folder))
)
num_classes = len(class_names)

# Raw listing of source_path (files and folders, unsorted).
file_list = os.listdir(source_path)
model_dir = 'Path of the trained FR model'

# Root folder that receives one "Iteration_<n>" sub-folder per anonymization pass.
output_path = 'Path to save the anonimyzed images'

save_roc_dir = 'Path to save the ROC'

excel_file_path = 'Path to save the excel file with the numberical results of accuracy and f1_score'


In [None]:
# Image preprocessing pipeline: Resize(224) followed by conversion to a tensor.
# No data augmentation is applied here.
preprocessing_steps = [
    transforms.Resize(224),
    transforms.ToTensor(),
]
transform = transforms.Compose(preprocessing_steps)

In [None]:
# Build the ConvNeXt-Base classifier and load the trained FR model weights.

# pretrained=False: load_state_dict below is strict, so every weight is
# overwritten by the checkpoint and downloading ImageNet weights first would
# only cost time and require network access.
model = models.convnext_base(pretrained=False)
# Replace the last classifier layer so the model outputs one logit per class.
model.classifier[2] = nn.Linear(1024, num_classes)
# map_location lets a checkpoint that was saved on a GPU load on whatever
# device was selected above (including the CPU fallback).
model.load_state_dict(torch.load('Path of the trained FR model.pt', map_location=device))

# The model is only evaluated and differentiated w.r.t. its input in this
# notebook, never trained: switch to eval mode so train-only layers (e.g.
# stochastic depth) do not make predictions and gradients random.
model.eval()
model.to(device)

In [None]:
# Loss / optimisation objects for EDI_Anon. `criterion` is the loss handed to
# calculate_loss when computing the gradient used to anonymize each image.
# NOTE(review): optimizer and scheduler do not appear to be stepped by the
# anonymization loop — confirm whether they are still needed.

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-5)
scheduler = optim.lr_scheduler.StepLR(optimizer, gamma=0.001, step_size=20)

In [None]:
# q is the quantile value handed to optimize_gradients; 100 - q is the
# percentage of the image that the anonymization is meant to affect.
q = 90

In [None]:
# Parameter values for the available anonymization methods.

# Simple Blur: kernel size (k in the paper).
blur_kernel_size = 40

# Simple Pixelate: pixel size (n in the paper).
pixel_size = 20



# DP Pix parameters.
b, m, eps = 32, 16, 100

# DP blur parameters.
# NOTE(review): the original remark recommends b0 to be half of the original b,
# yet b0 and b are both 32 — confirm which value is intended.
b0, k, sigma = 32, 99, 0

In [None]:
# Baseline: evaluate the FR model on the original (non-anonymized) data and
# time the evaluation. The returned examples, labels and logits seed the
# anonymization loop that follows.
# NOTE(review): excel_file_path / save_roc_dir are handed to
# test_images_classification, presumably to store accuracy / f1_score and the
# ROC — confirm against the helper.

start_time = time.time()
new_test_path = source_path
new_batch_size = 1
our_test_loader = create_test_loader(new_test_path, new_batch_size)
final_acc, correct_examples, labels, logits = test_images_classification(
    model,
    device,
    our_test_loader,
    excel_file_path,
    save_roc_dir,
)
prev_acc = final_acc
print(source_path)
print(prev_acc*100)

end_time = time.time()
acc_time = end_time - start_time
print(f"\nAcc execution time: {acc_time} seconds")

In [None]:
# EDI_Anon main loop: repeatedly anonymize the images and re-test the model on
# the result until the accuracy on the anonymized data drops below target_rir.
start_time = time.time()

# Upper bound on the number of passes; deliberately high to give EDI_Anon the
# time it needs to effectively anonymize the dataset.
max_iterations = 200000

# RIR (in percent) to reach: the loop stops once accuracy*100 falls below it.
# This has no sensible default and must be set before running the cell.
target_rir = None

# Base anonymization function applied to every correctly classified image.
# Change anonymization_method to one of the keys below to switch method.
anonymization_method = 'pixelate'
anonymization_functions = {
    'pixelate': lambda image: create_pixelated_image(image, pixel_size),
    'blur': lambda image: create_blurred_image(image, blur_kernel_size),
    'dp_pixelate': lambda image: create_dp_pixelated_image(image, b, m, eps),
    'dp_blur': lambda image: create_dp_blurred_image(image, b0, m, eps, k, sigma),
}

# Fail fast on configuration mistakes instead of crashing after a full pass.
if isinstance(target_rir, bool) or not isinstance(target_rir, (int, float)):
    raise ValueError("target_rir must be set to the RIR (in percent) you want to achieve")
if anonymization_method not in anonymization_functions:
    raise ValueError(
        f"Unknown anonymization_method {anonymization_method!r}; "
        f"choose one of {sorted(anonymization_functions)}"
    )
anonymize = anonymization_functions[anonymization_method]

for itera in range(max_iterations):
    print('Starting the anonymization for the interation number '+str(itera)+' ------------->')

    # Nothing to process: stop rather than testing an empty output folder.
    if len(correct_examples) == 0:
        print('No examples left to anonymize; stopping.')
        break

    # Output directory for this iteration. It does not depend on the image, so
    # it is computed once per pass and is always defined for the test step below.
    iteration_out_path = f"{output_path}/Iteration_{itera}/"
    os.makedirs(iteration_out_path, exist_ok=True)

    # Iterate through all examples returned by the previous evaluation
    for i in tqdm(range(len(correct_examples))):
        x, correct_label = correct_examples[i], labels[i]

        # Target label for the loss: derived from the logits by get_second_largest
        y = get_second_largest(logits[i])
        y = torch.tensor([y]).to(device)

        # Each image is saved under the sub-folder of its class
        class_subfolder = class_names[correct_label.item()]
        class_output_path = os.path.join(iteration_out_path, class_subfolder)
        os.makedirs(class_output_path, exist_ok=True)

        # Clone the original image and enable image gradient computation
        image_for_grad = x.clone()
        image_for_grad.requires_grad = True

        # Calculate the loss based on the model, image, and criterion
        output, loss = calculate_loss(model, image_for_grad, y, criterion)

        # If the image is (still) correctly classified, it needs to be anonymized
        if output.item() == correct_label.item():
            model.zero_grad()
            loss.backward()
            img_grad = image_for_grad.grad.data

            # Apply the selected anonymization function to the original image
            perturbed = anonymize(x)

            # Calculate noise based on the original and perturbed images
            noise = calculate_noise(x, perturbed, device)

            # Optimize the gradients using the quantile value (q), then bring
            # them to a channel-last numpy array
            optimized_gradients = optimize_gradients(img_grad, q)
            optimized_gradients = optimized_gradients.cpu().detach().numpy().squeeze()
            optimized_gradients = np.transpose(optimized_gradients, (1, 2, 0))

            # Update the noise using optimized gradients
            updated_noise = noise * optimized_gradients

            # Create the anonymized image by subtracting the updated noise
            # from the original image.
            # NOTE(review): cv2_image has its channels swapped by cvtColor while
            # optimized_gradients keeps the tensor's channel order; whether
            # updated_noise lines up channel-wise depends on calculate_noise — confirm.
            numpy_image = x.cpu().detach().numpy().squeeze()
            cv2_image = np.transpose(numpy_image, (1, 2, 0))
            cv2_image = cv2.cvtColor(cv2_image, cv2.COLOR_BGR2RGB)

            annonymized_image = cv2_image - updated_noise

            # Unnormalize the image to save it with the correct colors
            annonymized_image = annonymized_image*255

            # Save the anonymized image to the output directory.
            # NOTE(review): this six-argument save_image is expected to be the
            # project helper brought in by the star imports, not
            # torchvision.utils.save_image — confirm.
            save_image(annonymized_image, i, correct_label, class_output_path, pixel_size, 100-q)

        # If the model didn't correctly classify the image then save it as it is
        else:
            numpy_image = x.cpu().detach().numpy().squeeze()
            cv2_image = np.transpose(numpy_image, (1, 2, 0))
            cv2_image = cv2.cvtColor(cv2_image, cv2.COLOR_BGR2RGB)
            cv2_image = cv2.normalize(cv2_image, None, alpha = 0, beta = 255, norm_type = cv2.NORM_MINMAX, dtype = cv2.CV_32F)
            save_image(cv2_image, i, correct_label, class_output_path, pixel_size, 100-q)


    # Test the model performance on the anonymized data at the end of each iteration;
    # the returned examples, labels and logits feed the next pass
    our_test_loader = create_test_loader(iteration_out_path, batch_size=1)
    accuracy, correct_examples, labels, logits = test_images_classification(model, device, our_test_loader, excel_file_path, save_roc_dir)
    print(f"\nAccuracy: {accuracy*100} %")

    # If we have achieved the required RIR, then end the EDI_Anon process
    if accuracy*100 < target_rir:
        break

print(f"All images processed for iteration number {itera}.")
end_time = time.time()
Anon_execution_time = end_time - start_time
print(f"\nAnon Time: {Anon_execution_time} seconds")