In [2]:
from deepface import DeepFace
import torch
from tqdm import tqdm
import os, random
import math
from pathlib import Path
from PIL import Image
from numpy import var

In [3]:
#DATA
THRESHOLD = [0.3, 0.35 ,0.4, 0.45, 0.5, 0.6, 0.7, 0.8]

DATASET = "GoogleDataset\GDatasetSplit"

models = [
  "VGG-Face",
  "Facenet",
  "Facenet512",
  "OpenFace",
  "DeepFace",
  "DeepID",
  "ArcFace",
  "Dlib",
  "SFace",
  "GhostFaceNet",
]

Function to generate the embeddings

In [4]:
#function that generates embeddings for a given image
def Represent(path, model = 0):
    embedding_objs = DeepFace.represent(
    img_path = path,
    model_name= models[model],
    enforce_detection=False,
    )
    SUT_embeddings = embedding_objs[0]['embedding']
    return torch.FloatTensor(SUT_embeddings)

#Utility function that counts the number of files in a directory

def count_files_in_directory(directory):
    # Create a Path object
    path = Path(directory)
    # Count only files (ignoring subdirectories)
    return sum(1 for f in path.iterdir() if f.is_file())


In [5]:
def CompareRefSet(identityPath, samplePath, threshold):
    referenceSet = os.path.join(identityPath, "real")
    underTest= Represent(samplePath)
    
    currentSimilarity = 0
    for file in os.listdir(referenceSet):
        refSample = os.path.join(referenceSet, file)
        if(refSample == samplePath):
            continue

        refEmbedding = Represent(refSample)   

        similarity = torch.nn.functional.cosine_similarity(underTest, refEmbedding, dim=0)
        if(similarity > currentSimilarity):
            currentSimilarity = similarity  

    if(currentSimilarity > threshold):
        return (True, currentSimilarity)
    else:
        return (False, currentSimilarity)
    

            

def ProcessDatasetFakes(threshold, showTqdm = True):
    Result = []
    Acc = []
    if(showTqdm):
        list = tqdm(os.listdir(DATASET), desc="Processing Fakes")
    else:
        list = os.listdir(DATASET)
    for file in list:
        identity = os.path.join(DATASET, file)
        expectedResult = False

        fakeFolder = os.path.join(identity, "fake")
        underTest = os.path.join(fakeFolder,random.choice(os.listdir(fakeFolder)))

        result = CompareRefSet(identity, underTest, threshold)

        if(result[0] == expectedResult):
            Result.append(1)
        else:
            Result.append(0)

        Acc.append(result[1])

    return (Result, Acc)
            

def ProcessDatasetReal(threshold, showTqdm = True):
    Result = []
    Acc = []
    if(showTqdm):
        list = tqdm(os.listdir(DATASET), desc="Processing Reals")
    else:
        list = os.listdir(DATASET)

    for file in list:
        identity = os.path.join(DATASET, file)
        expectedResult = True

        realFolder = os.path.join(identity, "real")

        if(count_files_in_directory(realFolder) > 1):
            underTest = os.path.join(realFolder, random.choice(os.listdir(realFolder)))
            result = CompareRefSet(identity, underTest, threshold) 
            if(result[0] == expectedResult):
                Result.append(1)
            else:
                Result.append(0)
            
            Acc.append(result[1])


    return (Result, Acc)


def PrintResults(fakeResult, RealResult, t):     

    print(f"Test using {t} as threshold is finished")

    print(f"Accuracy: {sum(fakeResult[0])/len(fakeResult[0])} in detecting fakes over {len(fakeResult[0])} identities. The average similarity was {sum(fakeResult[1])/len(fakeResult[1])} with a variance of {var(fakeResult[1])}")
    
    print(f"Accuracy: {sum(RealResult[0])/len(RealResult[0])} in detecting reals over {len(RealResult[0])} identities. The average similarity was {sum(RealResult[1])/len(RealResult[1])} with a variance of {var(RealResult[1])}")

In [6]:
for t in THRESHOLD:
    PrintResults(ProcessDatasetFakes(t, False), ProcessDatasetReal(t, False), t)

Test using 0.3 as threshold is finished
Accuracy: 0.7395833333333334 in detecting fakes over 96 identities. The average similarity was 0.22935403883457184 with a variance of 0.016292855143547058
Accuracy: 0.7241379310344828 in detecting reals over 29 identities. The average similarity was 0.5059651136398315 with a variance of 0.05290611833333969
Test using 0.35 as threshold is finished
Accuracy: 0.8333333333333334 in detecting fakes over 96 identities. The average similarity was 0.23059342801570892 with a variance of 0.016379594802856445
Accuracy: 0.7241379310344828 in detecting reals over 29 identities. The average similarity was 0.5140664577484131 with a variance of 0.04630586504936218
Test using 0.4 as threshold is finished
Accuracy: 0.8645833333333334 in detecting fakes over 96 identities. The average similarity was 0.2278105467557907 with a variance of 0.01652688719332218
Accuracy: 0.6206896551724138 in detecting reals over 29 identities. The average similarity was 0.4692751765251