In [None]:
import os
import tensorflow as tf
import numpy as np
import cv2
import matplotlib.pyplot as plt
from multiprocessing import Pool
from tqdm.auto import tqdm
import random
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import ResNet50V2 as pretrained
from tensorflow.keras.utils import to_categorical, Sequence
from tensorflow.keras.layers import GlobalAveragePooling2D,Dense,Dropout,ReLU
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from functools import lru_cache
import multiprocessing
import uuid

from utils import *

In [None]:
# Expose only the GPU with index 1 to CUDA; every other device is hidden from this process.
# NOTE(review): this has to run before the first GPU is initialised to take effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [None]:
# Target size, in pixels, that images are resized to and that the model input is built with.
WIDTH = HEIGHT = 64

In [None]:
def read_file(filepath, flip=False):
    """Load an image from disk as an RGB array resized to ``WIDTH`` x ``HEIGHT``.

    Args:
        filepath: Path of the image file to read.
        flip: When true, mirror the image horizontally after resizing.

    Returns:
        The image decoded by OpenCV, resized to the module-level
        ``(WIDTH, HEIGHT)`` and converted from BGR to RGB channel order.

    Raises:
        OSError: If OpenCV cannot read or decode ``filepath``.
    """
    img = cv2.imread(filepath)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        # Fail here with the offending path rather than with an opaque
        # assertion inside cv2.resize.
        raise OSError("could not read image file: {0}".format(filepath))
    img = cv2.resize(img, (WIDTH, HEIGHT))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    if flip:
        # flipCode=1 mirrors around the vertical axis (left <-> right).
        img = cv2.flip(img, 1)
    return img


def trim_random(folder, min_size=64):
    """Return a random square crop of a random image in ``folder``.

    One file name is drawn from ``folder``, a square window with a side
    between ``min_size`` and the image's shorter edge is cut out at a random
    position, and the crop is resized to ``WIDTH`` x ``HEIGHT`` in RGB order.

    Args:
        folder: Directory to draw the source image from.
        min_size: Smallest allowed side length, in pixels, of the crop
            before resizing.

    Returns:
        The resized RGB crop, or ``None`` when no crop could be produced
        (missing or empty folder, unreadable file, image smaller than
        ``min_size``). Callers are expected to filter out ``None``.
    """
    try:
        img_name = random.choice(os.listdir(folder))
        img = cv2.imread(os.path.join(folder, img_name))
        if img is None:
            # Not an image, or it could not be decoded.
            return None

        img_h, img_w = img.shape[0], img.shape[1]
        shortest = min(img_h, img_w)
        if shortest < min_size:
            # Too small to hold even the minimum crop.
            return None

        size = random.randint(min_size, shortest)
        x = random.randint(0, img_w - size)
        y = random.randint(0, img_h - size)
        crop = img[y:y + size, x:x + size, :]
        crop = cv2.resize(crop, (WIDTH, HEIGHT))
        return cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)
    except Exception:
        # Best effort: a bad sample is skipped rather than aborting the batch.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate so a long run can still be interrupted.
        return None

In [None]:
def clean(Height, Width, channel=3):
    """Build a two-class image classifier on an ImageNet-pretrained backbone.

    The backbone is the network imported as ``pretrained``, created without
    its classification top. Its feature map is globally average-pooled and
    passed through dropout, a 512-unit dense layer, dropout, a leaky ReLU and
    a 2-way softmax.

    Args:
        Height: Input image height in pixels.
        Width: Input image width in pixels.
        channel: Number of input channels.

    Returns:
        An uncompiled Keras ``Model`` taking ``(Height, Width, channel)``
        images and producing two softmax probabilities.
    """
    backbone = pretrained(weights="imagenet", include_top=False, input_shape=(Height, Width, channel))
    x = GlobalAveragePooling2D()(backbone.output)
    x = Dropout(rate=0.7)(x)
    x = Dense(512)(x)
    x = Dropout(0.5)(x)
    # The first positional argument of ReLU is `max_value`, so `ReLU(0.2)`
    # clipped every activation to [0, 0.2]. A leaky slope of 0.2 for negative
    # inputs is the presumed intent and must be passed by keyword.
    x = ReLU(negative_slope=0.2)(x)
    outputs = Dense(2, activation="softmax")(x)
    return Model(inputs=backbone.input, outputs=outputs)

In [None]:
class DataLoader(Sequence):
    """Keras ``Sequence`` producing image batches with one-hot labels.

    Positive samples (label 1) are read from the paths in ``true_files``.
    In training mode each positive is also added horizontally flipped, and
    one random crop from ``false_folder`` is requested per positive as a
    negative sample (label 0); crops that could not be produced are dropped.
    Outside training mode a batch holds the unflipped positives only.

    ``height`` and ``width`` are stored but not read by this class; the
    image size is decided by ``read_file`` and ``trim_random``.
    """

    def __init__(self, true_files, false_folder, batch_size, height, width, training=True):
        self.X = true_files
        self.false_folder = false_folder
        self.batch_size = batch_size
        self.height = height
        self.width = width
        self.training = training

    def __len__(self):
        """Return the number of batches needed to visit every positive path once."""
        return int(np.ceil(len(self.X) / self.batch_size))

    def __getitem__(self, idx):
        """Build batch ``idx``.

        Returns:
            ``(X, y)`` where ``X`` is a float32 array of images scaled to
            [0, 1] (positives first, then negatives) and ``y`` is the
            matching one-hot label array.
        """
        start = idx * self.batch_size
        batch_paths = self.X[start:start + self.batch_size]

        positives = [read_file(path, flip=False) for path in batch_paths]
        negatives = []
        if self.training:
            # Augment with mirrored copies, then try to match every positive
            # with one random negative crop.
            positives.extend([read_file(path, flip=True) for path in batch_paths])
            for _ in range(len(positives)):
                crop = trim_random(self.false_folder)
                if crop is not None:
                    negatives.append(crop)

        class_ids = [1] * len(positives) + [0] * len(negatives)

        X = np.asarray(positives + negatives).astype(np.float32) / 255.0
        y = to_categorical(class_ids)
        return X, y

In [None]:
def cleanup(true_files, false_folder="./input", k=5, batch_size=256, epochs=3):
    """Label every file in ``true_files`` with an out-of-fold prediction.

    The files are split into ``k`` contiguous folds. For each fold a fresh
    classifier is trained on the remaining files (as positives) against
    random crops from ``false_folder`` (as negatives) and then used to
    predict the held-out fold.

    Args:
        true_files: List of image paths assumed to be positives.
        false_folder: Directory that negative crops are drawn from.
        k: Number of folds; must be at least 2 so every fold has training data.
        batch_size: Number of positive paths per generator batch.
        epochs: Training epochs per fold.

    Returns:
        A list with one integer per entry of ``true_files``, in the same
        order: 1 if the model assigned the file to the positive class,
        0 if it assigned it to the negative (random crop) class.

    Raises:
        ValueError: If ``k`` is smaller than 2.
    """
    if k < 2:
        raise ValueError("k must be at least 2, got {0}".format(k))

    labels = []
    workers = multiprocessing.cpu_count()
    # ceil(n / k) makes exactly k folds cover every file. The previous
    # `n // (k - 1)` produced k-1 folds plus a remainder and a fold size of 0
    # (hence no labels at all) when there were fewer than k-1 files.
    fold_size = int(np.ceil(len(true_files) / k))
    for i in tqdm(range(k)):
        lo, hi = i * fold_size, (i + 1) * fold_size
        true_files_test = true_files[lo:hi]
        if len(true_files_test) == 0:
            # Fewer files than folds: nothing left to hold out.
            continue
        true_files_train = true_files[:lo] + true_files[hi:]

        train_gen = DataLoader(true_files_train, false_folder, batch_size, HEIGHT, WIDTH, training=True)
        test_gen = DataLoader(true_files_test, false_folder, batch_size, HEIGHT, WIDTH, training=False)

        model = clean(HEIGHT, WIDTH)
        model.compile(optimizer=Adam(), loss="categorical_crossentropy", metrics=["acc"])
        # NOTE(review): `workers` / `use_multiprocessing` are fit() arguments in
        # TF 2.x Keras; confirm against the installed version.
        model.fit(train_gen, validation_data=test_gen, epochs=epochs, workers=workers, use_multiprocessing=True)

        label = model.predict(test_gen, verbose=1)
        label = np.argmax(label, axis=1).tolist()
        # The test generator yields unflipped positives only, so predictions
        # must line up one-to-one with the held-out paths.
        assert len(label) == len(true_files_test)
        labels += label
    return labels

In [None]:
# Candidate images in ./faces. Sorting makes the fold assignment reproducible
# (os.listdir returns entries in arbitrary order), and non-file entries such as
# sub-directories are skipped because they cannot be decoded as images.
true_files = sorted(
    name for name in os.listdir("./faces")
    if os.path.isfile(os.path.join("./faces", name))
)
true_paths = [os.path.join("./faces", name) for name in true_files]
# labels[i] is the out-of-fold prediction for true_files[i] (1 = keep, 0 = flagged).
labels = cleanup(true_paths, "./input", k=3)

In [None]:
# File names in ./faces whose predicted label is 0, i.e. the model put them in the negative class.
img_false = np.array(true_files)[np.equal(np.array(labels), 0)]

In [None]:
row = 12
col = 14
# Preview the flagged images before they are deleted. Sampling without
# replacement shows each file at most once and also copes with an empty
# selection or fewer flagged files than grid cells.
preview_names = random.sample(img_false.tolist(), min(len(img_false), row * col))
plt.figure(figsize=(col*3, row*3))
plt.suptitle("false images: {0}".format(img_false.shape[0]), fontsize=20)
for i, filename in enumerate(preview_names):
    plt.subplot(row, col, i+1)
    filepath = os.path.join("./faces", filename)
    img = read_file(filepath)
    plt.imshow(img)
    plt.axis('off')

In [None]:
# Permanently delete every flagged image from ./faces.
for name in img_false:
    path = os.path.join("./faces", name)
    try:
        os.remove(path)
    except FileNotFoundError:
        # Already deleted (for example this cell was run before); keep going
        # so the cell can be re-run safely.
        pass