In [1]:
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.layers import Conv2D, Dense, AveragePooling2D, MaxPooling2D, BatchNormalization, Input, Activation, Concatenate, Dropout, Flatten
from tensorflow.keras.models import Model

import numpy as np
import os, shutil, random, cv2, zipfile, glob, math
from io import BytesIO
from PIL import Image

In [2]:
print("Tensorflow-version:", tf.__version__)
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


Tensorflow-version: 2.11.0
Num GPUs Available:  2


In [3]:
SEED = 0
random.seed(0)
tf.keras.utils.set_random_seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

### Download Dataset

In [4]:
!pip install gdown

Collecting gdown
  Downloading gdown-4.7.1-py3-none-any.whl (15 kB)
Installing collected packages: gdown
Successfully installed gdown-4.7.1
[0m

In [5]:
!gdown --id 1INBdxjJJ71nmJgnb3bRYcfIyJaj1gWHi --folder

Retrieving folder list
Retrieving folder 1Yg_JelxIM4OAmCMie7WuMaWsvcASVw2M Test_Data
Retrieving folder 1-wurQ-IzlmIL1Z9pqZlQEea90UcBg0EU Deepfake
Processing file 1NRUjp32lszbt7P81Q4ndPmRlgWdn9R9m AttGAN_Aug_Test.zip
Processing file 1l3BlToahctj89NnQhM0TZ0yyoU6_4LuJ GDWCT_Aug_Test.zip
Processing file 1fdB5w6czZPtd3TdkGWnDWDbayAGO3CJH StarGAN_Aug_Test.zip
Processing file 1EtdeJLXPUPZmhQ9Ai42uO86GFgpTOnci STYLEGAN_Aug_Test.zip
Processing file 1m0H21IKV3D4H3sOVBsfaPITknZzUSYhG STYLEGAN2_Aug_Test.zip
Retrieving folder 1bmI5AYVLZGt8jqpvMsB6rQHGUQpA8t0y Real
Processing file 1LExW32wPYigOT_ZbjnF1KWnxiIAXsWss CelebA_Aug_Test.zip
Processing file 1Gy53cQcZ4KlLqqjjScssZBQZpUZ_aXDj ffhq_aug_test.zip
Retrieving folder 1w3-WRvXuet4KT03lY2Ws0hM_1Ej8Ptwf Train_Data
Retrieving folder 14ZQpszprwsZXMf4BcjqnUsfsM8lNcug4 Deepfake
Processing file 1EfjeU63IMpLGUszZ0pP2IKmRs6RChtfJ AttGAN_Aug_Train.zip
Processing file 15rLJK8arkytHq6VXJfLeWHCrq1P8uMDx GDWCT_Aug_Train.zip
Processing file 1qeqz0Qu4_kVaafNTRc20kt

In [6]:
# TODO - Add Error Handling (Yea I'm Lazy)
def extract_images(zip_files, amount_per_zip, path_to_save):
  for arch in zip_files:
    archive = zipfile.ZipFile(arch, 'r')
    all_files = [x for x in archive.namelist() if x.endswith('jpg')]

    try:
      files_to_extract = random.sample(all_files, amount_per_zip)
    except:
      files_to_extract = all_files
    
    #Extract the selected images to the new directory
    for image in files_to_extract:
        # Open the image file in the zip file
        with archive.open(image) as image_file:
            # Read the image data into memory
            image_data = BytesIO(image_file.read())

        # Open the image data as a PIL Image object
        with Image.open(image_data) as im:
            # Save the image to the new directory
            im.save(os.path.join(path_to_save, os.path.basename(image)))

### Get Zip Files Directories

In [7]:
root_train_real = '/kaggle/working/Albumentations_data/Train_Data/Real'
root_train_fake = '/kaggle/working/Albumentations_data/Train_Data/Deepfake'
root_test_real = '/kaggle/working/Albumentations_data/Test_Data/Real'
root_test_fake = '/kaggle/working/Albumentations_data/Test_Data/Deepfake'
root_val_real = '/kaggle/working/Albumentations_data/Validation_Data/Real'
root_val_fake = '/kaggle/working/Albumentations_data/Validation_Data/Deepfake'

In [8]:
# Get ZIP files 
real_train_zip_files = glob.glob(root_train_real + "/*.zip")
fake_train_zip_files = glob.glob(root_train_fake + "/*.zip")
real_test_zip_files = glob.glob(root_test_real + "/*.zip")
fake_test_zip_files = glob.glob(root_test_fake + "/*.zip")
real_val_zip_files = glob.glob(root_val_real + "/*.zip")
fake_val_zip_files = glob.glob(root_val_fake + "/*.zip")

### Extract the dataset

In [None]:
## Create necessary folders
!mkdir '/kaggle/working/ExtractedDataset'

!mkdir '/kaggle/working/ExtractedDataset/Train'
!mkdir '/kaggle/working/ExtractedDataset/Train/Real'
!mkdir '/kaggle/working/ExtractedDataset/Train/Fake'

!mkdir '/kaggle/working/ExtractedDataset/Test'
!mkdir '/kaggle/working/ExtractedDataset/Test/Real'
!mkdir '/kaggle/working/ExtractedDataset/Test/Fake'

!mkdir '/kaggle/working/ExtractedDataset/Validation'
!mkdir '/kaggle/working/ExtractedDataset/Validation/Real'
!mkdir '/kaggle/working/ExtractedDataset/Validation/Fake'

In [11]:
extract_images(real_train_zip_files, 2500, '/kaggle/working/ExtractedDataset/Train/Real')

In [12]:
extract_images(fake_train_zip_files, 1000, '/kaggle/working/ExtractedDataset/Train/Fake')

In [13]:
extract_images(real_test_zip_files, 1500, '/kaggle/working/ExtractedDataset/Test/Real')

In [14]:
extract_images(fake_test_zip_files, 600, '/kaggle/working/ExtractedDataset/Test/Fake')

In [15]:
extract_images(real_val_zip_files, 1500, '/kaggle/working/ExtractedDataset/Validation/Real')

In [16]:
extract_images(fake_val_zip_files, 600, '/kaggle/working/ExtractedDataset/Validation/Fake')

In [7]:
print(len(glob.glob('/kaggle/working/ExtractedDataset/Train/Real/*.jpg')))
print(len(glob.glob('/kaggle/working/ExtractedDataset/Train/Fake/*.jpg')))
print(len(glob.glob('/kaggle/working/ExtractedDataset/Test/Real/*.jpg')))
print(len(glob.glob('/kaggle/working/ExtractedDataset/Test/Fake/*.jpg')))
print(len(glob.glob('/kaggle/working/ExtractedDataset/Validation/Real/*.jpg')))
print(len(glob.glob('/kaggle/working/ExtractedDataset/Validation/Fake/*.jpg')))

6588
6558
3000
2996
3000
2995


In [8]:
train_ds = tf.keras.utils.image_dataset_from_directory('/kaggle/working/ExtractedDataset/Train/',
  seed=0,
  image_size=(160, 160),
  batch_size=64)

Found 13146 files belonging to 2 classes.


In [9]:
val_ds = tf.keras.utils.image_dataset_from_directory('/kaggle/working/ExtractedDataset/Validation/',
  seed=0,
  image_size=(160, 160),
  batch_size=64)

Found 5995 files belonging to 2 classes.


In [10]:
test_ds = tf.keras.utils.image_dataset_from_directory('/kaggle/working/ExtractedDataset/Test/',
  seed=0,
  image_size=(160, 160),
  batch_size=64)

Found 5996 files belonging to 2 classes.


In [11]:
# Pad the images to the maximum size
def resize_image(image, label):
    padded_image = tf.image.resize(image, [224,224])
    return padded_image, label

train_ds = train_ds.map(resize_image)
val_ds = val_ds.map(resize_image)
test_ds = test_ds.map(resize_image)

In [13]:
AUTOTUNE = tf.data.AUTOTUNE

train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.prefetch(buffer_size=AUTOTUNE)

In [14]:
gpus = tf.config.list_logical_devices('GPU')
STRATEGY = tf.distribute.MirroredStrategy(gpus) # default distribution strategy in Tensorflow. Works on CPU and single GPU.
N_REPLICAS = STRATEGY.num_replicas_in_sync

In [15]:
def DenseBlock(x, growth_rate, layers):

    x_input = x
    for _ in range(layers):
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Conv2D(growth_rate * 4, (1, 1), padding='same', kernel_initializer='he_normal', use_bias=False)(x)

        x = BatchNormalization()(x)
        x = Activation('relu')(x)
        x = Conv2D(growth_rate, (3, 3), padding='same', use_bias=False, kernel_initializer='he_normal')(x)

        x = Concatenate()([x_input, x])

    return x

In [16]:
def TransitionBlock(x, filter, compression):
    x = BatchNormalization()(x)
    x = Conv2D(filter * compression, kernel_size=(1,1), kernel_initializer='he_normal', padding='same', use_bias=False)(x)
    x = AveragePooling2D(pool_size=(2,2), strides=(2,2))(x)
    return x

In [17]:
layers = [6, 12, 64, 48]
compression = 0.3
growth_rate = 64

#input layer
with STRATEGY.scope():
    input = Input(shape=(224, 224, 3))
    input = tf.image.per_image_standardization(input)

    ## First Convolutional Layer
    x = Conv2D(128, (7,7), strides=(2,2), kernel_initializer='he_normal', padding='same', use_bias=False,)(input)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(2,2),  padding='same')(x)

    # Input Dense Blocks
    for i, n_layer in enumerate(layers[:-1]):
        x = DenseBlock(x, growth_rate, n_layer)
        x = TransitionBlock(x, growth_rate, compression)

    # Output Dense Block
    x = DenseBlock(x, layers[-1], growth_rate)

    # Classification Layer
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = AveragePooling2D(pool_size=(7,7), strides=(1,1), padding='same')(x)
    x = Dense(1000, activation='softmax')(x)
    x = Flatten()(x)
    
    # Dropout to prevent Overfitting
    x = tf.keras.layers.Dropout(0.50)(x)
    x = Dense(1, activation='sigmoid')(x)
    
    opt = tf.keras.optimizers.Adam(learning_rate=0.1)

    metrics = [
        tfa.metrics.F1Score(num_classes=1),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
        tf.keras.metrics.AUC(),
        tf.keras.metrics.BinaryAccuracy(),
    ]

    loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)
    
    # Define the model
    model = Model(inputs=input, outputs=x)
    
    model.compile(optimizer=opt,loss=loss, metrics=metrics)

#model.summary()

In [18]:
N_EPOCHS=10
VERBOSE=1 
N_REPLICAS =2
LR_MAX = 0.1 * N_REPLICAS
WD_RATIO = 0.01
N_WARMUP_EPOCHS = 0

In [19]:
# Learning rate scheduler with logaritmic warmup and cosine decay
def lrfn(current_step, num_warmup_steps, lr_max, num_cycles=0.50, num_training_steps=N_EPOCHS):
    
    if current_step < num_warmup_steps:
        return lr_max * 0.10 ** (num_warmup_steps - current_step)
    else:
        progress = float(current_step - num_warmup_steps) / float(max(1, num_training_steps - num_warmup_steps))
        return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))) * lr_max

In [20]:
# Learning Rate 
# Learning rate for encoder
LR_SCHEDULE = [lrfn(step, num_warmup_steps=N_WARMUP_EPOCHS, lr_max=LR_MAX, num_cycles=0.50) for step in range(N_EPOCHS)]
lr_callback = tf.keras.callbacks.LearningRateScheduler(lambda step: LR_SCHEDULE[step], verbose=0)

In [21]:
# Tensorflow Learning Rate Scheduler does not update weight decay, need to do it manually in a custom callback
class WeightDecayCallback(tf.keras.callbacks.Callback):
    def __init__(self, wd_ratio=WD_RATIO):
        self.step_counter = 0
        self.wd_ratio = wd_ratio
    
    def on_epoch_begin(self, epoch, logs=None):
        model.optimizer.weight_decay = model.optimizer.learning_rate * self.wd_ratio
        print(f'learning rate: {model.optimizer.learning_rate.numpy():.2e}, weight decay: {model.optimizer.weight_decay.numpy():.2e}')

In [22]:

trained_model = model.fit(train_ds, epochs=N_EPOCHS, validation_data=val_ds, batch_size=64, callbacks = [lr_callback, WeightDecayCallback()])


learning rate: 2.00e-01, weight decay: 2.00e-03
Epoch 1/10
learning rate: 1.95e-01, weight decay: 1.95e-03
Epoch 2/10
learning rate: 1.81e-01, weight decay: 1.81e-03
Epoch 3/10
learning rate: 1.59e-01, weight decay: 1.59e-03
Epoch 4/10
learning rate: 1.31e-01, weight decay: 1.31e-03
Epoch 5/10
learning rate: 1.00e-01, weight decay: 1.00e-03
Epoch 6/10
learning rate: 6.91e-02, weight decay: 6.91e-04
Epoch 7/10
learning rate: 4.12e-02, weight decay: 4.12e-04
Epoch 8/10
learning rate: 1.91e-02, weight decay: 1.91e-04
Epoch 9/10
learning rate: 4.89e-03, weight decay: 4.89e-05
Epoch 10/10


In [26]:
model.evaluate(test_ds)



[0.695233166217804,
 array([0.6669631], dtype=float32),
 0.5003335475921631,
 1.0,
 0.5,
 0.5003335475921631]

In [27]:
y_pred = []  # store predicted labels
y_true = []  # store true labels

# iterate over the dataset
for image_batch, label_batch in test_ds:   # use dataset.unbatch() with repeat
   # append true labels
   y_true.append(label_batch)
   # compute predictions
   preds = model.predict(image_batch)
   # append predicted labels
   y_pred.append(np.argmax(preds, axis = - 1))

# convert the true and predicted labels into tensors
correct_labels = tf.concat([item for item in y_true], axis = 0)
predicted_labels = tf.concat([item for item in y_pred], axis = 0)



In [46]:
from sklearn.metrics import precision_score, recall_score, f1_score

# compute precision, recall, and F-score for each class
precision_per_class = precision_score(correct_labels, predicted_labels, zero_division=1, average=None)
recall_per_class = recall_score(correct_labels, predicted_labels, zero_division=1, average=None)
f_score_per_class = f1_score(correct_labels, predicted_labels, zero_division=1, average=None)

# compute macro-average precision, recall, and F-score
precision_macro = precision_score(correct_labels, predicted_labels, zero_division=1, average='macro')
recall_macro = recall_score(correct_labels, predicted_labels, zero_division=1, average='macro')
f_score_macro = f1_score(correct_labels, predicted_labels, zero_division=1, average='macro')

# print the results
print('Precision per class:', precision_per_class)
print('Recall per class:', recall_per_class)
print('F-score per class:', f_score_per_class)
print('Macro-average precision:', precision_macro)
print('Macro-average recall:', recall_macro)
print('Macro-average F-score:', f_score_macro)


Precision per class: [0.49966644 1.        ]
Recall per class: [1. 0.]
F-score per class: [0.66637011 0.        ]
Macro-average precision: 0.7498332221480988
Macro-average recall: 0.5
Macro-average F-score: 0.33318505338078286
