In [None]:
!pip install -r requirements.txt

In [None]:
import pandas as pd
from PIL import Image
import torchvision
from tqdm import tqdm
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_datasets as tfds
import pydot
import matplotlib.pyplot as plt

In [None]:
# Root folder of the deepfake-detection data (CSV files and the images/ folder).
# The trailing slash matters: file names are appended by plain concatenation.
path = (
    '/home/jovyan/hfactory_magic_folders/'
    'tooling_for_the_data_scientist/'
    'deepfakes_detection/'
)

In [None]:
# Read the train and test tables that sit directly under `path`.
train_csv = path + 'train.csv'
test_csv = path + 'test.csv'

train = pd.read_csv(train_csv)
test = pd.read_csv(test_csv)

# Preview the first rows of the training table.
train.head()

In [None]:
# Number of training rows per label value (class balance check).
label_counts = train['label'].value_counts()
label_counts

In [None]:
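# One-off step (left commented out): split the training images into two folders,
# real images (label 0) and fake images (label 1).
# NOTE(review): `root` is not defined anywhere in this notebook; presumably it is
# the dataset folder held in `path`. Confirm before re-enabling these cells.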
# Real images
#for id in tqdm(train[train.label==0].image_id):
#    im = Image.open(f'{root}/images/{id}.jpg')
#    im.save(f'train/real_images/{id}.jpg')

In [None]:
# Fake images
#for id in tqdm(train[train.label==1].image_id):
#    im = Image.open(f'{root}/images/{id}.jpg')
#    im.save(f'train/fake_images/{id}.jpg')

In [None]:
image_size = (180, 180)
batch_size = 32
train_path = '/home/jovyan/hfactory_magic_folders/tooling_for_the_data_scientist/group_shared_workspace/hufflepuff_project/train'

# Pin the folder -> label-id mapping explicitly. Without `class_names`, Keras
# numbers the sub-folders in alphanumeric order, i.e. fake_images -> 0 and
# real_images -> 1, which is the inverse of train.csv (0 = real, 1 = fake) and
# would flip every label written to the submission file.
# Assumes `train_path` contains exactly the sub-folders real_images/ and
# fake_images/ (the names used by the commented-out split cells); Keras raises
# a ValueError if the names do not match.
class_names = ['real_images', 'fake_images']

# Options shared by both splits. validation_split and seed must be identical
# in the two calls so that the training and validation subsets do not overlap.
split_options = dict(
    validation_split=0.2,
    seed=1337,
    class_names=class_names,
    image_size=image_size,
    batch_size=batch_size,
)

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    train_path,
    subset="training",
    **split_options,
)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    train_path,
    subset="validation",
    **split_options,
)

In [None]:
# Collect the label batches of the whole training split into one flat array.
label_batches = [batch_labels for _, batch_labels in train_ds]
y = np.concatenate(label_batches, axis=0)

In [None]:
# Distinct label values present in the training split.
unique_labels = np.unique(y)
unique_labels

In [None]:
# Show the first nine images of one training batch in a 3x3 grid,
# each titled with its integer label.
fig = plt.figure(figsize=(10, 10))
for batch_images, batch_labels in train_ds.take(1):
    for idx in range(9):
        ax = fig.add_subplot(3, 3, idx + 1)
        pixels = batch_images[idx].numpy().astype("uint8")
        ax.imshow(pixels)
        ax.set_title(int(batch_labels[idx]))
        ax.axis("off")

In [None]:
# Keep up to 32 dataset elements buffered ahead of the consumer for both splits.
train_ds, val_ds = (
    split.prefetch(buffer_size=32) for split in (train_ds, val_ds)
)

In [None]:
# Random augmentation pipeline: horizontal flip followed by a rotation
# with factor 0.1.
augmentation_layers = [
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
]
data_augmentation = keras.Sequential(augmentation_layers)

In [None]:
def make_model(input_shape, num_classes):
    """Build an uncompiled residual CNN image classifier.

    Layer order: data augmentation -> rescaling by 1/255 -> two-convolution
    entry block -> four residual blocks of separable convolutions (128, 256,
    512, 728 filters) -> 1024-filter separable convolution -> global average
    pooling -> dropout -> dense classification head.

    Args:
        input_shape: Shape of a single input image, passed to `keras.Input`.
        num_classes: Number of target classes. With 2, the head is a single
            sigmoid unit; otherwise it is a softmax over `num_classes` units.

    Returns:
        A `keras.Model` mapping the input tensor to the classification output.
    """
    inputs = keras.Input(shape=input_shape)

    # Augment, then rescale exactly once. Everything downstream must consume
    # `x` (never `inputs` again), otherwise the augmentation layers are
    # silently disconnected from the graph and never applied.
    # Keras random preprocessing layers are only active during training, so
    # validation and prediction see the un-augmented images.
    x = data_augmentation(inputs)
    x = layers.Rescaling(1.0 / 255)(x)

    # Entry block
    x = layers.Conv2D(32, 3, strides=2, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    x = layers.Conv2D(64, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    previous_block_activation = x  # Set aside residual

    for size in [128, 256, 512, 728]:
        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(size, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)

        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(size, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)

        # Halves the spatial size; the residual projection below uses
        # strides=2 so both branches have matching shapes when added.
        x = layers.MaxPooling2D(3, strides=2, padding="same")(x)

        # Project residual
        residual = layers.Conv2D(size, 1, strides=2, padding="same")(
            previous_block_activation
        )
        x = layers.add([x, residual])  # Add back residual
        previous_block_activation = x  # Set aside next residual

    x = layers.SeparableConv2D(1024, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    x = layers.GlobalAveragePooling2D()(x)

    # Binary problems use one sigmoid unit; multi-class problems use softmax.
    if num_classes == 2:
        activation = "sigmoid"
        units = 1
    else:
        activation = "softmax"
        units = num_classes

    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(units, activation=activation)(x)
    return keras.Model(inputs, outputs)

In [None]:
# Images are fed as (height, width, 3); two classes give a single sigmoid output.
input_shape = (*image_size, 3)
model = make_model(input_shape=input_shape, num_classes=2)
#keras.utils.plot_model(model, show_shapes=True)

In [None]:
epochs = 10

# Binary cross-entropy matches the single sigmoid output of the model.
model.compile(
    loss="binary_crossentropy",
    optimizer=keras.optimizers.Adam(1e-3),
    metrics=["accuracy"],
)

# Save the model to its own file at every epoch; Keras fills in {epoch}.
checkpoint = keras.callbacks.ModelCheckpoint("save_at_{epoch}.h5")
callbacks = [checkpoint]

model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
    callbacks=callbacks,
)

In [None]:
# Re-save every image listed in test.csv as test_images/<image_id>.jpg.
# NOTE(review): the test dataset is later read from "full_test_images", not
# "test_images". Confirm how the two folders relate.
for image_id in test.image_id:
    source_file = path + 'images/' + image_id + '.jpg'
    # The context manager closes the source file deterministically instead of
    # leaving one open handle per image to the garbage collector.
    with Image.open(source_file) as im:
        im.save(f'test_images/{image_id}.jpg')

In [None]:
test_ds = tf.keras.preprocessing.image_dataset_from_directory(
    "full_test_images",
    seed=1337,
    image_size=image_size,
    batch_size=batch_size)

test_ds = test_ds.prefetch(buffer_size=32)

In [None]:
predictions = model.predict(test_ds)

In [None]:
np.sum((predictions > .5)*1)

In [None]:
final_ds = pd.concat([test.image_id, pd.DataFrame((predictions > .5)*1)], axis=1) 

In [None]:
final_ds = final_ds.rename(columns={0: "label"})

In [None]:
# Write the submission table without the DataFrame index column.
submission_path = '/home/jovyan/group4-deepfakes-detection/submission1.csv'
final_ds.to_csv(submission_path, index=False)