In [None]:
#Imports all the libraries that will be used

import os
import glob
import cv2
import numpy as np
import seaborn as sns
from tensorflow.keras import layers
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.callbacks import Callback, ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, Model, load_model
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import metrics
import tensorflow as tf
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, Dense
from tensorflow.keras.layers import AvgPool2D, GlobalAveragePooling2D, MaxPool2D
from tensorflow.keras.models import Model
from tensorflow.keras.layers import ReLU, concatenate
import tensorflow.keras.backend as K
from sklearn.metrics import confusion_matrix, classification_report

# **EXPLORING THE DATASET**

In [None]:
# Print the number of directory entries in every split/class folder, in the
# order test, train, valid (fake before real within each split).
dataset_root = "../input/140k-real-and-fake-faces/real_vs_fake/real-vs-fake"
for split_name in ("test", "train", "valid"):
    for class_name in ("fake", "real"):
        print(len(os.listdir(f"{dataset_root}/{split_name}/{class_name}")))


In [None]:
# Root folder holding the split sub-folders. Keep the trailing slash: later
# cells build sub-paths from it by plain string concatenation (e.g. path + 'train/').
path  = '/kaggle/input/140k-real-and-fake-faces/real_vs_fake/real-vs-fake/'

In [None]:
# Load train.csv into a DataFrame and preview its first rows.
# NOTE(review): this is a relative path while `path` is absolute under /kaggle/input —
# presumably both resolve to the same dataset when run from /kaggle/working; confirm.
train_dataset = pd.read_csv("../input/140k-real-and-fake-faces/train.csv") 
train_dataset.head()

In [None]:
# Load valid.csv into a DataFrame and preview its first rows.
valid_dataset = pd.read_csv("../input/140k-real-and-fake-faces/valid.csv")
valid_dataset.head()

In [None]:
# Load test.csv into a DataFrame and preview its first rows.
test_dataset = pd.read_csv("../input/140k-real-and-fake-faces/test.csv")
test_dataset.head()

In [None]:
# Build one row per .jpg file found under `path`:
#   image_path - full path of the file
#   img_status - name of the class folder the file sits in
#   where      - name of the split folder above the class folder
# Listings are sorted so the row order is the same on every run, and records
# are collected in a list so `df_dataset` is only ever a DataFrame.
image_records = []
for where in sorted(os.listdir(path)):
    split_dir = os.path.join(path, where)
    if not os.path.isdir(split_dir):
        continue  # skip stray files sitting next to the split folders
    for status in sorted(os.listdir(split_dir)):
        for image in sorted(glob.glob(os.path.join(split_dir, status, "*.jpg"))):
            image_records.append(
                {"image_path": image, "img_status": status, "where": where}
            )
# Explicit columns keep the schema stable even if no image was found.
df_dataset = pd.DataFrame(image_records, columns=["image_path", "img_status", "where"])
df_dataset

In [None]:
# Count missing values per column of the image index.
df_dataset.isnull().sum()

In [None]:
df_dataset.info() # Prints a summary of the frame: columns, non-null counts, dtypes, memory use

In [None]:
# Number of rows in valid.csv.
len(valid_dataset)

In [None]:
# Number of rows in test.csv.
len(test_dataset)

In [None]:
# Number of rows in train.csv.
len(train_dataset)

In [None]:
len(df_dataset) # Total number of image files indexed across all split folders

In [None]:
df_dataset.shape # (rows, columns): one row per image file; three columns (image_path, img_status, where)

In [None]:
df_dataset.value_counts("img_status") # Number of rows per class folder name (distribution of fake vs real images)

In [None]:
df_dataset.isnull().sum() # Missing values per column (same check as the earlier isnull cell)

In [None]:
df_dataset.duplicated().sum() # Number of rows that exactly repeat an earlier row

In [None]:
# Show a 3x3 grid of randomly chosen images, each titled with its class label.
plt.figure(figsize = (14,10))
for i in range(9):
    # randint's upper bound is exclusive, so the lower bound must be 0 for the
    # first row to be selectable as well.
    row_idx = np.random.randint(0, len(df_dataset))
    # Positional lookup: stays correct even if the frame's index is not 0..n-1.
    sample = df_dataset.iloc[row_idx]
    plt.subplot(3,3,i+1)
    plt.imshow(plt.imread(sample["image_path"]))
    plt.title(sample["img_status"], size = 10)
    plt.xticks([])
    plt.yticks([])

plt.show()

In [None]:
# Count rows per class. value_counts() orders its result by frequency, so the
# counts are looked up by label ("real"/"fake" are the class folder names)
# rather than by position, which could swap the two numbers.
class_counts = df_dataset["img_status"].value_counts()
real = class_counts.get("real", 0)
fake = class_counts.get("fake", 0)

print(f"Real: {real},\nFake: {fake}\n")
# Pass the column by keyword: a positional argument is interpreted as `data`.
sns.countplot(x="img_status", data=df_dataset)
plt.show()

# **Pre-processing**

In [None]:
# Shared image generator used for the train, validation and test flows.
# No arguments are passed, so the Keras defaults apply.
# NOTE(review): that presumably means no augmentation and no pixel rescaling
# (raw 0-255 values reach the network) — confirm this is intended.
image_gen = ImageDataGenerator()

In [None]:
# Training batches: 100 grayscale 224x224 images per batch with binary labels,
# read from the 'train/' folder under `path`.
train_flow_options = dict(
    target_size=(224, 224),
    color_mode='grayscale',
    class_mode='binary',
    batch_size=100,
)
train_generator = image_gen.flow_from_directory(path + 'train/', **train_flow_options)

In [None]:
# Validation batches: same image settings as training, read from 'valid/'.
valid_flow_options = dict(
    target_size=(224, 224),
    color_mode='grayscale',
    class_mode='binary',
    batch_size=100,
)
valid_generator = image_gen.flow_from_directory(path + 'valid/', **valid_flow_options)

In [None]:
# Test flow: one image per batch and shuffling disabled, so the images are
# yielded in a fixed order; read from 'test/'.
test_flow_options = dict(
    target_size=(224, 224),
    color_mode='grayscale',
    class_mode='binary',
    batch_size=1,
    shuffle=False,
)
test_generator = image_gen.flow_from_directory(path + 'test/', **test_flow_options)

# **Creating DenseNet Model**

In [None]:
def CNNDN(inbuild, categories , f1 = 32):
    """Build a DenseNet-style classifier as a Keras functional model.

    The network is a 7x7 stride-2 convolution and a 3x3 stride-2 max-pool,
    followed by four dense blocks of 6, 12, 24 and 16 repetitions. A
    transition layer sits between consecutive blocks. The output of the last
    block goes through global average pooling into a softmax layer.

    Args:
        inbuild: Input shape passed to ``Input`` (e.g. ``(224, 224, 1)``).
        categories: Number of units in the final softmax layer.
        f1: Growth rate, i.e. the number of feature maps each dense-block
            repetition concatenates onto its input.

    Returns:
        An uncompiled ``Model`` mapping the input tensor to class probabilities.
    """

    def convlayer1(x, filters, kernel=1, strides=1):
        """Apply batch norm, ReLU, then a 'same'-padded convolution."""
        x = BatchNormalization()(x)
        x = ReLU()(x)
        return Conv2D(filters, kernel, strides=strides, padding='same')(x)

    def DB(x, reps):
        """Dense block: `reps` times, a 1x1 conv (4*f1 filters) then a 3x3 conv
        (f1 filters), concatenating the new maps with the running input."""
        for _ in range(reps):
            y = convlayer1(x, 4 * f1)
            y = convlayer1(y, f1, 3)
            x = concatenate([y, x])
        return x

    def trans_layer(x):
        """Transition: 1x1 conv to half the size of the last axis, then 2x2
        average pooling with stride 2."""
        # assumes the last axis is the channel axis (channels-last) — TODO confirm
        x = convlayer1(x, K.int_shape(x)[-1] // 2)
        return AvgPool2D(2, strides=2, padding='same')(x)

    inputs = Input(inbuild)
    x = Conv2D(64, 7, strides=2, padding='same')(inputs)
    x = MaxPool2D(3, strides=2, padding='same')(x)

    block_reps = [6, 12, 24, 16]
    for idx, reps in enumerate(block_reps):
        x = DB(x, reps)
        # The last dense block feeds the pooling head directly, so no
        # transition layer is built after it (its output would be discarded).
        if idx < len(block_reps) - 1:
            x = trans_layer(x)

    x = GlobalAveragePooling2D()(x)
    output = Dense(categories, activation='softmax')(x)

    return Model(inputs, output)

# Single-channel 224x224 input (the generators load grayscale images), two classes.
inbuild = 224,224,1 
categories = 2
model = CNNDN(inbuild,categories)

In [None]:
# Two-unit softmax output trained against integer class labels, with the
# default Adam optimizer; accuracy is tracked as the metric.
model.compile(
    optimizer=Adam(),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the Keras summary of the compiled model.
model.summary()

-----------------------------


# **Training the DenseNet Model**

In [None]:
# Number of whole batches in each generator (integer division drops any
# final partial batch).
steps_per_epoch, validation_steps = (
    gen.n // gen.batch_size for gen in (train_generator, valid_generator)
)

# Train for 4 epochs, validating after each; `hist` holds the per-epoch metrics.
hist = model.fit(
    train_generator,
    epochs=4,
    steps_per_epoch=steps_per_epoch,
    validation_data=valid_generator,
    validation_steps=validation_steps,
)

--------

# **Evaluating the DenseNet Model**

In [None]:
# Training vs. validation accuracy per epoch.
# The cell draws its own complete figure: figures do not carry over between
# cells with the inline backend, so a half-filled 1x2 grid would stay half empty.
fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(hist.history['accuracy'], label='train')
ax.plot(hist.history['val_accuracy'], label='val')
ax.set_title('Model Accuracy')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()

In [None]:
# Training vs. validation loss per epoch.
# The cell creates its own figure with an explicit size instead of relying on
# a figure opened in another cell, which is no longer current here.
fig, ax = plt.subplots(figsize=(7, 5))
ax.plot(hist.history['loss'], label='train')
ax.plot(hist.history['val_loss'], label='val')
ax.set_title('model Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

In [None]:
# evaluate() yields the loss followed by the compiled metric (accuracy);
# only the accuracy is reported, as a percentage with three decimals.
_test_loss, accuracy = model.evaluate(test_generator)
accuracy_pct = accuracy * 100
print('Accuracy Test to 3 figures = {:.3f}'.format(accuracy_pct))