In [1]:
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
%matplotlib inline

np.random.seed(2)

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import itertools

from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras import optimizers

sns.set(style='white', context='notebook', palette='deep')

In [2]:
from PIL import Image
import os
from pylab import *
import re
from PIL import Image, ImageChops, ImageEnhance

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
def convert_to_ela_image(path, quality):
    filename = path
    resaved_filename = filename.split('.')[0] + '.resaved.jpg'
    ELA_filename = filename.split('.')[0] + '.ela.png'
    
    im = Image.open(filename).convert('RGB')
    im.save(resaved_filename, 'JPEG', quality=quality)
    resaved_im = Image.open(resaved_filename)
    
    ela_im = ImageChops.difference(im, resaved_im)
    
    extrema = ela_im.getextrema()
    max_diff = max([ex[1] for ex in extrema])
    if max_diff == 0:
        max_diff = 1
    scale = 255.0 / max_diff
    
    ela_im = ImageEnhance.Brightness(ela_im).enhance(scale)
    
    return ela_im

In [None]:
pristine_training_path = 'drive/MyDrive/image-forgery/training/pristine/'
fake_training_path = 'drive/MyDrive/image-forgery/training/fake/'

saved_model_path = 'drive/MyDrive/Capstone Project/saved_models/'
pickle_path = 'drive/MyDrive/Capstone Project/pickle/ELA/'

## Training Data Preparation

In [None]:
pristines = os.listdir(pristine_training_path)
pristines = list(filter(lambda x : 'resave' not in x, pristines))

In [None]:
fakes = os.listdir(fake_training_path)[:-6]
fakes = list(filter(lambda x : 'resave' not in x and x != 'masks', fakes))

In [None]:
X_pristine = []
X_fake = []

for pristine in pristines:
    X_pristine.append(array(convert_to_ela_image(pristine_training_path + pristine, 90).resize((128, 128))).flatten() / 255.0)

with open(pickle_path + 'X_pristine.pickle', 'wb') as f:
    pickle.dump(X_pristine, f)

In [None]:
for fake in fakes:
    X_fake.append(array(convert_to_ela_image(fake_training_path + fake, 90).resize((128, 128))).flatten() / 255.0)

with open(pickle_path + 'X_fake.pickle', 'wb') as f:
    pickle.dump(X_fake, f)

In [None]:
X = X_pristine + X_fake
Y = [0] * len(X_pristine) + [1] * len(X_fake)

## Normalization

In [None]:
X = np.array(X)
Y = np.array(Y)

## Reshape

In [None]:
X = X.reshape(-1, 128, 128, 3)

## Train-test Split

In [None]:
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size = 0.2, random_state=42)

## Model

In [None]:
model = Sequential()

model.add(Conv2D(filters = 32, kernel_size = (5,5),padding = 'valid', 
                 activation ='relu', input_shape = (128,128,3)))

model.add(Conv2D(filters = 32, kernel_size = (5,5),padding = 'valid', 
                 activation ='relu'))

model.add(MaxPool2D(pool_size=(2,2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(256, activation = "relu"))
model.add(Dropout(0.5))
model.add(Dense(1, activation = "sigmoid"))

In [None]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 124, 124, 32)      2432      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 120, 120, 32)      25632     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 60, 60, 32)        0         
_________________________________________________________________
dropout (Dropout)            (None, 60, 60, 32)        0         
_________________________________________________________________
flatten (Flatten)            (None, 115200)            0         
_________________________________________________________________
dense (Dense)                (None, 256)               29491456  
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0

In [None]:
optimizer = RMSprop(lr=0.0005, rho=0.9, epsilon=1e-08, decay=0.0)

  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [None]:
model.compile(optimizer = optimizer , loss = "binary_crossentropy", metrics=["accuracy"])

In [None]:
early_stopping = EarlyStopping(monitor='val_acc',
                              min_delta=0,
                              patience=2,
                              verbose=0, mode='auto')

In [None]:
Y_train.shape

(1200,)

## Training

In [None]:
epochs = 30
batch_size = 100

model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, 
          validation_data=(X_val, Y_val), verbose=1, callbacks=[early_stopping])

model.save(saved_model_path + 'ela_model.h5')

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
model.predict(X_val[:10])

array([[0.09785885],
       [0.94203883],
       [0.5718909 ],
       [0.98205626],
       [0.3356946 ],
       [0.6285888 ],
       [0.3540867 ],
       [0.29777113],
       [0.60776705],
       [0.20294803]], dtype=float32)

In [None]:
Y_val[:10]

array([1, 1, 0, 0, 0, 0, 1, 0, 1, 0])