In [1]:
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from imageio import imread
import os

Training Dataset

In [2]:
samples_pristine_grayscale = np.load('../data/sample_images/k64 grayscale 40percent stride32/samples_pristine.npy')
samples_fake_grayscale = np.load('../data/sample_images/k64 grayscale 40percent stride32/sample_fakes_np.npy')

# Concatenate authentic and fake image samples along axis=0 to generate train set for grayscale sampled images
train_data_grayscale = np.concatenate((samples_pristine_grayscale, samples_fake_grayscale), axis=0)

In [3]:
len(samples_pristine_grayscale)

4842

In [4]:
len(samples_fake_grayscale)

4873

In [5]:
len(train_data_grayscale)

9715

In [6]:
train_labels_grayscale = [0]*len(samples_pristine_grayscale)+[1]*len(samples_fake_grayscale)

In [7]:
len(train_labels_grayscale)

9715

In [8]:
x_train, x_cv, y_train, y_cv = train_test_split(train_data_grayscale, train_labels_grayscale, test_size=0.3, stratify = train_labels_grayscale)

In [9]:
if not os.path.isdir("dataset"):
    os.mkdir("dataset")

In [11]:
np.save("dataset/x_train_np.npy", x_train)
np.save("dataset/x_cv_np.npy", x_cv)
np.save("dataset/y_train_np.npy", y_train)
np.save("dataset/y_cv_np.npy", y_cv)

Test Dataset

In [27]:
fake_path = "../data/training/fake/"
pristine_path = "../data/training/pristine/"

In [28]:
with open('../data/pickle/images_names/x_test_fakes_names.pickle', 'rb') as f:
    fakes_names=pickle.load(f)

In [29]:
fakes_names[:10]

['aa61a96b0a18b8dbc65fd20af3644958.png',
 'd8fd021d1ca1e21880a0b84effa7157b.png',
 'c6b5dc9a4db605cdc15a51d4bd6cb228.png',
 '934741e32f2fc90678fee50aea8cc413.png',
 'd6933d2484b99d63cab5532c478f82bc.png',
 '8812398ed09ccb906f4e0d608aec0913.png',
 'be51269d525b84339af824a319814cda.png',
 'c9171cef5cbaa557cfda9c22532e74c3.png',
 'b0060704d02f1229b75cbd550c7267b4.png',
 'd6388ee9f63e1111d41ce66ddf06ff41.png']

In [30]:
with open('../data/pickle/images_names/x_test_pristines_names.pickle', 'rb') as f:
    pristines_names=pickle.load(f)

In [31]:
pristines_names[:10]

['0ef3cb6f8128f134955cee608831c5c1.png',
 '07af4ffd4a3e7a37903b9a22affccfed.png',
 '5da397367179d6cceb846dfbce4f99a5.png',
 '4c5fd5a436373332833df3bb79dad169.png',
 '03a529bdfeb4484b679ce36d419c0a04.png',
 '0d97374c0e9e15ac0092b1bdbb9161a5.png',
 '1e41bd6b60e2c6fc408fdc7796e64ce0.png',
 '1ffcceddbb7678d36c5c7afd0e7b803d.png',
 '5ff53374f20d12a37c315ec7288396b8.png',
 '0a5dd139b2ee2b9f93faeb2bb24ca7f4.png']

In [32]:
x_test_filenames = pristines_names + fakes_names
y_test = [0]*len(pristines_names) + [1]*len(fakes_names)

In [33]:
img = imread(fake_path+fakes_names[0])
img.shape

(1536, 2048, 3)

In [40]:
y_test

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,


In [36]:
def sample_test_image(img):
    kernel_size=64
    
    x_start=0
    y_start=0
    i=0
    
    print((img.shape[0]//kernel_size)*(img.shape[1]//kernel_size))
    
    samples=np.ndarray(shape=((img.shape[0]//kernel_size)*(img.shape[1]//kernel_size), 64, 64, 3), dtype=np.dtype('uint8'))
    
    for y_start in range(0, img.shape[0] - kernel_size + 1, kernel_size):
        for x_start in range(0, img.shape[1] - kernel_size + 1, kernel_size):
            samples[i, :, :, :] = img[y_start:y_start + kernel_size, x_start:x_start + kernel_size, :3]
            i+=1
    
    return samples

In [38]:
x_test = []
for filename in fakes_names:
    x_test.append(sample_test_image(imread(fake_path+filename)))
for filename in pristines_names:
    x_test.append(sample_test_image(imread(pristine_path+filename)))

768
3350
192
192
192
192
192
192
192
3350
192
192
768
1200
192
768
192
192
192
176
176
165
192
192
192
192
176
192
192
96
768
192
192
192
180
192
2852
336
192
350
192
432
768
192
192
192
450
192
3350
192
192
192
192
192
768
192
300
192
2852
165
768
176
192
2852
1200
192
192
192
350
192
192
192
768
192
192
768
192
192
192
3350
192
192
192
768
192
192
3350
192
192
192
192
192
160
160
192
192
192
192
192
192
192
160
192
160
192
192
192
192
192
192
160
192
192
160
192
192
192
192
160
192
192
192
192
192
192
192
192
192
192
192
192
160
192
128
160
192
128
160
192
192
192
192
192
192
192
192
160
160
160
192
192
192
192
192
192
192
192
192
192
192
192
192
192
192
192
160
128
192
192
192
192
192
160
192
192
160
192
192
160
192
192
192
128
192
192
192
192
192
192
192
128
192
192
192
192
192
160
192
192
128
192
192
192
192
192
192
192
192
192
160
192
192
192
128
192
192
192
192
160
192
192
192
128
192
192
192
160
192
160
192
180
192
192
128
192
192
192
192
192
192
160
192
192
192
192
192
192
160

In [41]:
x_test = np.array(x_test, dtype=object)

In [42]:
len(x_test)

295

In [43]:
y_test

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,


In [44]:
np.save("dataset/x_test_np.npy", x_test)
np.save("dataset/y_test_np.npy", y_test)