In [36]:
import tensorflow as tf
from tensorflow import keras

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

from sklearn.metrics import accuracy_score, confusion_matrix

import numpy as np

# use pillow for checking image sizes
import os
from PIL import Image

# Checking Image Sizes

In [19]:
# function to identify all unique image sizes for images in a directory
def print_img_sizes(path):
    # Get a list of all image file names in the directory
    image_files = [file for file in os.listdir(path) if file.endswith(('.jpg', '.jpeg', '.png'))]

    # Create a set to store unique image sizes
    unique_sizes = set()

    # Iterate over each image file and store its size in the set
    for image_file in image_files:
        image_path = os.path.join(path, image_file)
        with Image.open(image_path) as image:
            width, height = image.size
            size = (width, height)
            unique_sizes.add(size)

    # Print the unique image sizes
    print("Unique Image Sizes:")
    for size in unique_sizes:
        print(f"{size[0]}x{size[1]}")

In [20]:
print_img_sizes('../data/test/FAKE/')

Unique Image Sizes:
32x32


Images are 32x32, and are in color.

# Build ImageDataGenerator Classes for Training

In [21]:
# build an instance of the ImageDataGenerator class
datagen = ImageDataGenerator(
    rescale=1.0/255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

In [22]:
# build the training set
train_generator = datagen.flow_from_directory(
    directory="../data/train",
    target_size=(32,32),
    batch_size=32,
    class_mode="categorical"
)

Found 100000 images belonging to 2 classes.


In [23]:
print("Image shape of each observation: ",train_generator.image_shape)
print("Number of classes: ",train_generator.num_classes)

Image shape of each observation:  (32, 32, 3)
Number of classes:  2


# Build ImageDataGenerator Class for Testing

In [24]:
# Generate the test set
test_generator = datagen.flow_from_directory(
    directory="../data/test",
    target_size=(32,32),
    batch_size=32,
    class_mode="categorical"
)

Found 20000 images belonging to 2 classes.


# Build the Neural Network

In [25]:
# Create an instance of Sequential
classifier = Sequential()

# Add a Conv2D layer. Applies a set of filters to the input data, each filter learns to recognize different patterns or features
classifier.add(Conv2D(filters=32, 
                      kernel_size=(3, 3), 
                      input_shape=train_generator.image_shape, 
                      activation='relu')
                      )

# Add a MaxPooling2d layer. Performs downsampling on the data, reduces dimensions. Divides input data into non-overlapping regions (pooling windows).
# Maximum value is output within each window.
classifier.add(MaxPooling2D(pool_size=(2, 2)))

# Add a Flatten layer. Reshape data into a 1d array. Transition the convolution and pooling layers to the fully connected layers.
classifier.add(Flatten())

# Add a Dense layer. A fully connected layer, allows for the learning of relationships. Activation function introduces non-linearity
classifier.add(Dense(units=128,
                     activation='relu')
                     )

# Add a Final Dense layer. This will output our probabilities.
classifier.add(Dense(units=train_generator.num_classes,
                     activation='softmax')
                     )

# Compile the model
classifier.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

In [26]:
# train the model
classifier.fit(
    train_generator,
    steps_per_epoch=3,
    epochs=200
    )

# save the model to disk
classifier.save('my_model.h5')
print('Model saved to disk')

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

# Testing

In [32]:
# Load your trained model
model = keras.models.load_model('my_model.h5')

# Make predictions on the test set
predictions = model.predict(test_generator)

# Convert the predictions to class labels
predicted_classes = np.argmax(predictions, axis=1)

# Get the true class labels
true_classes = test_generator.classes



In [38]:
# Evaluate the accuracy
print(accuracy_score(predicted_classes, true_classes))
confusion_matrix(predicted_classes, true_classes)

0.5025


array([[6102, 6052],
       [3898, 3948]], dtype=int64)