In [None]:
"""
Group Number 65
CPS 834
11 30 2024

Name                      Student Number      Email
-----------------------------------------------------------------------------
Harrison Kalathil         501138547           harrison.kalathil@torontomu.ca
Maninder Arora            501041960           maninder.arora@torontomu.ca
Rahabar Sekandar          501100634           rsekandar@torontomu.ca
Steven Chen               501096820           steven1.chen@torontomu.ca

This program creates and trains a TensorFlow model to predict whether an image
(in this case, a cat image) is real or AI-generated.
"""

In [None]:
import numpy as np
import cv2
import os
import random
import setuptools.dist
from tensorflow.keras import layers, models, Input

In [None]:
# Relative path where images are stored. Built with os.path.join so the separator
# matches the host OS (a hard-coded '.\data' only resolves correctly on Windows).
DIRECTORY = os.path.join('.', 'data')
CATEGORIES = ["real", "fake"]  # Categories of images: real (image is real) and fake (image is ai-generated)
IMG_SIZE = 256                 # Image pixel width and height model will train on

In [None]:
data = []  # List of [image_array, label] pairs; label is the category's index in CATEGORIES (0 or 1)

# Populate data with resized image arrays and their numeric labels,
# using os to enumerate the files and cv2 to decode and resize them
for label, category in enumerate(CATEGORIES):
    folder = os.path.join(DIRECTORY, category)

    # sorted() gives a stable processing order; os.listdir returns entries in arbitrary order
    for image in sorted(os.listdir(folder)):
        image_path = os.path.join(folder, image)
        image_arr = cv2.imread(image_path)

        # cv2.imread returns None (it does not raise) when an entry cannot be decoded,
        # e.g. Thumbs.db, .DS_Store, a sub-folder or a corrupt image. Skip those entries,
        # otherwise cv2.resize fails on the None value and aborts the whole load.
        if image_arr is None:
            print(f"Skipping unreadable file: {image_path}")
            continue

        image_arr = cv2.resize(image_arr, (IMG_SIZE, IMG_SIZE))
        data.append([image_arr, label])

In [None]:
# Shuffle data so as to not train model with all the real images and then all the fake ones,
# and rather train it with a more even distribution of real and fake images.
# A dedicated fixed-seed generator is used so that, given the same input order, the shuffled
# order (and therefore the validation subset Keras takes from the end of the data) is the
# same on every run, without touching the global random state.
random.Random(42).shuffle(data)

In [None]:
# Split the [image, label] pairs held in data[] into two parallel lists for tensorflow:
# features receives the image arrays, labels receives the matching class numbers (0 or 1)
features = [pair[0] for pair in data]
labels = [pair[1] for pair in data]

In [None]:
# Convert lists to numpy arrays for tensorflow.
# The images are built directly as float32 and divided by 255 to map pixel values (0-255)
# to decimals (0-1). Dividing the default integer array instead would yield float64,
# doubling the memory needed for the full image set.
# NOTE: this cell overwrites `features`; re-running it on its own scales the data again,
# so re-run the previous cell first.
features = np.array(features, dtype=np.float32) / 255.0
labels = np.array(labels)

In [None]:
# Build the CNN (architecture borrowed from the 'Simple_CNN' resource posted on d2l):
# three Conv2D + MaxPooling2D stages with 32, 64 and 128 filters, then a dense classifier head
cnn_layers = [Input(shape=(IMG_SIZE, IMG_SIZE, 3))]  # Explicit Input layer

for filters in (32, 64, 128):
    cnn_layers.append(layers.Conv2D(filters, (3, 3), activation='relu'))
    cnn_layers.append(layers.MaxPooling2D((2, 2)))

cnn_layers.extend([
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid'),  # Sigmoid for binary classification
])

model = models.Sequential(cnn_layers)
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Fit the model: 10 passes over the data, with one tenth of the samples held out for validation
NUM_EPOCHS = 10
VALIDATION_FRACTION = 0.1
model.fit(x=features, y=labels, epochs=NUM_EPOCHS, validation_split=VALIDATION_FRACTION)

In [None]:
# Write the trained model to disk so it can be loaded and used from other directories
MODEL_PATH = 'model.keras'
model.save(MODEL_PATH)