# CPE695 Final Project for Team 1: <br>
**Group Members**: Ronald "Joey" Rupert, Andrew Greensweight, Michael Salek <br><br>
**Problem Statement:** <br>
The quality of AI-generated images has rapidly increased, leading to concerns of authenticity and trustworthiness. The aim of this project is to investigate whether computer vision techniques can effectively detect when images have been generated by AI. By addressing this problem, we can contribute to the development of algorithms that enhance the authenticity verification of images.
<br>
<br>
**Information on Dataset:** <br>
 https://www.kaggle.com/datasets/birdy654/cifake-real-and-ai-generated-synthetic-images
The dataset contains two classes: REAL and FAKE. The REAL images are taken from Krizhevsky & Hinton's CIFAR-10 dataset. The FAKE images were generated with Stable Diffusion version 1.4 to be the equivalent of CIFAR-10. There are 100,000 images for training (50k per class) and 20,000 for testing (10k per class).
<br>

# Loading in the Data

In [None]:
import os
import cv2
import numpy as np
from skimage.io import imread
from skimage.transform import resize
from sklearn.model_selection import train_test_split
from keras import datasets, layers, models, losses
from google.colab import drive

# Reduced copy of the dataset on the mounted Google Drive. The full dataset
# was too much data to upload to Colab, so training currently uses 20k real
# and 20k fake images, and the test set consists of 8k real and 8k fake images.
_dataset_root = "/content/drive/MyDrive/CPE 695 - Summer 2023/Smaller Dataset for Final Project"

# Training folders (one per class)
real_folder_0 = _dataset_root + "/real_0"
fake_folder_0 = _dataset_root + "/fake_0"

# Test folders (one per class)
test_real_folder = _dataset_root + "/test_real_0"
test_fake_folder = _dataset_root + "/test_fake_0"

def load_images_from_folder(folder):
    """Load every ``.jpg`` file in *folder* as an RGB image with a class label.

    The label is decided once per folder from the folder's own name (the last
    path component, compared case-insensitively): ``"real"`` if that name
    contains "real", otherwise ``"fake"``. Only the last component is used so
    that an unrelated parent directory whose name happens to contain "real"
    cannot turn a fake folder into a real one.

    Args:
        folder: Path of a directory containing ``.jpg`` images.

    Returns:
        A tuple ``(images, labels)`` of two equal-length lists: the decoded
        images in RGB channel order, and the matching label strings.
        Files are visited in sorted filename order. Files that OpenCV cannot
        decode are skipped with a warning.
    """
    import warnings  # local import so the function does not rely on a new file-level import

    folder_name = os.path.basename(os.path.normpath(folder)).lower()
    label = "real" if "real" in folder_name else "fake"

    images = []
    labels = []
    # os.listdir gives no ordering guarantee; sorting keeps the loaded order
    # (and therefore any seeded shuffle applied afterwards) reproducible.
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith(".jpg"):
            continue
        path = os.path.join(folder, filename)
        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None instead
            # of raising; passing that to cvtColor would crash the whole load.
            warnings.warn(f"Skipping unreadable image: {path}")
            continue
        images.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))  # Convert BGR to RGB
        labels.append(label)
    return images, labels


# Load the real and the fake training images together with their labels
real_images, real_labels = load_images_from_folder(real_folder_0)
fake_images, fake_labels = load_images_from_folder(fake_folder_0)

# Stack both classes into one image array and one label array
training_images = np.concatenate((real_images, fake_images), axis=0)
training_labels = np.concatenate((real_labels, fake_labels), axis=0)

# Set a random seed for reproducibility
random_seed = 42

# Shuffle the training set. The same permutation indexes both arrays, so every
# image keeps its own label. Both results are already NumPy arrays, so no
# zip/unzip round trip is needed afterwards (that round trip also raised on an
# empty dataset).
np.random.seed(random_seed)
shuffle_indices = np.random.permutation(len(training_images))
training_images = training_images[shuffle_indices]
training_labels = training_labels[shuffle_indices]

FileNotFoundError: ignored

# Pre-Processing
Apply geometric augmentations (flips and shifts) and use a pre-trained CNN to extract features.

In [None]:
# Notebook shell escape: install the tensorflow package with pip.
!pip install tensorflow




In [None]:
from skimage.transform import resize
import tensorflow.keras.applications as keras_applications
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import accuracy_score

# Spatial size (rows, columns) that every image is resized to and that the
# models below take as input
image_size = (32, 32)

# Settings read by apply_geometry_augmentations:
#   flip        - whether to add a horizontally flipped copy of each image
#   shift_range - width/height shift range handed to ImageDataGenerator
augmentation_params = dict(flip=True, shift_range=0.2)

def apply_geometry_augmentations(images, labels, augmentation_params):
    """Expand a labelled image set with flipped and randomly transformed copies.

    For each ``(image, label)`` pair the output contains, in this order:
    the original image; a horizontally flipped copy when
    ``augmentation_params["flip"]`` is truthy; and one random transform drawn
    from an ``ImageDataGenerator`` when ``augmentation_params["shift_range"]``
    is truthy. Every copy carries the label of its source image.

    Args:
        images: Iterable of image arrays.
        labels: Iterable of labels, paired positionally with ``images``.
        augmentation_params: Mapping with the keys ``"flip"`` and
            ``"shift_range"``.

    Returns:
        A tuple ``(augmented_images, augmented_labels)`` of two equal-length
        lists.
    """
    do_flip = augmentation_params["flip"]
    shift = augmentation_params["shift_range"]
    # The generator is configured with the flip setting as well, so its random
    # transform may flip the image in addition to shifting it.
    generator = ImageDataGenerator(
        horizontal_flip=do_flip,
        width_shift_range=shift,
        height_shift_range=shift,
    )

    out_images, out_labels = [], []
    for img, lbl in zip(images, labels):
        variants = [img]
        if do_flip:
            variants.append(cv2.flip(img, 1))  # flip code 1 = around the vertical axis
        if shift:
            variants.append(generator.random_transform(img))
        out_images.extend(variants)
        out_labels.extend([lbl] * len(variants))
    return out_images, out_labels



# Read the test images of both classes together with their labels
test_real_images, test_real_labels = load_images_from_folder(test_real_folder)
test_fake_images, test_fake_labels = load_images_from_folder(test_fake_folder)

# Stack both classes into one test image array and one test label array
test_images = np.concatenate((test_real_images, test_fake_images), axis=0)
test_labels = np.concatenate((test_real_labels, test_fake_labels), axis=0)

# ImageNet-pretrained MobileNetV2 without its top classifier.
# NOTE(review): nothing in this cell calls this model, and the CNN cell further
# down rebinds `model` - confirm whether feature extraction is still planned.
model = keras_applications.MobileNetV2(
    include_top=False,
    weights='imagenet',
    input_shape=(*image_size, 3),
)

# Resize every image to image_size and collect the results in one array per split
reshaped_training_images = np.array([resize(img, image_size) for img in training_images])
reshaped_test_images = np.array([resize(img, image_size) for img in test_images])

# Flatten each image into a single feature row; labels pass through unchanged
X_train = reshaped_training_images.reshape(reshaped_training_images.shape[0], -1)
X_test = reshaped_test_images.reshape(reshaped_test_images.shape[0], -1)
y_train = training_labels
y_test = test_labels



In [None]:
# Show the first ten flattened feature rows of each split
for heading, features in (("X_train:", X_train), ("X_test:", X_test)):
    print(heading)
    print(features[:10])

X_train:
[[0.11372549 0.08235294 0.         ... 0.23921569 0.21960784 0.        ]
 [1.         0.59215686 0.01568627 ... 0.21568627 0.1254902  0.09411765]
 [0.70588235 0.74509804 0.78039216 ... 0.38039216 0.44705882 0.50980392]
 ...
 [0.79215686 0.79215686 0.8        ... 0.48627451 0.48627451 0.49411765]
 [0.03137255 0.05490196 0.01568627 ... 0.37254902 0.27058824 0.14117647]
 [0.43137255 0.54901961 0.69803922 ... 0.29803922 0.39607843 0.42352941]]
X_test:
[[0.36470588 0.2627451  0.17254902 ... 0.52941176 0.41568627 0.28235294]
 [0.82352941 0.81176471 0.75294118 ... 0.43529412 0.54509804 0.38431373]
 [0.81960784 0.76470588 0.72941176 ... 0.58823529 0.65098039 0.54901961]
 ...
 [0.20392157 0.33333333 0.29803922 ... 0.20784314 0.25882353 0.15686275]
 [0.98823529 0.89803922 0.86666667 ... 0.97647059 0.85098039 0.70196078]
 [0.94901961 0.94117647 0.98431373 ... 0.08235294 0.09019608 0.07058824]]




```
# This is formatted as code
```

Build and fit CNN model, RJR

In [None]:
# Small convolutional classifier for image_size RGB inputs
model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(image_size[0], image_size[1], 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
# Two output logits, one per class (index 0 = fake, index 1 = real). The
# previous width of 10 matched CIFAR-10's classes, not this two-class task.
model.add(layers.Dense(2))

model.summary()

model.compile(optimizer='adam',
              loss=losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# SparseCategoricalCrossentropy needs integer class indices, but the loaded
# labels are the strings "real"/"fake"; encode them as 1/0 before training.
train_targets = (np.asarray(training_labels) == "real").astype("int32")
test_targets = (np.asarray(test_labels) == "real").astype("int32")

# NOTE(review): the test set is also passed as validation data, so the
# per-epoch validation metrics are not independent of the final evaluation.
history = model.fit(reshaped_training_images, train_targets, epochs=10,
                    validation_data=(reshaped_test_images, test_targets))

test_loss, test_acc = model.evaluate(reshaped_test_images, test_targets, verbose=2)

print(test_acc)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 30, 30, 32)        896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 15, 15, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 13, 13, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 6, 6, 64)         0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 4, 4, 64)          36928     
                                                                 
 flatten (Flatten)           (None, 1024)              0