In [None]:
# Import necessary libraries
import numpy as np
import os
import shutil
import random
from sklearn.metrics import confusion_matrix
import tensorflow as tf
from IPython.display import display
from PIL import Image
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, Activation, MaxPooling2D, Dense, Flatten, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Build a small sequential CNN that maps a 200x200 RGB image to a single
# sigmoid output (binary classification).
realFakeCNN = Sequential()

# Feature extractor: four identical stages of
#   3x3 convolution (32 filters) -> ReLU -> 2x2 max pooling.
# Only the very first convolution declares the network input shape.
for stage in range(4):
    if stage == 0:
        realFakeCNN.add(Conv2D(32, (3, 3), input_shape=(200, 200, 3)))
    else:
        realFakeCNN.add(Conv2D(32, (3, 3)))
    realFakeCNN.add(Activation('relu'))
    realFakeCNN.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head, added in order:
#   Flatten            - turn the feature maps into a 1-D vector
#   Dense(64) + ReLU   - fully connected hidden layer
#   Dropout(0.5)       - regularisation against overfitting
#   Dense(1) + sigmoid - a single output squashed into (0, 1)
for head_layer in (
    Flatten(),
    Dense(64),
    Activation('relu'),
    Dropout(0.5),
    Dense(1),
    Activation('sigmoid'),
):
    realFakeCNN.add(head_layer)

# Print a summary of the network
realFakeCNN.summary()

In [None]:
# Configure training: binary cross-entropy loss (single sigmoid output),
# the RMSprop optimizer, and accuracy reported as the metric.
realFakeCNN.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [None]:
def split_train_test(source_dir, train_dir, test_dir, split_ratio=0.7, seed=None):
    """Copy the files of ``source_dir`` into a random train/test split.

    The source files are left in place; each one is copied into exactly one
    of ``train_dir`` or ``test_dir``. Both target directories are created if
    they do not exist yet.

    Args:
        source_dir: Directory containing the files to split.
        train_dir: Directory receiving the training share.
        test_dir: Directory receiving the remaining files.
        split_ratio: Fraction (between 0 and 1) of the files copied to
            ``train_dir``; the count is truncated with ``int``.
        seed: Optional seed. When given, the same seed always produces the
            same split for the same set of file names. When ``None`` the
            module-level ``random`` state is used, as before.

    Raises:
        ValueError: If ``split_ratio`` is outside ``[0, 1]``.
        FileNotFoundError: If ``source_dir`` does not exist (from os.listdir).
    """
    if not 0.0 <= split_ratio <= 1.0:
        raise ValueError(f"split_ratio must be between 0 and 1, got {split_ratio!r}")

    # Keep regular files only: os.listdir also returns subdirectories, and
    # shutil.copy raises on those (e.g. when the train/test folders live
    # inside source_dir and the cell is run a second time).
    # Sorting gives the shuffle a stable starting order, since os.listdir
    # returns names in arbitrary order.
    files = sorted(
        name for name in os.listdir(source_dir)
        if os.path.isfile(os.path.join(source_dir, name))
    )

    # Calculate the number of files for the train set
    num_train = int(len(files) * split_ratio)

    # Shuffle with a private generator when a seed is supplied, otherwise
    # with the global one so an earlier random.seed(...) is still honoured.
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(files)

    # Create train and test directories if they don't exist
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    # Copy the first num_train shuffled files to train, the rest to test
    for position, name in enumerate(files):
        target_dir = train_dir if position < num_train else test_dir
        shutil.copy(os.path.join(source_dir, name), os.path.join(target_dir, name))

# Replace these paths with your source directory, train directory, and test directory
source_directory = ''
train_directory = ''
test_directory = ''

# Split the contents of the source directory into train and test.
# The call is skipped while any placeholder is still empty: with an empty
# source path os.listdir('') raises FileNotFoundError, which would abort a
# "Restart & Run All" of the notebook.
if source_directory and train_directory and test_directory:
    split_train_test(source_directory, train_directory, test_directory)
else:
    print("Skipping train/test split: set source_directory, train_directory and test_directory first.")


In [None]:
import os
import shutil

# Define the paths to the main folder and the subfolder to merge into it
main_folder = ''
subfolder = ''


def move_files(source_folder, destination_folder=None):
    """Move every entry of ``source_folder`` into the destination folder.

    Args:
        source_folder: Directory whose entries (as returned by os.listdir)
            are moved.
        destination_folder: Directory receiving the entries. When omitted,
            the module-level ``main_folder`` is used, looked up at call time.

    Raises:
        FileNotFoundError: If ``source_folder`` does not exist (from os.listdir).

    Note:
        Entries are moved with shutil.move; per its documentation an existing
        non-directory destination may be overwritten.
    """
    if destination_folder is None:
        destination_folder = main_folder

    entries = os.listdir(source_folder)

    # Nothing to do when source and destination are the same directory.
    # Normalised absolute paths are compared so that spellings such as
    # 'data' and 'data/' are recognised as identical.
    if os.path.abspath(source_folder) == os.path.abspath(destination_folder):
        return

    # An empty destination resolves relative to the current directory in
    # os.path.join, so treat it as os.curdir when ensuring it exists.
    os.makedirs(destination_folder or os.curdir, exist_ok=True)

    for file_name in entries:
        shutil.move(
            os.path.join(source_folder, file_name),
            os.path.join(destination_folder, file_name),
        )


# Move files from the subfolder to the main folder. Skipped while the
# placeholders above are still empty, because os.listdir('') raises
# FileNotFoundError and would abort a full notebook run.
if main_folder and subfolder:
    move_files(subfolder)
    print("Folders combined successfully.")
else:
    print("Skipping folder merge: set main_folder and subfolder first.")

In [None]:
def move_files_from_subdirectories(root_directory, destination_directory, start_index, end_index, step=1000):
    """Move the files of numbered subdirectories into one destination folder.

    For every ``index`` in ``range(start_index, end_index + 1, step)`` the
    subdirectory named with the zero-padded six-digit index (e.g. ``001000``)
    is looked up under ``root_directory``. Each one that exists is handed to
    ``move_files_to_destination``; missing ones are skipped silently.

    Args:
        root_directory: Directory containing the numbered subdirectories.
        destination_directory: Directory receiving the files.
        start_index: First index to try.
        end_index: Last index to try (inclusive).
        step: Distance between consecutive indices.
    """
    # Report a missing root explicitly. os.path.isdir simply returns False
    # for paths below a non-existent root, so without this check the loop
    # would do nothing and the success message would still be printed.
    # An empty root keeps meaning "the current directory", because
    # os.path.join('', name) == name.
    if not os.path.isdir(root_directory or os.curdir):
        print(f"Root directory not found: {root_directory}")
        return

    try:
        # Iterate over the specified range of indices with the given step
        for index in range(start_index, end_index + 1, step):
            subdirectory_path = os.path.join(root_directory, f"{index:06d}")

            # Skip indices that have no matching subdirectory
            if not os.path.isdir(subdirectory_path):
                continue

            # Joining with '' only appends a trailing separator; replace ''
            # with a nested folder name if the files live one level deeper.
            source_directory = os.path.join(subdirectory_path, '')

            move_files_to_destination(source_directory, destination_directory)

        print("All files moved successfully.")
    except FileNotFoundError:
        # Kept from the original so a FileNotFoundError escaping the helper
        # is still reported instead of raised.
        print(f"Root directory not found: {root_directory}")

def move_files_to_destination(source_folder, destination_folder):
    """Move every regular file in ``source_folder`` to ``destination_folder``.

    Entries that are not regular files (e.g. subdirectories) are left where
    they are and reported. Progress is printed for every file.

    Args:
        source_folder: Directory whose files are moved.
        destination_folder: Directory receiving the files; created if needed.

    Note:
        Files are moved with shutil.move; per its documentation an existing
        non-directory destination may be overwritten.
    """
    # Validate the source first so a wrong source path does not leave an
    # empty destination directory behind.
    if not os.path.isdir(source_folder):
        print(f"Source directory not found: {source_folder}")
        return

    # Ensure the destination folder exists, create it if not
    try:
        os.makedirs(destination_folder, exist_ok=True)
    except FileNotFoundError:
        # Raised e.g. for an empty destination path.
        print(f"Destination directory could not be created: {destination_folder}")
        return

    for file_name in os.listdir(source_folder):
        source_path = os.path.join(source_folder, file_name)
        destination_path = os.path.join(destination_folder, file_name)

        # The entry exists (it was just listed) but is not a regular file.
        if not os.path.isfile(source_path):
            print(f"Skipping non-file entry: {source_path}")
            continue

        print(f"Moving: {source_path} to {destination_path}")
        try:
            shutil.move(source_path, destination_path)
        except FileNotFoundError:
            # The file disappeared between listing and moving; report it and
            # carry on with the remaining files.
            print(f"File not found: {source_path}")

    print("Files moved successfully.")

# Example usage: fill in the two directories, then pick the index range.
# With the default step of 1000 this tries the subdirectories
# 000000, 001000, ..., 099000.
root_directory = ''  # Specify the root directory
destination_directory = ''  # Specify the destination directory
start_index = 0  # Specify the start index
end_index = 99000  # Specify the end index

# Move the files of every matching subdirectory into the destination
move_files_from_subdirectories(
    root_directory=root_directory,
    destination_directory=destination_directory,
    start_index=start_index,
    end_index=end_index,
)

In [None]:
# Training generator: multiplies pixel values by 1/255 and augments the
# images with random shear, random zoom and random horizontal flips.
train_datagen = ImageDataGenerator(
    horizontal_flip=True,
    zoom_range=0.25,
    shear_range=0.25,
    rescale=1.0 / 255,
)

# Test generator: same 1/255 rescaling, no augmentation.
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

In [None]:
# Load the training data with data augmentation.
# Images are resized to 200x200 and served in batches of 32 with binary
# labels taken from the class subfolders.
# The dataset root defaults to the path originally hard-coded in this cell
# and can be overridden with the REALFAKE_DATA_DIR environment variable, so
# the notebook can run on another machine without editing the cell.
training_set = train_datagen.flow_from_directory(
    os.path.join(
        os.environ.get(
            'REALFAKE_DATA_DIR',
            '/Users/jamessweat/Desktop/ml-project/OURDATA/first_test',
        ),
        'train',
    ),
    target_size=(200, 200),
    batch_size=32,
    class_mode='binary'
)

In [None]:
# Load the testing data without data augmentation.
# Images are resized to 200x200 and served in batches of 32 with binary
# labels taken from the class subfolders.
# The dataset root defaults to the path originally hard-coded in this cell
# and can be overridden with the REALFAKE_DATA_DIR environment variable, so
# the notebook can run on another machine without editing the cell.
test_set = test_datagen.flow_from_directory(
    os.path.join(
        os.environ.get(
            'REALFAKE_DATA_DIR',
            '/Users/jamessweat/Desktop/ml-project/OURDATA/first_test',
        ),
        'test',
    ),
    target_size=(200, 200),
    batch_size=32,
    class_mode='binary'
)

In [None]:
# Fit the CNN for 10 epochs on the augmented training generator and
# validate on the test generator after each epoch. One epoch consumes every
# batch of the respective generator exactly once.
realFakeCNN.fit(
    x=training_set,
    validation_data=test_set,
    epochs=10,
    steps_per_epoch=len(training_set),
    validation_steps=len(test_set),
)

# Save the trained model
# realFakeCNN.save('realfake_cnn_model')

In [None]:
# Transfer learning: start from VGG16 pre-trained on ImageNet, without its
# fully connected classifier, for 200x200 RGB inputs.
vgg16 = VGG16(include_top=False, weights='imagenet', input_shape=(200, 200, 3))

# Freeze every pre-trained layer so only the new head below is trained
for frozen_layer in vgg16.layers:
    frozen_layer.trainable = False

# New classifier head stacked on the VGG16 feature maps:
# Flatten -> Dense(128, ReLU) -> Dropout(0.5)
x = vgg16.output
for head_layer in (Flatten(), Dense(128, activation='relu'), Dropout(0.5)):
    x = head_layer(x)

# Single sigmoid unit for the binary decision
predictions2 = Dense(1, activation='sigmoid')(x)

# Assemble the full model. This rebinds the name `realFakeCNN`, so a model
# previously stored under that name is no longer reachable through it.
realFakeCNN = tf.keras.Model(inputs=vgg16.input, outputs=predictions2)

# Compile with Adam, binary cross-entropy and accuracy
realFakeCNN.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train on the real vs AI-generated image generators for 10 epochs
realFakeCNN.fit(training_set, validation_data=test_set, epochs=10)

In [None]:
# Restore a previously saved model from disk.
# NOTE(review): no cell visible in this notebook writes 'realfake_cnn_model_2';
# confirm the artifact exists before running this cell on a fresh checkout.
model = load_model('realfake_cnn_model_2')

# Score the restored model on the test generator; with the 'accuracy' metric
# the result holds the loss first and the accuracy second.
evaluation = model.evaluate(test_set)
test_loss, test_accuracy = evaluation[0], evaluation[1]

# Report both numbers
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")