In [18]:
# from google.colab import drive
# drive.mount('/content/drive')

In [19]:
import cv2
import os
import random
import numpy as np

# Path to the directory containing the image folders on Google Drive
# base_path = "/content/drive/My Drive/archive"

base_path = "/Users/mrbinit/Downloads/archive/"

# List to store all image file paths and corresponding labels
image_paths = []
labels = []

# Function to recursively find all image files in a directory and assign labels
def find_image_files(directory, label):
    for root, dirs, files in os.walk(directory):
        for file in files:
            # Check for image file formats
            if file.lower().endswith((".jpg", ".jpeg", ".png")):
                image_paths.append(os.path.join(root, file))
                labels.append(label)  # Assign label to the corresponding image

# Find image files in the 'fake-v2' folder
fake_v2_path = os.path.join(base_path, "fakeV2/fake-v2")
find_image_files(fake_v2_path, label=1)  # Assign label 1 for AI-generated images

# Find image files in the 'real' folder
real_path = os.path.join(base_path, "real")
find_image_files(real_path, label=0)  # Assign label 0 for hand-made images

# Shuffle the image paths and labels in parallel
combined_data = list(zip(image_paths, labels))
random.shuffle(combined_data)
image_paths, labels = zip(*combined_data)

# Convert labels to numpy array for easier handling
labels = np.array(labels)

# Display the number of images and labels loaded
print(f"Total images: {len(image_paths)}")
print(f"Total labels: {len(labels)}")



Total images: 21635
Total labels: 21635


In [20]:
# Define the percentage of data
data_percentage = 10
num_samples = int(len(image_paths) * (data_percentage / 100))

# Use a subset of image paths and labels
sampled_image_paths = image_paths[:num_samples]
sampled_labels = labels[:num_samples]

# Load and process the sampled images
for img_path, label in zip(sampled_image_paths, sampled_labels):
    # Read the image using OpenCV
    img = cv2.imread(img_path)



In [21]:
height, width, channels = img.shape
print(f"Image at '{img_path}' has dimensions: {width}x{height} pixels with {channels} channels")

Image at '/Users/mrbinit/Downloads/archive/fakeV2/fake-v2/21225.png' has dimensions: 1024x1024 pixels with 3 channels


In [22]:
prev_dataset_size = len(image_paths)
dataset_size = len(sampled_image_paths)
print('The size of the entire dataset is:', prev_dataset_size)
print('the size of the entire dataset after ', dataset_size)

The size of the entire dataset is: 21635
the size of the entire dataset after  2163


In [23]:
from sklearn.model_selection import train_test_split

# Split the dataset into train, test, and validation sets using sklearn
train_paths, test_val_paths, train_labels, test_val_labels = train_test_split(
    sampled_image_paths, sampled_labels, test_size=0.2, stratify=sampled_labels, random_state=42)

test_paths, val_paths, test_labels, val_labels = train_test_split(
    test_val_paths, test_val_labels, test_size=0.5, stratify=test_val_labels, random_state=42)

# Display sizes of train, test, and validation sets
print('Size of train set:', len(train_paths))
print('Size of test set:', len(test_paths))
print('Size of validation set:', len(val_paths))

Size of train set: 1730
Size of test set: 216
Size of validation set: 217


In [24]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split



In [25]:

# Define image augmentation layers
img_augmentation_layers = [
    tf.keras.layers.RandomRotation(factor=0.15),
    tf.keras.layers.RandomTranslation(height_factor=0.1, width_factor=0.1),
    tf.keras.layers.RandomFlip(),
    tf.keras.layers.RandomContrast(factor=0.1),
]

def apply_image_augmentation(image):
    # Convert image to TensorFlow tensor
    image = tf.convert_to_tensor(image)
    # Apply each augmentation layer sequentially
    for layer in img_augmentation_layers:
        image = layer(image)
    # Convert back to NumPy array
    augmented_image = image.numpy()
    return augmented_image

def load_and_preprocess_images(image_paths, labels, input_size=224, augment=True):
    images = []
    for image_path in image_paths:
        # Read and resize image
        image = cv2.imread(image_path)
        image = cv2.resize(image, (input_size, input_size))
        # Normalize pixel values to [0, 1]
        image = image.astype(np.float32) / 255.0

        if augment:
            augmented_image = apply_image_augmentation(image)
            images.append(augmented_image)
        else:
            images.append(image)

    images = np.array(images)
    return images, labels

In [26]:
from sklearn.utils import shuffle
input_size = 224

# Preprocess the training, validation, and test data
train_data = load_and_preprocess_images(train_paths, train_labels, input_size=input_size, augment=True)
val_data = load_and_preprocess_images(val_paths, val_labels, input_size=input_size, augment=False)
test_data = load_and_preprocess_images(test_paths, test_labels, input_size=input_size, augment=False)

# Shuffle training data (optional)
train_data = shuffle(train_data)

# Convert preprocessed data to numpy arrays
X_train, y_train = np.array([item[0] for item in train_data]), np.array([item[1] for item in train_data])
X_val, y_val = np.array([item[0] for item in val_data]), np.array([item[1] for item in val_data])
X_test, y_test = np.array([item[0] for item in test_data]), np.array([item[1] for item in test_data])


  X_train, y_train = np.array([item[0] for item in train_data]), np.array([item[1] for item in train_data])
  X_val, y_val = np.array([item[0] for item in val_data]), np.array([item[1] for item in val_data])
  X_test, y_test = np.array([item[0] for item in test_data]), np.array([item[1] for item in test_data])


In [5]:
import cv2
import os

# Define the percentage of data
data_percentage = 10
num_samples = int(len(image_paths) * (data_percentage / 100))

# Specify the folder where you want to save the processed images
output_folder = 'processed_images'

# Create the output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Use a subset of image paths and labels
sampled_image_paths = image_paths[:num_samples]
sampled_labels = labels[:num_samples]

# Load and process the sampled images
for i, (img_path, label) in enumerate(zip(sampled_image_paths, sampled_labels)):
    # Read the image using OpenCV
    img = cv2.imread(img_path)

    # Here you can perform any image processing on 'img' as needed
    # For example, you might want to resize or apply filters

    # Save the processed image to the output folder
    filename = f'image_{i + 1}.jpg'  # Naming convention for saved images
    output_path = os.path.join(output_folder, filename)
    cv2.imwrite(output_path, img)

    print(f'Saved {output_path}')

print('Image saving complete.')


NameError: name 'image_paths' is not defined