In [1]:
# Install the Kaggle dataset helper. %pip (rather than !pip) installs into the
# environment of the running kernel; the version is pinned to the release
# this notebook was last run against so a re-run gets the same package.
%pip install opendatasets==0.1.22


Collecting opendatasets
  Downloading opendatasets-0.1.22-py3-none-any.whl.metadata (9.2 kB)
Collecting kaggle (from opendatasets)
  Downloading kaggle-1.7.4.1-py3-none-any.whl.metadata (16 kB)
Downloading opendatasets-0.1.22-py3-none-any.whl (15 kB)
Downloading kaggle-1.7.4.1-py3-none-any.whl (173 kB)
Installing collected packages: kaggle, opendatasets
Successfully installed kaggle-1.7.4.1 opendatasets-0.1.22


In [None]:
import opendatasets as od

# Download the "deepfake-and-real-images" dataset from Kaggle.
# This prompts for Kaggle credentials (username and key).
deepfake_dataset_url = "https://www.kaggle.com/datasets/manjilkarki/deepfake-and-real-images"
od.download(deepfake_dataset_url)

Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username:Your Kaggle Key:Your Kaggle Key:Your Kaggle Key:Your Kaggle Key:Your Kaggle Key:Dataset URL: https://www.kaggle.com/datasets/manjilkarki/deepfake-and-real-images


In [None]:
# Cell 1: Import necessary libraries
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

In [None]:
# Cell 2: Define image preprocessing functions
def load_image(image_path):
    """Step 1: Load image - Read the image file and return it in RGB order.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The image returned by ``cv2.imread`` with its channels converted
        from BGR to RGB.

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
        ValueError: If the file exists but OpenCV could not read it.
    """
    img = cv2.imread(image_path)
    # cv2.imread reports failure by returning None instead of raising; without
    # this check the problem only shows up as an opaque error in cvtColor.
    if img is None:
        if not os.path.exists(image_path):
            raise FileNotFoundError(f"Image file not found: {image_path}")
        raise ValueError(f"Could not read image file: {image_path}")
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert from BGR to RGB

# More preprocessing functions: resize, normalize, and tensor conversion
def resize_image(image, target_size=(224, 224)):
    """Step 2: Resize - Convert to 224x224 pixels (or another requested size).

    Args:
        image: Image to resize, in a form accepted by ``cv2.resize``.
        target_size: Output size as ``(width, height)``, the order
            ``cv2.resize`` expects. Defaults to ``(224, 224)``, the size used
            everywhere else in this notebook.

    Returns:
        The resized image.
    """
    # tuple() lets callers pass a list such as [224, 224] as well.
    return cv2.resize(image, tuple(target_size))

def normalize_image(image):
    """Step 3: Normalize - Divide every pixel value by 255.

    For pixel values in the 0-255 range the result lies in 0-1.
    """
    max_pixel_value = 255.0
    scaled = image / max_pixel_value
    return scaled

def convert_to_tensor(image):
    """Step 4: Convert to Tensor - Wrap the image as a float32 TensorFlow tensor."""
    tensor = tf.convert_to_tensor(image, dtype=tf.float32)
    return tensor

# Full preprocessing pipeline for a single image (Steps 1-4)
def preprocess_image(image_path):
    """Run Steps 1-4 on one image file and return the resulting tensor.

    The steps run in order: load, resize, normalize, convert to tensor.
    Each step receives the previous step's output.
    """
    pipeline = (
        load_image,         # Step 1
        resize_image,       # Step 2
        normalize_image,    # Step 3
        convert_to_tensor,  # Step 4
    )
    result = image_path
    for step in pipeline:
        result = step(result)
    return result

In [None]:
# Locate the dataset splits under the download folder
data_root = "/content/deepfake-and-real-images"  # Adjust this if needed
train_dir, val_dir, test_dir = (
    os.path.join(data_root, relative_path)
    for relative_path in ("Dataset/Train", "Dataset/Validation", "Dataset/Test")
)

# Report whether each split directory is present on disk
for status_label, split_dir in (
    ("Train directory exists", train_dir),
    ("Validation directory exists", val_dir),
    ("Test directory exists", test_dir),
):
    print(f"{status_label}: {os.path.exists(split_dir)}")



In [None]:
# Build the image data generators that carry out the preprocessing steps.
# Training images get Step 3 (normalize) plus Step 5 (random augmentation).
train_datagen = ImageDataGenerator(
    # Step 3: Normalize
    rescale=1.0 / 255,
    # Step 5: Data Augmentation
    horizontal_flip=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
)

# Validation and test images are only normalized (Step 3); no augmentation.
val_datagen, test_datagen = (
    ImageDataGenerator(rescale=1.0 / 255) for _ in range(2)
)
batch_size = 32

In [None]:
# Create generators
batch_size = 32

# Validation generator.
# shuffle=False keeps the batches in the same order as
# validation_generator.classes, so model predictions can be lined up with the
# true labels (e.g. for a confusion matrix or classification report). The
# default (shuffle=True) would scramble that correspondence.
validation_generator = val_datagen.flow_from_directory(
    val_dir,
    target_size=(224, 224),  # Step 2: Resize
    batch_size=batch_size,
    class_mode='binary',
    classes=['Real', 'Fake'],  # Folder names (capitalized); list order sets labels: Real=0, Fake=1
    shuffle=False
)

# Re-create the training data generator with a split so that only a subset
# of the training folder is used.
train_datagen = ImageDataGenerator(
    validation_split=0.8,  # Reserve 80%, so the 'training' subset is only 20% of your data
    rescale=1.0 / 255,
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
    height_shift_range=0.2,
    width_shift_range=0.2,
    rotation_range=20,
)

# Training generator: reads the 'training' side of the split defined above.
train_generator = train_datagen.flow_from_directory(
    directory=train_dir,
    subset='training',
    classes=['Real', 'Fake'],
    class_mode='binary',
    batch_size=batch_size,
    target_size=(224, 224),
)

In [None]:
# Preview up to nine images from one training batch, titled with their labels.
plt.figure(figsize=(15, 8))
sample_batch = next(train_generator)
sample_images, sample_labels = sample_batch[0], sample_batch[1]

# Look the class names up from the generator instead of assuming 0 means
# 'Real'; with classes=['Real', 'Fake'] this yields the same titles.
index_to_class = {index: name for name, index in train_generator.class_indices.items()}

# Bound the loop by the images actually in the batch (not the configured
# batch_size), so a short batch cannot cause an IndexError.
for i in range(min(9, len(sample_images))):
    plt.subplot(3, 3, i+1)
    plt.imshow(sample_images[i])
    plt.title(f"Label: {index_to_class[int(sample_labels[i])]}")
    plt.axis('off')
plt.tight_layout()
plt.show()
