In [1]:
import kagglehub
# Fetch the two Kaggle datasets through kagglehub. Each call returns the local
# directory that holds the downloaded files; the paths are kept in globals so
# later cells can locate the data.
manjilkarki_deepfake_and_real_images_path = kagglehub.dataset_download(
    'manjilkarki/deepfake-and-real-images'
)
birdy654_cifake_real_and_ai_generated_synthetic_images_path = kagglehub.dataset_download(
    'birdy654/cifake-real-and-ai-generated-synthetic-images'
)

# Only reached when both downloads returned without raising.
print('Data source import complete.')


Data source import complete.


In [2]:
import os
import cv2
import numpy as np
from skimage.feature import local_binary_pattern
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import logging
import sys

In [4]:
import os

# Quieten TensorFlow's native logging. The variable is set ahead of the import
# below so that it is already in place when TensorFlow loads.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf

# Enumerate the GPUs TensorFlow can see and, when there are any, ask it to
# allocate GPU memory on demand instead of reserving everything up front.
gpus = tf.config.experimental.list_physical_devices('GPU')
if not gpus:
    print("WARNING: No GPU detected. Running on CPU.")
else:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"TensorFlow is set to use GPU: {[gpu.name for gpu in gpus]}")
    except RuntimeError as e:
        # set_memory_growth can raise RuntimeError; report it and carry on.
        print(f"ERROR: Could not set memory growth: {e}")

TensorFlow is set to use GPU: ['/physical_device:GPU:0']


In [8]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications.resnet50 import preprocess_input as resnet_preprocess
from sklearn.model_selection import train_test_split # Useful for splitting if not using separate dirs
import sys

# Reset root logging so records go to stdout (which notebook front-ends show).
# Handlers left over from an earlier run are detached first; otherwise
# basicConfig would be a no-op.
for handler in tuple(logging.root.handlers):
    logging.root.removeHandler(handler)
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

# Side length (pixels) of the square images fed to ResNet50.
IMG_SIZE = 224



# --- Handcrafted feature 1: LBP texture histogram (originally notebook cell f8a1ef55) ---
def extract_lbp_features(gray_image, radius=1, n_points=8):
    """Return the normalized histogram of uniform LBP codes of a grayscale image.

    With ``method='uniform'`` the LBP operator emits integer codes in the range
    ``0 .. n_points + 1``, so the histogram is always built with exactly
    ``n_points + 2`` bins. Deriving the bin count from the largest code that
    happens to occur in one image would make the vector length vary from image
    to image and break stacking the features into a 2-D array.

    Args:
        gray_image: 2-D grayscale image array.
        radius: Radius of the circular LBP neighbourhood.
        n_points: Number of sampling points on that circle.

    Returns:
        1-D array of length ``n_points + 2`` that sums to 1, or all zeros of
        the same length when the image is empty or LBP computation fails.
    """
    expected_bins = n_points + 2
    try:
        lbp = local_binary_pattern(gray_image, n_points, radius, method='uniform')
        if lbp.size == 0:
            # A density histogram of nothing would be 0/0 (NaN); return zeros instead.
            print(f"WARNING: LBP histogram was empty, returning zeros. Expected bins: {expected_bins}")
            return np.zeros(expected_bins)
        # Bins are one code wide, so density=True yields relative frequencies.
        hist, _ = np.histogram(lbp.ravel(), bins=expected_bins, range=(0, expected_bins), density=True)
        return hist
    except Exception as e:
        # Best-effort: a failing image yields a zero vector of the right length.
        print(f"ERROR: Error processing LBP: {e}")
        return np.zeros(expected_bins)

# --- Handcrafted feature 2: oriented-gradient histogram (originally cell 6570ff10 of classifier.ipynb) ---
def extract_og_features(gray_image, n_bins=8):
    """Build a magnitude-weighted histogram of gradient orientations.

    Horizontal and vertical 3x3 Sobel derivatives are converted to polar form
    (angles in degrees). The angles are binned into ``n_bins`` equal sectors
    over ``[0, 360]``, with each pixel contributing its gradient magnitude.
    The histogram is then divided by its total so it sums to 1.

    Args:
        gray_image: Grayscale image array.
        n_bins: Number of orientation sectors.

    Returns:
        1-D array of length ``n_bins``. All zeros when the total gradient
        magnitude is not positive or when extraction raises.
    """
    try:
        grad_x = cv2.Sobel(gray_image, cv2.CV_64F, 1, 0, ksize=3)
        grad_y = cv2.Sobel(gray_image, cv2.CV_64F, 0, 1, ksize=3)
        strength, direction = cv2.cartToPolar(grad_x, grad_y, angleInDegrees=True)

        weighted_counts, _ = np.histogram(
            direction.ravel(),
            bins=n_bins,
            range=(0, 360),
            weights=strength.ravel(),
            density=False,
        )

        total = np.sum(weighted_counts)
        if not total > 0:
            # Flat image (no gradient energy): nothing to normalize.
            return np.zeros(n_bins)
        return weighted_counts / total
    except Exception as err:
        print(f"ERROR: Error in OG extraction: {err}")
        return np.zeros(n_bins)

# --- Handcrafted feature 3: JPEG re-compression error statistics (originally cell 35aba565 of classifier.ipynb) ---
def extract_ssee_features(image, gray_image, quality=75):
    """Measure how much a JPEG round-trip changes the image.

    ``image`` is encoded to JPEG in memory at the given ``quality`` and decoded
    back as grayscale. The decoded picture is resized to ``gray_image``'s shape
    if the two differ, and the absolute difference to ``gray_image`` is
    summarized.

    Args:
        image: Image array handed to the JPEG encoder.
        gray_image: Grayscale reference the decoded result is compared with.
        quality: JPEG quality setting passed to the encoder.

    Returns:
        ``np.array([mean_abs_diff, std_abs_diff])``, or ``[0.0, 0.0]`` when
        encoding or decoding fails or any step raises.
    """
    try:
        ok, jpeg_buffer = cv2.imencode('.jpg', image, [int(cv2.IMWRITE_JPEG_QUALITY), quality])
        if not ok:
            print("WARNING: Failed to encode image to JPEG.")
            return np.array([0.0, 0.0])

        roundtrip_gray = cv2.imdecode(jpeg_buffer, cv2.IMREAD_GRAYSCALE)
        if roundtrip_gray is None:
            print("WARNING: Failed to decode image from JPEG.")
            return np.array([0.0, 0.0])

        if roundtrip_gray.shape != gray_image.shape:
            # cv2.resize takes (width, height), i.e. (columns, rows).
            wanted_size = (gray_image.shape[1], gray_image.shape[0])
            roundtrip_gray = cv2.resize(roundtrip_gray, wanted_size)

        residual = cv2.absdiff(gray_image, roundtrip_gray)
        return np.array([np.mean(residual), np.std(residual)])
    except Exception as err:
        print(f"ERROR: Error in SSEE (JPEG Error) extraction: {err}")
        return np.array([0.0, 0.0])




# --- Per-image pipeline: handcrafted features + ResNet-ready image (originally notebook cell 74bed73a) ---

def extract_combined_features_and_image(image_path, target_size=(IMG_SIZE, IMG_SIZE), lbp_radius=1, lbp_n_points=8, og_n_bins=8, ssee_quality=75):
    """Load one image and return ``(resnet_ready_image, handcrafted_features)``.

    The handcrafted vector is the concatenation of the LBP histogram, the
    oriented-gradient histogram and the two JPEG-error statistics, all computed
    on the image at its original resolution and cast to float32.

    The image tensor is the picture resized to ``target_size``, converted from
    OpenCV's BGR channel order to RGB, and passed through ``resnet_preprocess``.
    The conversion matters: Keras' ResNet50 ``preprocess_input`` expects RGB
    input (it performs the RGB->BGR flip and mean subtraction itself), so
    feeding it the BGR array from ``cv2.imread`` mis-orders the channels
    relative to the ImageNet weights.

    Args:
        image_path: Path of the image file to read.
        target_size: ``(width, height)`` passed to ``cv2.resize``.
        lbp_radius: Radius forwarded to ``extract_lbp_features``.
        lbp_n_points: Number of LBP sampling points.
        og_n_bins: Number of orientation bins for ``extract_og_features``.
        ssee_quality: JPEG quality forwarded to ``extract_ssee_features``.

    Returns:
        ``(img_final, combined_features)``, or ``(None, None)`` when the file
        cannot be read or any processing step raises.
    """
    try:
        # With default flags cv2.imread yields a 3-channel BGR array, or None on failure.
        image = cv2.imread(image_path)
        if image is None:
            print(f"WARNING: Could not read image: {image_path}")
            return None, None

        # --- Feature Extraction (on the full-resolution image) ---
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        lbp_hist = extract_lbp_features(gray_image, radius=lbp_radius, n_points=lbp_n_points)
        og_hist = extract_og_features(gray_image, n_bins=og_n_bins)
        ssee_feats = extract_ssee_features(image, gray_image, quality=ssee_quality)

        combined_features = np.concatenate((
            lbp_hist.ravel(),
            og_hist.ravel(),
            ssee_feats.ravel()
        )).astype(np.float32)

        # --- Image Preprocessing for ResNet ---
        img_resized = cv2.resize(image, target_size)
        # preprocess_input assumes RGB input; OpenCV delivers BGR.
        img_rgb = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)

        # preprocess_input is applied to a batch of one, then the batch axis is dropped.
        img_preprocessed = resnet_preprocess(np.expand_dims(img_rgb, axis=0))
        img_final = img_preprocessed[0]

        return img_final, combined_features

    except Exception as e:
        # Best-effort loader: a broken file is reported and skipped by the caller.
        print(f"ERROR: Error processing {image_path} for combined features and image: {e}")
        return None, None

# --- Dataset loader for the two-input model (originally notebook cell 1c91c9c7) ---

def load_data_multi_input(data_dir, subset_fraction=0.01):
    """Load preprocessed images, handcrafted feature vectors and labels.

    ``data_dir`` is expected to contain one sub-directory per class, ``Fake``
    (label 0) and ``Real`` (label 1). Within each class the image files are
    listed in sorted order and the first ``int(n * subset_fraction)`` of them
    are processed, so repeated runs pick the same files regardless of the
    order ``os.listdir`` happens to return.

    Args:
        data_dir: Directory holding the ``Fake`` and ``Real`` sub-directories.
        subset_fraction: Fraction (0 < f <= 1) of each class to load. Defaults
            to 0.01, the 1% quick-test subset the notebook has always used;
            pass 1.0 for the full dataset.

    Returns:
        ``(images, features, labels)`` as three parallel lists, or
        ``([], [], [])`` when nothing could be loaded.

    Raises:
        ValueError: If ``subset_fraction`` is outside ``(0, 1]``.
    """
    if not 0 < subset_fraction <= 1:
        raise ValueError(f"subset_fraction must be in (0, 1], got {subset_fraction!r}")

    images = []
    features = []
    labels = []
    categories = ['Fake', 'Real']  # 0 for Fake, 1 for Real
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.tiff')

    for category_index, category in enumerate(categories):
        category_path = os.path.join(data_dir, category)
        if not os.path.isdir(category_path):
            print(f"WARNING: Directory not found: {category_path}")
            continue

        print(f"Loading data from: {category_path}")
        # Sorted so the chosen subset is reproducible across runs and machines.
        image_files = sorted(
            f for f in os.listdir(category_path)
            if f.lower().endswith(image_extensions)
            and os.path.isfile(os.path.join(category_path, f))
        )

        # --- Reduced dataset for faster testing ---
        subset_size = int(len(image_files) * subset_fraction)
        image_files_subset = image_files[:subset_size]
        print(f"Processing {len(image_files_subset)} images from {category} ({subset_fraction:.0%} subset)...")

        processed_count = 0
        attempted = 0
        for attempted, image_name in enumerate(image_files_subset, start=1):
            image_path = os.path.join(category_path, image_name)
            img_final, combined_features_vec = extract_combined_features_and_image(image_path)

            if img_final is not None and combined_features_vec is not None and combined_features_vec.size > 0:
                images.append(img_final)
                features.append(combined_features_vec)
                labels.append(category_index)
                processed_count += 1
            else:
                print(f"WARNING: Skipping image due to processing failure: {image_path}")

            if attempted % 100 == 0:  # Log progress
                print(f"Attempted processing {attempted}/{len(image_files_subset)} images in {category}...")

        print(f"Finished loading {category}. Successfully processed {processed_count}/{attempted} images.")

    if not features or not images:
        print(f"ERROR: No data was successfully loaded from {data_dir}.")
        return [], [], []

    feature_length = features[0].shape[0]
    print(f"Determined feature vector length: {feature_length}")

    return images, features, labels



# --- Load the Train and Test splits (originally notebook cell e929403f) ---

# Locate the dataset through the path kagglehub returned in the download cell,
# so the notebook also works where the data is not mounted under /kaggle/input.
# If that variable is missing (the download cell was not run), fall back to the
# Kaggle mount point used originally.
dataset_root = (
    globals().get('manjilkarki_deepfake_and_real_images_path')
    or '/kaggle/input/deepfake-and-real-images'
)
# The trailing '' keeps the trailing separator base_dir has always had.
base_dir = os.path.join(dataset_root, 'Dataset', '')
train_dir = os.path.join(base_dir, 'Train')
test_dir = os.path.join(base_dir, 'Test')

print("Starting multi-input data loading...")
train_images_list, train_features_list, train_labels_list = load_data_multi_input(train_dir)
test_images_list, test_features_list, test_labels_list = load_data_multi_input(test_dir)
print("Multi-input data loading complete.")

# Convert lists to NumPy arrays
X_train_img = np.array(train_images_list)
X_train_feat = np.array(train_features_list)
y_train = np.array(train_labels_list)

X_test_img = np.array(test_images_list)
X_test_feat = np.array(test_features_list)
y_test = np.array(test_labels_list)

# One-hot encode labels for categorical crossentropy
y_train_cat = to_categorical(y_train, num_classes=2)
y_test_cat = to_categorical(y_test, num_classes=2)

# Width of the handcrafted feature vector, used as the model's second input
# shape; 0 signals that no training features were loaded.
FEATURE_LENGTH = X_train_feat.shape[1] if X_train_feat.size > 0 else 0

print(f"Training images shape: {X_train_img.shape}, Features shape: {X_train_feat.shape}, Labels shape: {y_train_cat.shape}")
print(f"Test images shape: {X_test_img.shape}, Features shape: {X_test_feat.shape}, Labels shape: {y_test_cat.shape}")


# --- Preprocessing: standardize the handcrafted features (originally notebook cell b422ea8a) ---

# Scale only the feature vectors
# Standardize the handcrafted feature vectors only. The scaler is fitted on the
# training features and then applied unchanged to the test features; the images
# were already preprocessed while loading.
feature_scaler = StandardScaler()
if X_train_feat.size == 0:
    # Nothing to fit on: pass the (empty) arrays through untouched.
    print("WARNING: No features found to scale.")
    X_train_feat_scaled = X_train_feat
    X_test_feat_scaled = X_test_feat
else:
    X_train_feat_scaled = feature_scaler.fit_transform(X_train_feat)
    X_test_feat_scaled = feature_scaler.transform(X_test_feat)
    print("Feature scaling complete. Image preprocessing was done during loading.")


# Inputs for the two-branch Keras model, in the order [image, features].
X_train_keras = [X_train_img, X_train_feat_scaled]
X_test_keras = [X_test_img, X_test_feat_scaled]

Starting multi-input data loading...
Loading data from: /kaggle/input/deepfake-and-real-images/Dataset/Train/Fake
Processing 700 images from Fake (1% subset)...
Attempted processing 100/700 images in Fake...
Attempted processing 200/700 images in Fake...
Attempted processing 300/700 images in Fake...
Attempted processing 400/700 images in Fake...
Attempted processing 500/700 images in Fake...
Attempted processing 600/700 images in Fake...
Attempted processing 700/700 images in Fake...
Finished loading Fake. Successfully processed 700/700 images.
Loading data from: /kaggle/input/deepfake-and-real-images/Dataset/Train/Real
Processing 700 images from Real (1% subset)...
Attempted processing 100/700 images in Real...
Attempted processing 200/700 images in Real...
Attempted processing 300/700 images in Real...
Attempted processing 400/700 images in Real...
Attempted processing 500/700 images in Real...
Attempted processing 600/700 images in Real...
Attempted processing 700/700 images in Rea

In [15]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Number of output classes of the softmax head (labels are one-hot encoded with 2 classes).
NUM_CLASSES = 2

print("Building multi-input ResNet + Features model...")

# --- Image Input Branch (ResNet) ---
# ImageNet-pretrained ResNet50 without its classification top, used as a frozen
# feature extractor; its output is reduced by global average pooling.
image_input = Input(shape=(IMG_SIZE, IMG_SIZE, 3), name='image_input')
base_model = ResNet50(weights='imagenet', include_top=False, input_tensor=image_input)
base_model.trainable = False
image_features = base_model.output
image_features = GlobalAveragePooling2D(name='image_gap')(image_features)

# --- Handcrafted Features Input Branch ---
# NOTE(review): when FEATURE_LENGTH is missing or zero the code only prints an
# error and substitutes 1 so that the graph can still be built; the training
# guard further down is what then refuses to fit.
if 'FEATURE_LENGTH' not in globals() or FEATURE_LENGTH == 0:
    print("ERROR: FEATURE_LENGTH not defined or is zero. Cannot build feature branch.")
    FEATURE_LENGTH = 1

# Two small dense layers (64 -> 32 units) on top of the handcrafted feature vector.
feature_input = Input(shape=(FEATURE_LENGTH,), name='feature_input')
feature_dense = Dense(64, activation='relu', name='feature_dense_1')(feature_input)
feature_dense = Dense(32, activation='relu', name='feature_dense_2')(feature_dense)

# --- Combine Branches ---
combined = concatenate([image_features, feature_dense], name='concatenate_features')

# --- Classification Head ---
final_dense = Dense(512, activation='relu', name='final_dense_1')(combined)
predictions = Dense(NUM_CLASSES, activation='softmax', name='output_predictions')(final_dense)

# --- Create Model ---
# Input order [image, features] matches X_train_keras / X_test_keras.
multi_input_model = Model(inputs=[image_input, feature_input], outputs=predictions)

# --- Compile Model ---
multi_input_model.compile(optimizer=Adam(learning_rate=0.0001),
                          loss='categorical_crossentropy',
                          metrics=['accuracy'])

print("Multi-input model built and compiled.")
multi_input_model.summary(print_fn=lambda x: print(x)) # Print model summary

# --- Prepare ModelCheckpoint Callback ---
checkpoint_path = 'best_model.keras'  # Change path if needed
checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_loss',       # Monitor validation loss
    save_best_only=True,      # Save only the best model
    save_weights_only=False,  # Save the full model (architecture + weights)
    verbose=1
)

# --- Train Model ---
print("Training multi-input model...")

# Fit only when there are training and test images and the feature width
# matches the model's feature input; otherwise report the shapes and skip.
# NOTE(review): the Test split is passed as validation_data, and the checkpoint
# above selects the best epoch on val_loss, so the saved "best" model is chosen
# using test data. Consider a separate validation split - confirm intent.
if X_train_keras[0].shape[0] > 0 and X_train_keras[1].shape[1] == FEATURE_LENGTH and X_test_keras[0].shape[0] > 0:
    history = multi_input_model.fit(
        X_train_keras, y_train_cat,
        epochs=25,
        batch_size=32,
        validation_data=(X_test_keras, y_test_cat),
        callbacks=[checkpoint_callback],  # writes the best epoch to checkpoint_path
        verbose=1
    )
    print("Multi-input model training complete.")
    print(f"Best model saved to: {checkpoint_path}")
else:
    print("ERROR: Training data is empty or feature length mismatch. Cannot train model.")
    print(f"Shapes: X_train_img={X_train_keras[0].shape}, X_train_feat={X_train_keras[1].shape}, Expected feat length={FEATURE_LENGTH}")

Building multi-input ResNet + Features model...
Multi-input model built and compiled.


Model: "functional_2"
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓
┃ Layer (type)        ┃ Output Shape      ┃    Param # ┃ Connected to      ┃
┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩
│ image_input         │ (None, 224, 224,  │          0 │ -                 │
│ (InputLayer)        │ 3)                │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ conv1_pad           │ (None, 230, 230,  │          0 │ image_input[0][0] │
│ (ZeroPadding2D)     │ 3)                │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ conv1_conv (Conv2D) │ (None, 112, 112,  │      9,472 │ conv1_pad[0][0]   │
│                     │ 64)               │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ conv1_bn            │ (None, 112, 112,  │        256

In [16]:
# Write the model in its current in-memory state to a single .keras file.
# This is separate from 'best_model.keras', which the ModelCheckpoint callback
# writes during training.
model_save_path = 'model.keras'
multi_input_model.save(model_save_path)

print("Model saved successfully to {}".format(model_save_path))

Model saved successfully to model.keras
