In [3]:
import numpy as np 
import pandas as pd 
import torch 
import torch.nn as nn
import os
import PIL.Image as Image
import kagglehub


In [3]:
%pip install kagglehub

Collecting kagglehub
  Downloading kagglehub-0.3.4-py3-none-any.whl.metadata (22 kB)
Downloading kagglehub-0.3.4-py3-none-any.whl (43 kB)
Installing collected packages: kagglehub
Successfully installed kagglehub-0.3.4

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [4]:

path = kagglehub.dataset_download("xhlulu/140k-real-and-fake-faces")

print("Path to dataset files:", path)


Path to dataset files: /Users/advaykadam/.cache/kagglehub/datasets/xhlulu/140k-real-and-fake-faces/versions/2


In [5]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

train_data_dir = os.path.join(path, 'real_vs_fake/real-vs-fake/train')

test_data_dir = os.path.join(path, 'real_vs_fake/real-vs-fake/test')

In [6]:
train_datagen = ImageDataGenerator(rescale=1./255)

target_size = (224, 224)
batch_size = 100  

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size= target_size, 
    batch_size=batch_size,
    class_mode='binary', 
    shuffle=True
)

Found 100000 images belonging to 2 classes.


In [8]:
train_generator.class_indices

{'fake': 0, 'real': 1}

In [44]:
from tensorflow.keras.applications import Xception
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.optimizers.legacy import Adam  

def build_model():
    shape = (224, 224, 3)
    
    Xception_mod_trained = Xception(input_shape=shape, include_top=False, weights='imagenet')  
    
    
    final_model_transfer = Sequential()
    
    final_model_transfer.add(Xception_mod_trained)
    
    final_model_transfer.add(layers.GlobalAveragePooling2D())
    final_model_transfer.add(layers.Dense(1024, activation='relu'))
    final_model_transfer.add(layers.Dropout(0.5))
    
    final_model_transfer.add(layers.Dense(1, activation='sigmoid'))  
    
    final_model_transfer.compile(optimizer=Adam(learning_rate=1e-5), metrics=['accuracy'], loss = 'binary_crossentropy')


    return final_model_transfer

In [45]:
final_model_transfer.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 xception (Functional)       (None, 7, 7, 2048)        20861480  
                                                                 
 global_average_pooling2d (  (None, 2048)              0         
 GlobalAveragePooling2D)                                         
                                                                 
 dense (Dense)               (None, 1024)              2098176   
                                                                 
 dropout (Dropout)           (None, 1024)              0         
                                                                 
 dense_1 (Dense)             (None, 1)                 1025      
                                                                 
Total params: 22960681 (87.59 MB)
Trainable params: 22906153 (87.38 MB)
Non-trainable params: 54528 (213.00 KB)
__________

In [46]:
from tensorflow.keras.callbacks import ModelCheckpoint


checkpoint_callback = ModelCheckpoint(
    'model_checkpoint_{epoch:03d}.h5',  
    save_weights_only=True,  
    save_freq='epoch', 
    verbose=1
)

In [47]:
import numpy as np

def train_in_chunks(train_generator, model, batch_size=100, chunk_size=10000, checkpoint_callback=None):

    total_samples = train_generator.samples 
    
    steps_per_chunk = chunk_size // batch_size  
    
    for start_idx in range(0, total_samples, chunk_size):

        end_idx = min(start_idx + chunk_size, total_samples)
        
     
        model.fit(
            train_generator,
            steps_per_epoch=steps_per_chunk,
            epochs=1, 
            callbacks=[checkpoint_callback]
        )

In [48]:
tran_model = build_model()

In [49]:
train_in_chunks(train_generator, tran_model, chunk_size=10000, batch_size=100, checkpoint_callback=checkpoint_callback)

Epoch 1: saving model to model_checkpoint_001.h5
Epoch 1: saving model to model_checkpoint_001.h5
Epoch 1: saving model to model_checkpoint_001.h5
Epoch 1: saving model to model_checkpoint_001.h5
Epoch 1: saving model to model_checkpoint_001.h5
Epoch 1: saving model to model_checkpoint_001.h5
Epoch 1: saving model to model_checkpoint_001.h5
Epoch 1: saving model to model_checkpoint_001.h5
Epoch 1: saving model to model_checkpoint_001.h5
Epoch 1: saving model to model_checkpoint_001.h5


Test Data Generation

In [10]:
test_datagen = ImageDataGenerator(rescale=1./255)

target_size = (224, 224)
batch_size = 100  

test_generator = test_datagen.flow_from_directory(
    test_data_dir,
    target_size= target_size, 
    batch_size=batch_size,
    class_mode='binary', 
    shuffle=True
)

Found 20000 images belonging to 2 classes.


In [11]:
test_generator.class_indices

{'fake': 0, 'real': 1}

In [51]:
tran_model.evaluate(test_generator)



[0.10337487608194351, 0.9606500267982483]

Validation

In [None]:
tran_model.save("transfer_model.keras")