

images are divided into train, test and validation set. Each image is a 256 X 256 jpg image of human face either real or fake



In [1]:
!kaggle datasets download -d jangedoo/utkface-new

Dataset URL: https://www.kaggle.com/datasets/jangedoo/utkface-new
License(s): copyright-authors
Downloading utkface-new.zip to /content
 97% 321M/331M [00:01<00:00, 229MB/s]
100% 331M/331M [00:01<00:00, 182MB/s]


In [2]:
import os
import zipfile

# Path to the downloaded file
zip_path = 'utkface-new.zip'  # Corrected the missing closing quote
extract_path = 'UTKImages'

# Create the directory for extraction if it doesn't exist
os.makedirs(extract_path, exist_ok=True)

# Unzip the file
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

In [3]:
import shutil
import random
from sklearn.model_selection import train_test_split

# Paths to your dataset folders
aligned_crop_dir = '/content/UTKImages/utkface_aligned_cropped/crop_part1'
output_dir = 'UTKTrainer'

# Create output directories
train_dir = os.path.join(output_dir, 'train')
val_dir = os.path.join(output_dir, 'validation')
test_dir = os.path.join(output_dir, 'test')

os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Get all image filenames from the aligned crop directory
all_images = [img for img in os.listdir(aligned_crop_dir) if img.endswith(('.jpg', '.png'))]

# Split the images into train (60%), test (20%), and validation (20%) sets
train_images, temp_images = train_test_split(all_images, test_size=0.4, random_state=42)
val_images, test_images = train_test_split(temp_images, test_size=0.5, random_state=42)

# Function to move images to the respective directories
def move_images(image_list, destination_folder):
    for img_name in image_list:
        src_path = os.path.join(aligned_crop_dir, img_name)
        dst_path = os.path.join(destination_folder, img_name)
        shutil.copy(src_path, dst_path)

# Move images to their respective directories
move_images(train_images, train_dir)
move_images(val_images, val_dir)
move_images(test_images, test_dir)

print(f"Dataset organized successfully into train, validation, and test directories!")

Dataset organized successfully into train, validation, and test directories!


In [4]:
import os
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet50 import preprocess_input

# Define dataset paths
train_dir = 'UTKTrainer/train'
val_dir = 'UTKTrainer/validation'
test_dir = 'UTKTrainer/test'

# Function to create DataFrame from directory
def create_dataframe(directory):
    file_paths = []
    ages = []
    genders = []

    for filename in os.listdir(directory):
        if filename.endswith(".jpg"):  # Ensure only image files are processed
            # Extract age and gender from filename (age_gender_race_date.jpg)
            parts = filename.split('_')
            age = int(parts[0])
            gender = int(parts[1])  # 0 = Male, 1 = Female

            file_paths.append(os.path.join(directory, filename))
            ages.append(age)
            genders.append(gender)

    return pd.DataFrame({
        'path': file_paths,
        'age': ages,
        'gender': genders
    })

# Create DataFrames for train, validation, and test sets
train_df = create_dataframe(train_dir)
val_df = create_dataframe(val_dir)
test_df = create_dataframe(test_dir)

# ImageDataGenerator with preprocess function
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input, horizontal_flip=True, zoom_range=0.2)
val_test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Load images and labels from DataFrame
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='path',
    y_col='age',  # Assuming you're predicting age (can be adjusted if you want gender)
    target_size=(256, 256),
    batch_size=64,
    class_mode='raw'  # For regression (predicting age), use 'raw'
)

val_generator = val_test_datagen.flow_from_dataframe(
    dataframe=val_df,
    x_col='path',
    y_col='age',
    target_size=(256, 256),
    batch_size=64,
    class_mode='raw'
)

test_generator = val_test_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col='path',
    y_col='age',
    target_size=(256, 256),
    batch_size=64,
    class_mode='raw',
    shuffle=False
)

Found 5868 validated image filenames.
Found 1956 validated image filenames.
Found 1956 validated image filenames.


In [5]:
train_df.head()

Unnamed: 0,path,age,gender
0,UTKTrainer/train/48_1_0_20170109220544479.jpg....,48,1
1,UTKTrainer/train/35_1_3_20170104220136126.jpg....,35,1
2,UTKTrainer/train/62_0_0_20170111203056771.jpg....,62,0
3,UTKTrainer/train/32_0_4_20170103235323876.jpg....,32,0
4,UTKTrainer/train/48_1_0_20170103224620464.jpg....,48,1


In [6]:
train_generator

<keras.src.preprocessing.image.DataFrameIterator at 0x7fae34197ca0>

In [7]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam

# Load pre-trained ResNet50 model without top layers
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(256, 256, 3))

# Freeze the base model layers
for layer in base_model.layers:
    layer.trainable = False

# Add custom classification layers
x = Flatten()(base_model.output)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(1, activation='sigmoid')(x)  # Binary classification

# Compile the model
model = Model(inputs=base_model.input, outputs=x)
model.compile(optimizer=Adam(learning_rate=1e-4), loss='mean_squared_error', metrics=['mae'])

model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 256, 256, 3)]        0         []                            
                                                                                                  
 conv1_pad (ZeroPadding2D)   (None, 262, 262, 3)          0         ['input_1[0][0]']             
                                                                                                  
 conv1_conv (Conv2D)         (None, 128, 128, 64)         9472      ['conv1_pad[0][0]']           
                                                                                                  
 conv1_bn (BatchNormalizati  (None, 128, 128, 64) 

In [8]:
checkpoint_dir = 'model_checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)

In [9]:
from tensorflow.keras.callbacks import ModelCheckpoint
checkpoint_callback = ModelCheckpoint(
    filepath=os.path.join(checkpoint_dir, 'resnet50_epoch_{epoch:02d}.keras'),
    save_weights_only=False,
    save_freq='epoch',
    verbose=1
)

In [10]:
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator,
    callbacks=[checkpoint_callback]
)

Epoch 1/10
Epoch 1: saving model to model_checkpoints/resnet50_epoch_01.keras
Epoch 2/10
Epoch 2: saving model to model_checkpoints/resnet50_epoch_02.keras
Epoch 3/10
Epoch 3: saving model to model_checkpoints/resnet50_epoch_03.keras
Epoch 4/10
Epoch 4: saving model to model_checkpoints/resnet50_epoch_04.keras
Epoch 5/10
Epoch 5: saving model to model_checkpoints/resnet50_epoch_05.keras
Epoch 6/10
Epoch 6: saving model to model_checkpoints/resnet50_epoch_06.keras
Epoch 7/10
Epoch 7: saving model to model_checkpoints/resnet50_epoch_07.keras
Epoch 8/10
Epoch 8: saving model to model_checkpoints/resnet50_epoch_08.keras
Epoch 9/10
Epoch 9: saving model to model_checkpoints/resnet50_epoch_09.keras
Epoch 10/10
Epoch 10: saving model to model_checkpoints/resnet50_epoch_10.keras


https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9588729/

for audio datasets: https://paperswithcode.com/dataset/deep-voice-deepfake-voice-recognition

In [11]:
model.save('resnet50.h5')

  saving_api.save_model(


In [12]:
# Unfreeze layers from the last few blocks
for layer in base_model.layers[-10:]:
    layer.trainable = True

# Recompile with a lower learning rate
model.compile(optimizer=Adam(learning_rate=1e-5), loss='binary_crossentropy', metrics=['accuracy'])

# Continue training
history_finetune = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=5,
    steps_per_epoch=train_generator.samples // train_generator.batch_size,
    validation_steps=val_generator.samples // val_generator.batch_size
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [13]:
test_loss, test_acc = model.evaluate(test_generator, steps=test_generator.samples // test_generator.batch_size)
print(f'Test Accuracy: {test_acc * 100:.2f}%')

Test Accuracy: 10.63%


In [14]:
model.save('resnet50.h5')

In [15]:
model.save_weights('resnet50_Weights.weights.h5')