# Load Dataset

In [5]:
!pip install datasets --q

In [6]:
from datasets import load_dataset

In [7]:
Dataset = load_dataset("Bahareh0281/liveness_images")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [32]:
Dataset['train'][7]

{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=256x256>,
 'label': 0}

# Import Necessary Libraries

In [77]:
import cv2
import numpy as np
import os
from skimage.feature import local_binary_pattern
from skimage import measure
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from PIL import Image
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# Feature Extraction Functions

In [11]:
radius = 3
n_points = 8 * radius

def compute_fourier_transform(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    f = np.fft.fft2(gray)
    fshift = np.fft.fftshift(f)
    magnitude_spectrum = 20 * np.log(np.abs(fshift))
    return magnitude_spectrum

def compute_lbp(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    lbp = local_binary_pattern(gray, n_points, radius, method="uniform")
    return lbp

def compute_depth(image):
    # به عنوان مثال از کانال آبی برای تخمین عمق استفاده می‌کنیم
    depth = image[:, :, 2]
    return depth

def extract_statistical_features(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    mean = np.mean(gray)
    std_dev = np.std(gray)
    skewness = np.mean((gray - mean) ** 3) / (std_dev ** 3)
    kurtosis = np.mean((gray - mean) ** 4) / (std_dev ** 4)
    entropy = measure.shannon_entropy(gray)
    return mean, std_dev, skewness, kurtosis, entropy

# Preprocess Input Images

In [34]:
def process_images(dataset, num=None):
    train_images_features = []
    train_images_labels = []
    train_data = dataset['train'][:num] if num else dataset['train']

    for idx, data in enumerate(train_data):
        print(f"Processing image {idx+1}/{len(train_data)}")  # Debug: Track progress
        if isinstance(data, dict) and 'image' in data and 'label' in data:
            img = data['image']
            if img is not None:
                img = np.array(img)
                # Extract frequency features
                magnitude_spectrum = compute_fourier_transform(img)
                magnitude_spectrum_mean = np.mean(magnitude_spectrum)

                # Extract LBP features
                lbp = compute_lbp(img)
                lbp_hist, _ = np.histogram(lbp, bins=np.arange(0, n_points + 3), range=(0, n_points + 2))
                lbp_hist = lbp_hist / lbp_hist.sum()

                # Extract depth features
                depth = compute_depth(img)
                depth_mean = np.mean(depth)

                # Extract statistical features
                mean, std_dev, skewness, kurtosis, entropy = extract_statistical_features(img)
                combined_features = np.concatenate([
                    [magnitude_spectrum_mean],
                    lbp_hist,
                    [depth_mean, mean, std_dev, skewness, kurtosis, entropy]
                ])
                train_images_features.append(combined_features)
                train_images_labels.append(data['label'])
        else:
            print(f"Invalid data format at index {idx}: {data}")  # Debug: Check for incorrect formats

    return np.array(train_images_features), np.array(train_images_labels)

In [40]:
len(Dataset['train'])

6427

In [68]:
def process_images(dataset, num=0):
    train_images_features = []
    train_images_labels = []
    if num == 0:
        num = len(dataset['train'])

    for i in range(num):
        img = dataset['train'][i]['image']
        if isinstance(img, Image.Image):
            img = np.array(img)  # Convert PIL image to NumPy array

            # Extract frequency features
            magnitude_spectrum = compute_fourier_transform(img)
            magnitude_spectrum_resized = cv2.resize(magnitude_spectrum, (64, 64))

            # Extract LBP features
            lbp = compute_lbp(img)
            lbp_hist, _ = np.histogram(lbp, bins=np.arange(0, n_points + 3), range=(0, n_points + 2))
            lbp_hist_normalized = lbp_hist / lbp_hist.sum()
            lbp_hist_resized = cv2.resize(lbp_hist_normalized.reshape(-1, 1), (64, 64))

            # Extract statistical features
            mean, std_dev, skewness, kurtosis, entropy = extract_statistical_features(img)
            statistical_features = np.array([mean, std_dev, skewness, kurtosis, entropy])
            statistical_features_resized = cv2.resize(statistical_features.reshape(-1, 1), (64, 64))

            # Combine features into a 3D array
            combined_features = np.stack([
                magnitude_spectrum_resized,
                lbp_hist_resized,
                statistical_features_resized
            ], axis=-1)

            train_images_features.append(combined_features)
            train_images_labels.append(dataset['train'][i]['label'])

    return np.array(train_images_features), np.array(train_images_labels)

In [69]:
# پردازش تصاویر و استخراج ویژگی‌ها
train_images_features,  train_images_labels = process_images(Dataset, 3000)


  magnitude_spectrum = 20 * np.log(np.abs(fshift))


In [70]:
len(train_images_features)

3000

In [74]:
train_images_features.shape

(3000, 64, 64, 3)

In [72]:
train_images_features[1]

array([[[9.98781304e+01, 1.70135498e-02, 1.23999863e+02],
        [1.03112881e+02, 1.70135498e-02, 1.23999863e+02],
        [1.00401371e+02, 1.70135498e-02, 1.23999863e+02],
        ...,
        [8.08424041e+01, 1.70135498e-02, 1.23999863e+02],
        [8.49438029e+01, 1.70135498e-02, 1.23999863e+02],
        [9.55299601e+01, 1.70135498e-02, 1.23999863e+02]],

       [[7.85450649e+01, 1.65896416e-02, 1.23999863e+02],
        [8.88281131e+01, 1.65896416e-02, 1.23999863e+02],
        [8.47440767e+01, 1.65896416e-02, 1.23999863e+02],
        ...,
        [9.16315104e+01, 1.65896416e-02, 1.23999863e+02],
        [9.06426121e+01, 1.65896416e-02, 1.23999863e+02],
        [8.25498088e+01, 1.65896416e-02, 1.23999863e+02]],

       [[9.15188864e+01, 1.50151253e-02, 1.23999863e+02],
        [9.45883978e+01, 1.50151253e-02, 1.23999863e+02],
        [9.23813503e+01, 1.50151253e-02, 1.23999863e+02],
        ...,
        [9.37271837e+01, 1.50151253e-02, 1.23999863e+02],
        [9.23618173e+01, 1.50

In [73]:
len(train_images_features[1])

64

In [67]:
len(train_images_features[0])

33

# Split training dataset and prepare it for train process

In [105]:
# Convert depth features to a numpy array
features = np.array(train_images_features)
labels = np.array(train_images_labels)
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# One-hot encode the labels
y_train = to_categorical(y_train, num_classes=2)
y_test = to_categorical(y_test, num_classes=2)


# Create CNN Model and Train it

In [106]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Build the CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(2, activation='softmax')
])

# Compile the model
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


# Evaluate on dataset tests

In [107]:
# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')


Test Accuracy: 83.17%


# Load test videos

In [108]:
import gdown

file_id = '1a5R5h05hCyw9PzIBhSjy2jLL3dSFy2xA'
destination = '/content/dataset.zip'  # Path where the file will be saved
gdown.download(f'https://drive.google.com/uc?id={file_id}', destination, quiet=False)

import zipfile

with zipfile.ZipFile(destination, 'r') as zip_ref:
    zip_ref.extractall('/content/dataset')

Downloading...
From (original): https://drive.google.com/uc?id=1a5R5h05hCyw9PzIBhSjy2jLL3dSFy2xA
From (redirected): https://drive.google.com/uc?id=1a5R5h05hCyw9PzIBhSjy2jLL3dSFy2xA&confirm=t&uuid=b8ca89d8-b9bb-4ad1-8d07-8a514b991c62
To: /content/dataset.zip
100%|██████████| 377M/377M [00:07<00:00, 48.6MB/s]


# Generate random frames from each video

In [109]:
import random


def extract_frames(video_path, save_path, label, test):
    # Open the video file
    video = cv2.VideoCapture(video_path)
    frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

    # Select one random frame
    random_frame = random.randint(0, frame_count - 1)

    # Set the position of the video to the selected frame
    video.set(cv2.CAP_PROP_POS_FRAMES, random_frame)
    success, frame = video.read()

    # If the frame was successfully read, save it
    if success:
        frame_path = os.path.join(save_path, f"{label}_{random_frame}.jpg")
        cv2.imwrite(frame_path, frame)

        # Convert the frame to a PIL image
        pil_image = Image.open(frame_path)

        # Save the image and label to the dictionary
        test.append({'image': pil_image, 'label': label})

    # Release the video file
    video.release()

In [110]:
import cv2

fake_test_videos_path = '/content/dataset/fake/test'
real_test_videos_path = '/content/dataset/real/test'

save_frames_path = '/content/extracted_frames/test'
# Create the directory if it doesn't exist
if not os.path.exists(save_frames_path):
    os.makedirs(save_frames_path)

# Create a list to hold the dictionary entries
test = []

# Iterate over fake videos and extract frames
for fake_video_file in os.listdir(fake_test_videos_path):
    fake_video_path = os.path.join(fake_test_videos_path, fake_video_file)
    extract_frames(fake_video_path, save_frames_path, 0, test)

# Iterate over real videos and extract frames
for real_video_file in os.listdir(real_test_videos_path):
    real_video_path = os.path.join(real_test_videos_path, real_video_file)
    extract_frames(real_video_path, save_frames_path, 1, test)

# Extract features from each frame

In [111]:
def process_images_from_directory(directory, num=0):
    train_images_features = []
    train_images_labels = []

    image_files = [f for f in os.listdir(directory) if f.endswith(('.png', '.jpg', '.jpeg'))]
    if num == 0 or num > len(image_files):
        num = len(image_files)

    for i, file_name in enumerate(image_files[:num]):
        file_path = os.path.join(directory, file_name)
        try:
            img = Image.open(file_path)
            img = np.array(img)  # Convert PIL image to NumPy array

            # Extract frequency features
            magnitude_spectrum = compute_fourier_transform(img)
            magnitude_spectrum_resized = cv2.resize(magnitude_spectrum, (64, 64))

            # Extract LBP features
            lbp = compute_lbp(img)
            lbp_hist, _ = np.histogram(lbp, bins=np.arange(0, n_points + 3), range=(0, n_points + 2))
            lbp_hist_normalized = lbp_hist / lbp_hist.sum()
            lbp_hist_resized = cv2.resize(lbp_hist_normalized.reshape(-1, 1), (64, 64))

            # Extract statistical features
            mean, std_dev, skewness, kurtosis, entropy = extract_statistical_features(img)
            statistical_features = np.array([mean, std_dev, skewness, kurtosis, entropy])
            statistical_features_resized = cv2.resize(statistical_features.reshape(-1, 1), (64, 64))

            # Combine features into a 3D array
            combined_features = np.stack([
                magnitude_spectrum_resized,
                lbp_hist_resized,
                statistical_features_resized
            ], axis=-1)

            train_images_features.append(combined_features)
            train_images_labels.append(0)  # Change this if you have actual labels

        except Exception as e:
            print(f"Error processing {file_path}: {e}")

    return np.array(train_images_features), np.array(train_images_labels)



# Extract features from each frame and convert labels to one-hot form

In [112]:
directory = '/content/extracted_frames/test'
test_frames_features, test_frames_labels = process_images_from_directory(directory)

test_frames_labels = to_categorical(test_frames_labels, num_classes=2)

In [114]:
test_frames_features.shape

(57, 64, 64, 3)

In [113]:
test_frames_labels.shape

(57, 2)

# Evaluate model on test set

In [115]:
test_loss, test_accuracy = model.evaluate(test_frames_features, test_frames_labels)
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

Test Accuracy: 89.47%


In [117]:
# Make predictions

y_pred_proba = model.predict(test_frames_features)
predictions_for_frames = []
# Output the prediction vector for each test image
for idx, prediction_vector in enumerate(y_pred_proba):
    print(f"Prediction vector for test image {idx+1}: {prediction_vector}")
    predictions_for_frames.append((idx, prediction_vector))


Prediction vector for test image 1: [0.6881892  0.31181082]
Prediction vector for test image 2: [0.4422481 0.5577519]
Prediction vector for test image 3: [0.5819326 0.4180674]
Prediction vector for test image 4: [0.95320356 0.04679642]
Prediction vector for test image 5: [0.86064345 0.1393566 ]
Prediction vector for test image 6: [0.56917477 0.4308252 ]
Prediction vector for test image 7: [0.9508233  0.04917675]
Prediction vector for test image 8: [0.8650313  0.13496868]
Prediction vector for test image 9: [0.6687024  0.33129755]
Prediction vector for test image 10: [0.98190844 0.01809159]
Prediction vector for test image 11: [0.59112114 0.4088789 ]
Prediction vector for test image 12: [0.853582   0.14641802]
Prediction vector for test image 13: [0.66795874 0.33204123]
Prediction vector for test image 14: [0.5820522  0.41794783]
Prediction vector for test image 15: [0.97661954 0.02338043]
Prediction vector for test image 16: [0.38508773 0.6149123 ]
Prediction vector for test image 17: 