In [1]:
# IMPORTANT: SOME KAGGLE DATA SOURCES ARE PRIVATE
# RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES.
import kagglehub
kagglehub.login()


VBox(children=(HTML(value='<center> <img\nsrc=https://www.kaggle.com/static/images/site-logo.png\nalt=\'Kaggle…

Kaggle credentials set.
Kaggle credentials successfully validated.


In [None]:
from google.colab import files
files.upload()  

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"kavyasoni99","key":"8af49e2414e00c9d3df8901ab9fd194a"}'}

In [3]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/kaggle.json
!chmod 600 ~/.kaggle/kaggle.json


In [4]:
!kaggle datasets download -d kavyasoni99/ff-face-cropped

Dataset URL: https://www.kaggle.com/datasets/kavyasoni99/ff-face-cropped
License(s): MIT


In [5]:
!unzip ff-face-cropped.zip -d ff-face-cropped/


Archive:  ff-face-cropped.zip
  inflating: ff-face-cropped/ModelData/Test/fake/01_11__walking_outside_cafe_disgusted__FAFWDR4W.mp4  
  inflating: ff-face-cropped/ModelData/Test/fake/01_21__walk_down_hall_angry__03X7CELV.mp4  
  inflating: ff-face-cropped/ModelData/Test/fake/02_01__secret_conversation__YVGY8LOK.mp4  
  inflating: ff-face-cropped/ModelData/Test/fake/02_06__podium_speech_happy__N8OSN8P6.mp4  
  inflating: ff-face-cropped/ModelData/Test/fake/02_07__walking_down_street_outside_angry__O4SXNLRL.mp4  
  inflating: ff-face-cropped/ModelData/Test/fake/02_15__secret_conversation__MZWH8ATN.mp4  
  inflating: ff-face-cropped/ModelData/Test/fake/02_15__talking_against_wall__HTG660F8.mp4  
  inflating: ff-face-cropped/ModelData/Test/fake/02_15__walking_and_outside_surprised__MZWH8ATN.mp4  
  inflating: ff-face-cropped/ModelData/Test/fake/02_21__talking_angry_couch__Z0XHPQAR.mp4  
  inflating: ff-face-cropped/ModelData/Test/fake/03_07__walking_down_indoor_hall_disgust__PWXXULHR.mp4  


#CSV Creation

In [None]:
import os
import csv

def get_video_data(folder_path, tag):
    
    video_data = []
    for idx, video_name in enumerate(os.listdir(folder_path)):
        video_path = os.path.join(folder_path, video_name)
        if os.path.isfile(video_path):
            video_data.append((idx, video_path, tag))
    return video_data

def create_csv_file(output_csv, data):
    with open(output_csv, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Index', 'Video Name', 'Tag'])
        writer.writerows(data)

def main():
    train_folder = '/content/ff-face-cropped/ModelData/Test'
    output_csv = './test.csv'

    real_folder = os.path.join(train_folder, 'real')
    fake_folder = os.path.join(train_folder, 'fake')

    real_videos = get_video_data(real_folder, 'real')
    fake_videos = get_video_data(fake_folder, 'fake')

    all_videos = real_videos + fake_videos
    create_csv_file(output_csv, all_videos)

if __name__ == "__main__":
    main()

#Preprocessing for EfficientNetB0

In [8]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LSTM, Dense, TimeDistributed, GlobalAveragePooling2D, Input
from tensorflow.keras.preprocessing.image import img_to_array

In [9]:
#Load train and test csv files
train_df = pd.read_csv('./train.csv')
test_df = pd.read_csv('./test.csv')

## Extract Frames from video

In [None]:
from tensorflow.keras.applications.efficientnet import preprocess_input

def extract_frames(video_path, num_frames=30, target_size=(224, 224)):
    cap = cv2.VideoCapture(video_path)
    frames = []
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_interval = max(1, total_frames // num_frames)

    for i in range(num_frames):
        cap.set(cv2.CAP_PROP_POS_FRAMES, i * frame_interval)
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.convertScaleAbs(frame, alpha=1.2, beta=10)

        frame = cv2.resize(frame, target_size, interpolation=cv2.INTER_AREA)

        frame = preprocess_input(frame.astype(np.float32))

        frames.append(frame)

    cap.release()
    return np.stack(frames, axis=0)


In [11]:
from tensorflow.keras.utils import Sequence
import numpy as np

class VideoDataGenerator(Sequence):
    def __init__(self, df, batch_size=4, num_frames=30, target_size=(224, 224), shuffle=True):
        self.df = df.reset_index(drop=True)
        self.batch_size = batch_size
        self.num_frames = num_frames
        self.target_size = target_size
        self.shuffle = shuffle
        self.indices = np.arange(len(self.df))
        self.on_epoch_end()

    def __len__(self):
        return int(np.ceil(len(self.df) / self.batch_size))

    def __getitem__(self, idx):
        batch_indices = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_df = self.df.iloc[batch_indices]

        X, y = [], []
        for _, row in batch_df.iterrows():
            frames = extract_frames(row['Video Name'], self.num_frames, self.target_size)
            if frames.shape[0] == self.num_frames:
                X.append(frames)
                y.append(1 if row['Tag'].lower() == 'fake' else 0)

        return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)


In [None]:
#Prepare datasets
train_gen = VideoDataGenerator(train_df, batch_size=4)
test_gen = VideoDataGenerator(test_df, batch_size=4, shuffle=False)

## Defining Model Architecture

In [None]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Input, TimeDistributed, GlobalAveragePooling2D, LSTM, Dense
from tensorflow.keras.models import Model

# Input shape: 30 frames of size 224x224x3
input_layer = Input(shape=(30, 224, 224, 3))

# EfficientNetB0 base model for feature extraction
base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False

# Apply EfficientNetB0 to each frame
x = TimeDistributed(base_model)(input_layer)
x = TimeDistributed(GlobalAveragePooling2D())(x)

# LSTM layers for temporal modeling
x = LSTM(128, return_sequences=True)(x)
x = LSTM(64, return_sequences=False)(x)

# Fully connected layers
x = Dense(64, activation='relu')(x)
out = Dense(1, activation='sigmoid')(x)

# Build and compile the model
model = Model(inputs=input_layer, outputs=out)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
model.fit(train_gen, validation_data=test_gen, epochs=10)

  self._warn_if_super_not_called()


Epoch 1/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m435s[0m 2s/step - accuracy: 0.5367 - loss: 0.6916 - val_accuracy: 0.6441 - val_loss: 0.6295
Epoch 2/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 519ms/step - accuracy: 0.6813 - loss: 0.5958 - val_accuracy: 0.5932 - val_loss: 0.6573
Epoch 3/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 539ms/step - accuracy: 0.7177 - loss: 0.5651 - val_accuracy: 0.6949 - val_loss: 0.5654
Epoch 4/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 527ms/step - accuracy: 0.7472 - loss: 0.5190 - val_accuracy: 0.7458 - val_loss: 0.5923
Epoch 5/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 530ms/step - accuracy: 0.7822 - loss: 0.5069 - val_accuracy: 0.7627 - val_loss: 0.5166
Epoch 6/10
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 533ms/step - accuracy: 0.8021 - loss: 0.4286 - val_accuracy: 0.7627 - val_loss: 0.5241
Epoch 7/10
[1m85/85[0m

<keras.src.callbacks.history.History at 0x7d97614a6b90>

In [None]:
model.save('EfficientNetB0_LSTM.keras')

In [None]:
model.summary()

# Updated model architecture with residual layer, bsae model being trainable and one dropout layer

In [None]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Input, TimeDistributed, GlobalAveragePooling2D, LSTM, Dense, Dropout, Add
from tensorflow.keras.models import Model

# Input: 30 frames of 224x224 RGB images
input_layer = Input(shape=(30, 224, 224, 3))

# EfficientNetB0 base model
base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False

# Unfreeze last 5 layers
for layer in base_model.layers[-5:]:
    layer.trainable = True

# TimeDistributed feature extraction
x = TimeDistributed(base_model)(input_layer)
x = TimeDistributed(GlobalAveragePooling2D())(x)

# LSTM layers
x = LSTM(128, return_sequences=True)(x)
x = Dropout(0.3)(x)
x = LSTM(64, return_sequences=False)(x)
x = Dropout(0.3)(x)

# Dense + Residual
res = Dense(64, activation='relu')(x)
res = Dropout(0.3)(res)
x = Add()([x, res])

# Output
out = Dense(1, activation='sigmoid')(x)

# Model build
model2 = Model(inputs=input_layer, outputs=out)
model2.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
model2.summary()

In [None]:
model2.fit(train_gen, validation_data=test_gen, epochs=15)

  self._warn_if_super_not_called()


Epoch 1/15
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m448s[0m 2s/step - accuracy: 0.5182 - loss: 0.7467 - val_accuracy: 0.5932 - val_loss: 0.6700
Epoch 2/15
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 553ms/step - accuracy: 0.6420 - loss: 0.6461 - val_accuracy: 0.7119 - val_loss: 0.5805
Epoch 3/15
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 560ms/step - accuracy: 0.7029 - loss: 0.5457 - val_accuracy: 0.7119 - val_loss: 0.5482
Epoch 4/15
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 600ms/step - accuracy: 0.7817 - loss: 0.5241 - val_accuracy: 0.7797 - val_loss: 0.4719
Epoch 5/15
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 567ms/step - accuracy: 0.7576 - loss: 0.4813 - val_accuracy: 0.7627 - val_loss: 0.4816
Epoch 6/15
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 599ms/step - accuracy: 0.8207 - loss: 0.4379 - val_accuracy: 0.7797 - val_loss: 0.4498
Epoch 7/15
[1m85/85[0m

<keras.src.callbacks.history.History at 0x7e27a1994810>

In [None]:
model2.save('EfficientNetB0_LSTM_Modified.keras')

## Lets try unfreezing more layers, more dropout, and data augmentation

## Augmentation

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np

frame_augmenter = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.8, 1.2],
    fill_mode='nearest'
)

def augment_frame(frame):
    if frame.dtype != np.float32:
        frame = frame.astype(np.float32) / 255.0

    frame = np.expand_dims(frame, axis=0)
    augmented = frame_augmenter.flow(frame, batch_size=1, shuffle=False)[0][0]
    return augmented


In [None]:
from tensorflow.keras.applications.efficientnet import preprocess_input

def extract_frames(video_path, num_frames=30, target_size=(224, 224), augment=False):
    cap = cv2.VideoCapture(video_path)
    frames = []
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_interval = max(1, total_frames // num_frames)

    for i in range(num_frames):
        cap.set(cv2.CAP_PROP_POS_FRAMES, i * frame_interval)
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.convertScaleAbs(frame, alpha=1.2, beta=10)

        # Resize and interpolate
        frame = cv2.resize(frame, target_size, interpolation=cv2.INTER_AREA)

        # Apply augmentation if enabled
        if augment:
            frame = augmenter(image=frame)

        # Preprocess for EfficientNet
        frame = preprocess_input(frame.astype(np.float32))

        frames.append(frame)

    cap.release()
    return np.stack(frames, axis=0)


In [16]:
from tensorflow.keras.utils import Sequence

class VideoDataGenerator(Sequence):
    def __init__(self, df, batch_size=4, num_frames=30, target_size=(224, 224), shuffle=True, augment=False):
        self.df = df.reset_index(drop=True)
        self.batch_size = batch_size
        self.num_frames = num_frames
        self.target_size = target_size
        self.shuffle = shuffle
        self.augment = augment
        self.indices = np.arange(len(self.df))
        self.on_epoch_end()

    def __len__(self):
        return int(np.ceil(len(self.df) / self.batch_size))

    def __getitem__(self, idx):
        batch_indices = self.indices[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_df = self.df.iloc[batch_indices]

        X, y = [], []
        for _, row in batch_df.iterrows():
            frames = extract_frames(row['Video Name'], self.num_frames, self.target_size, augment=self.augment)
            if frames.shape[0] == self.num_frames:
                X.append(frames)
                y.append(1 if row['Tag'].lower() == 'fake' else 0)

        return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)


In [17]:
#Prepare datasets
train_gen = VideoDataGenerator(train_df, batch_size=4)
test_gen = VideoDataGenerator(test_df, batch_size=4, shuffle=False)

In [18]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Input, TimeDistributed, GlobalAveragePooling2D, LSTM, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Input shape: 30 frames of size 224x224x3
input_layer = Input(shape=(30, 224, 224, 3))

# EfficientNetB0 base model for feature extraction
base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False

# Unfreeze last 20 layers
for layer in base_model.layers[-20:]:
    layer.trainable = True

# Apply EfficientNetB0 to each frame
x = TimeDistributed(base_model)(input_layer)
x = TimeDistributed(GlobalAveragePooling2D())(x)

# LSTM layers with Dropout
x = LSTM(128, return_sequences=True)(x)
x = Dropout(0.3)(x)
x = LSTM(64, return_sequences=False)(x)
x = Dropout(0.3)(x)

# Fully connected layers with Dropout
x = Dense(64, activation='relu')(x)
x = Dropout(0.3)(x)
out = Dense(1, activation='sigmoid')(x)

# Build and compile the model
model = Model(inputs=input_layer, outputs=out)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Callbacks
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-6)


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [19]:
model.fit(train_gen,
          validation_data=test_gen,
          epochs=50,
          callbacks=[early_stop, lr_scheduler])

  self._warn_if_super_not_called()


Epoch 1/50
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m455s[0m 2s/step - accuracy: 0.5855 - loss: 0.6905 - val_accuracy: 0.6271 - val_loss: 0.6406 - learning_rate: 0.0010
Epoch 2/50
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 598ms/step - accuracy: 0.7197 - loss: 0.5574 - val_accuracy: 0.7288 - val_loss: 0.5150 - learning_rate: 0.0010
Epoch 3/50
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 595ms/step - accuracy: 0.7050 - loss: 0.5701 - val_accuracy: 0.8136 - val_loss: 0.4283 - learning_rate: 0.0010
Epoch 4/50
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 594ms/step - accuracy: 0.8313 - loss: 0.4277 - val_accuracy: 0.8136 - val_loss: 0.4365 - learning_rate: 0.0010
Epoch 5/50
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 594ms/step - accuracy: 0.7500 - loss: 0.4652 - val_accuracy: 0.6441 - val_loss: 0.9319 - learning_rate: 0.0010
Epoch 6/50
[1m85/85[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

<keras.src.callbacks.history.History at 0x78fcdc4a5690>

In [20]:
model.save('EfficientNetB0_LSTM_Final.keras')