In [None]:
# Mount Google Drive at /content/drive (Colab only); the checkpoint and test
# folders referenced later in this notebook are all under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch
import torch.nn as nn
from torchvision import transforms
from PIL import Image
import cv2
from tqdm import tqdm
import timm

# -----------------------------
# Device configuration
# -----------------------------
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
# The model and every input batch are moved to this device later on.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"üîß Using device: {device}")

# -----------------------------
# Define Hybrid Xception + ViT model
# -----------------------------
class HybridXceptionViT(nn.Module):
    """Image classifier that fuses features from an Xception and a ViT-Large backbone.

    The input batch is resized to 299x299 for the Xception branch and to
    224x224 for the ViT branch. The two backbone outputs are concatenated
    along dim 1 and fed to a small MLP head
    (Linear -> ReLU -> Dropout(0.3) -> Linear) that emits ``num_classes`` logits.

    Args:
        num_classes: Output size of the final linear layer.
        freeze_backbones: When True, gradients are disabled for every
            parameter of both backbones; the head stays trainable.
        pretrained: Forwarded to ``timm.create_model`` for both backbones.
            Defaults to True (the original behaviour). Pass False when a
            full checkpoint is loaded right after construction, so timm does
            not have to fetch pretrained weights that get overwritten anyway.
    """

    # Spatial size each branch is resized to inside ``forward``.
    _XCEPTION_INPUT_SIZE = (299, 299)
    _VIT_INPUT_SIZE = (224, 224)

    def __init__(self, num_classes=2, freeze_backbones=False, pretrained=True):
        super().__init__()

        # Both backbones are created with num_classes=0, i.e. used as feature
        # extractors; their output width is read from ``num_features``.
        self.xception = timm.create_model('xception', pretrained=pretrained, num_classes=0)
        self.xception_fc_dim = self.xception.num_features  # usually 2048

        self.vit = timm.create_model('vit_large_patch16_224.orig_in21k', pretrained=pretrained, num_classes=0)
        self.vit_fc_dim = self.vit.num_features  # expected to be 1024 for ViT-Large

        # Optionally freeze both backbones so only the head receives gradients.
        if freeze_backbones:
            for backbone in (self.xception, self.vit):
                for param in backbone.parameters():
                    param.requires_grad = False

        # Classification head operating on the concatenated feature vector.
        self.fc = nn.Sequential(
            nn.Linear(self.xception_fc_dim + self.vit_fc_dim, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Return class logits for an image batch.

        Args:
            x: Image batch; bilinear interpolation with a 2-tuple size
                requires a 4-D tensor (N, C, H, W).

        Returns:
            Tensor of logits produced by the head, one row per input image.
        """
        # Each backbone gets its own resolution, so the same batch is resized twice.
        x_xception = nn.functional.interpolate(
            x, size=self._XCEPTION_INPUT_SIZE, mode='bilinear', align_corners=False)
        x_vit = nn.functional.interpolate(
            x, size=self._VIT_INPUT_SIZE, mode='bilinear', align_corners=False)

        # Per-branch feature vectors.
        x_feat = self.xception(x_xception)
        vit_feat = self.vit(x_vit)

        # Fuse along the feature dimension and classify.
        combined = torch.cat([x_feat, vit_feat], dim=1)
        return self.fc(combined)

# -----------------------------
# Load trained model
# -----------------------------
# Build the network on the selected device, then overwrite its weights with
# the fine-tuned checkpoint stored on Drive.
model = HybridXceptionViT(num_classes=2, freeze_backbones=False).to(device)
model_path = '/content/drive/MyDrive/ModelTrain/DFD/checkpoints/best_model.pth'

# SECURITY NOTE: torch.load unpickles the file, so only load checkpoints from
# a trusted source (consider weights_only=True if the file is a plain state dict).
# strict=False tolerates key mismatches, which would otherwise leave layers
# silently at their initial values; keep the report so mismatches are visible.
_load_report = model.load_state_dict(torch.load(model_path, map_location=device), strict=False)
if _load_report.missing_keys:
    print(f"WARNING: {len(_load_report.missing_keys)} model key(s) were not found in the checkpoint "
          f"and keep their initial values, e.g. {_load_report.missing_keys[:5]}")
if _load_report.unexpected_keys:
    print(f"WARNING: {len(_load_report.unexpected_keys)} checkpoint key(s) were ignored, "
          f"e.g. {_load_report.unexpected_keys[:5]}")

# Inference mode for layers such as Dropout.
model.eval()

print("‚úÖ HybridXceptionViT model loaded successfully!")

# -----------------------------
# Define transforms (same as training)
# -----------------------------
# Preprocessing applied to each PIL image before it is batched for the model:
# resize to 224x224, convert to a tensor, then normalize every channel with
# the mean/std below.
# NOTE(review): these look like the standard ImageNet statistics — confirm
# they match what was used during training.
transform = transforms.Compose([
    transforms.Resize((224, 224)),   # model.forward re-interpolates this to 299x299 (Xception) and 224x224 (ViT)
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# -----------------------------
# Video Classification Function
# -----------------------------
def predict_video(video_path, model, transform, device, frame_skip=1):
    """Classify a video as manipulated (0) or real (1) by per-frame majority vote.

    Frames are read sequentially; every ``frame_skip``-th frame is converted
    from BGR to RGB, passed through ``transform`` and ``model`` as a batch of
    one, and the argmax of the output is counted as a vote (index 0 =
    manipulated, anything else = real).

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        model: Callable returning logits for a batch of one image.
        transform: Callable turning a PIL image into a tensor.
        device: Device the image batch is moved to before inference.
        frame_skip: Classify one frame out of every ``frame_skip`` frames
            read (1 = every frame). Must be >= 1.

    Returns:
        0 if strictly more sampled frames voted "manipulated", 1 otherwise
        (ties resolve to "real"). ``None`` if no frame could be classified,
        e.g. the video could not be opened, is empty, or is shorter than
        ``frame_skip`` frames.

    Raises:
        ValueError: If ``frame_skip`` is smaller than 1.
    """
    # frame_skip == 0 would otherwise surface as a ZeroDivisionError in the modulo below.
    if frame_skip < 1:
        raise ValueError(f"frame_skip must be >= 1, got {frame_skip}")

    cap = cv2.VideoCapture(video_path)
    frame_count = 0
    manipulated_count = 0  # class 0
    real_count = 0         # class 1

    try:
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # A non-positive frame count is not a usable progress total;
        # passing None lets tqdm show a plain counter instead.
        progress_total = total_frames if total_frames > 0 else None

        with tqdm(total=progress_total, desc="Processing Video", unit="frame") as pbar:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                frame_count += 1
                pbar.update(1)

                # Only every frame_skip-th frame is sent through the model.
                if frame_count % frame_skip != 0:
                    continue

                # OpenCV delivers BGR arrays; the transform expects an RGB PIL image.
                image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                image = transform(image).unsqueeze(0).to(device)

                with torch.no_grad():
                    outputs = model(image)
                predicted = outputs.argmax(dim=1)

                if predicted.item() == 0:
                    manipulated_count += 1
                else:
                    real_count += 1
    finally:
        # Release the capture handle even if decoding or inference raised.
        cap.release()

    classified_count = manipulated_count + real_count

    # -----------------------------
    # Final result (majority voting)
    # -----------------------------
    print(f"\nTotal Frames Processed: {frame_count}")
    print(f"Frames Classified: {classified_count} (frame_skip={frame_skip})")
    print(f"üü• Manipulated Frames (class 0): {manipulated_count}")
    print(f"üü© Real Frames (class 1): {real_count}")

    # Without any vote there is nothing to decide; do not report "Real" by default.
    if classified_count == 0:
        print(f"WARNING: no frames could be classified for {video_path}; returning None.")
        return None

    if manipulated_count > real_count:
        print("‚ö†Ô∏è Final Result: Manipulated (Class 0)")
        return 0
    else:
        print("‚úÖ Final Result: Real (Class 1)")
        return 1

üîß Using device: cpu
‚úÖ HybridXceptionViT model loaded successfully!


In [None]:
def predict_image(image_path, model, transform, device):
    """Classify a single image file and return its label.

    The image is opened with PIL, converted to RGB, run through ``transform``
    and sent to ``model`` as a batch of one on ``device``.

    Returns:
        "Manipulated" when the highest-scoring class index is 0, "Real" when
        it is 1. The label is also printed.
    """
    # Class index -> human-readable label.
    class_names = {0: "Manipulated", 1: "Real"}

    # Load as RGB and add the batch dimension expected by the model.
    rgb_image = Image.open(image_path).convert("RGB")
    batch = transform(rgb_image).unsqueeze(0).to(device)

    # Inference only; no gradients needed.
    with torch.no_grad():
        logits = model(batch)
        predicted_index = logits.argmax(dim=1).item()

    label = class_names[predicted_index]
    print(f"Prediction: {label}")
    return label

In [None]:
import os

# Function to process all videos and images in a folder
def predict_files_in_folder(folder_path, model, transform, device, frame_skip=1):
    """Run the classifier on every supported video and image directly inside a folder.

    Entries are visited in sorted name order so repeated runs produce the same
    output order (``os.listdir`` itself returns entries in arbitrary order).
    Sub-directories and files with other extensions are skipped.

    Args:
        folder_path: Directory to scan (not recursive).
        model, transform, device: Passed through to the per-file predictors.
        frame_skip: Forwarded to ``predict_video`` only; images ignore it.

    Returns:
        Dict mapping file name to the predictor's return value. Note that the
        value is whatever ``predict_video`` returns for videos and whatever
        ``predict_image`` returns for images, so the two kinds of entries may
        have different types.
    """
    video_extensions = (".mp4", ".avi", ".mov", ".mkv")
    image_extensions = (".jpg", ".jpeg", ".png", ".bmp")
    results = {}

    for file_name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file_name)
        # Only regular files are candidates; directories are ignored.
        if not os.path.isfile(file_path):
            continue

        # Extension matching is case-insensitive.
        lowered_name = file_name.lower()
        if lowered_name.endswith(video_extensions):
            print(f"\nüîé Testing video: {file_name}")
            results[file_name] = predict_video(file_path, model, transform, device, frame_skip=frame_skip)
        elif lowered_name.endswith(image_extensions):
            print(f"\nüîé Testing image: {file_name}")
            results[file_name] = predict_image(file_path, model, transform, device)

    return results

In [None]:
# Evaluate the "real" test folder, classifying every 5th frame of each video.
# The helper defined in this notebook is predict_files_in_folder; the previous
# name (predict_videos_in_folder) is not defined anywhere here and raised a
# NameError on a fresh kernel. The recorded output below uses a different
# message format ("Class 0 Count" / "Class 1 Count") than predict_video prints,
# so it predates the current helpers.
folder_path = "/content/drive/MyDrive/ModelTrain/DFD/Test/real"
all_results = predict_files_in_folder(folder_path, model, transform, device, frame_skip=5)


üîé Testing video: 01__talking_against_wall.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 860/860 [09:18<00:00,  1.54frame/s]



Total Frames: 860
Class 0 Count: 7
Class 1 Count: 165
‚úÖ Result: Class 1 (e.g., No_Harassment or Real)

üîé Testing video: 01__outside_talking_still_laughing.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 851/851 [09:13<00:00,  1.54frame/s]



Total Frames: 851
Class 0 Count: 5
Class 1 Count: 165
‚úÖ Result: Class 1 (e.g., No_Harassment or Real)

üîé Testing video: 01__secret_conversation.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 965/965 [10:29<00:00,  1.53frame/s]



Total Frames: 965
Class 0 Count: 2
Class 1 Count: 191
‚úÖ Result: Class 1 (e.g., No_Harassment or Real)

üîé Testing video: 01__exit_phone_room.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 305/305 [03:18<00:00,  1.54frame/s]



Total Frames: 305
Class 0 Count: 4
Class 1 Count: 57
‚úÖ Result: Class 1 (e.g., No_Harassment or Real)

üîé Testing video: 01__kitchen_pan.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 560/560 [06:07<00:00,  1.52frame/s]



Total Frames: 560
Class 0 Count: 13
Class 1 Count: 99
‚úÖ Result: Class 1 (e.g., No_Harassment or Real)

üîé Testing video: 01__meeting_serious.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1045/1045 [11:19<00:00,  1.54frame/s]



Total Frames: 1045
Class 0 Count: 162
Class 1 Count: 47
‚úÖ Result: Class 0 (e.g., Harassment or Manipulated)

üîé Testing video: 01__kitchen_still.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 800/800 [08:43<00:00,  1.53frame/s]



Total Frames: 800
Class 0 Count: 15
Class 1 Count: 145
‚úÖ Result: Class 1 (e.g., No_Harassment or Real)

üîé Testing video: 01__outside_talking_pan_laughing.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 626/626 [06:54<00:00,  1.51frame/s]



Total Frames: 626
Class 0 Count: 21
Class 1 Count: 104
‚úÖ Result: Class 1 (e.g., No_Harassment or Real)

üîé Testing video: 01__hugging_happy.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 787/787 [08:35<00:00,  1.53frame/s]



Total Frames: 787
Class 0 Count: 157
Class 1 Count: 0
‚úÖ Result: Class 0 (e.g., Harassment or Manipulated)

üîé Testing video: 01__podium_speech_happy.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 902/902 [09:49<00:00,  1.53frame/s]


Total Frames: 902
Class 0 Count: 0
Class 1 Count: 180
‚úÖ Result: Class 1 (e.g., No_Harassment or Real)





In [None]:
# Evaluate the "manipulated Image" test folder.
# frame_skip is only forwarded to predict_video, so it has no effect on image files.
# NOTE: folder_path and all_results are rebound here, replacing the previous cell's values.
folder_path = "/content/drive/MyDrive/ModelTrain/DFD/Test/manipulated Image"
all_results = predict_files_in_folder(folder_path, model, transform, device, frame_skip=1)


üîé Testing image: real_01_02__outside_talking_still_laughing__YVGY8LOK.mp4_frame0.jpg
Prediction: Manipulated

üîé Testing image: real_01_02__exit_phone_room__YVGY8LOK.mp4_frame0.jpg
Prediction: Manipulated

üîé Testing image: real_01_02__hugging_happy__YVGY8LOK.mp4_frame6.jpg
Prediction: Manipulated

üîé Testing image: real_01_02__meeting_serious__YVGY8LOK.mp4_frame0.jpg
Prediction: Manipulated

üîé Testing image: real_01_02__secret_conversation__YVGY8LOK.mp4_frame0.jpg
Prediction: Real

üîé Testing image: real_01_02__talking_against_wall__YVGY8LOK.mp4_frame13.jpg
Prediction: Manipulated

üîé Testing image: real_01_02__walk_down_hall_angry__YVGY8LOK.mp4_frame3.jpg
Prediction: Manipulated


In [None]:
# Evaluate the "real image" test folder.
# frame_skip is only forwarded to predict_video, so it has no effect on image files.
# NOTE: folder_path and all_results are rebound here, replacing the previous cell's values.
folder_path = "/content/drive/MyDrive/ModelTrain/DFD/Test/real image"
all_results = predict_files_in_folder(folder_path, model, transform, device, frame_skip=1)


üîé Testing image: real__podium_speech_happy.mp4_frame10.jpg
Prediction: Real

üîé Testing image: real_01__hugging_happy.mp4_frame0.jpg
Prediction: Manipulated

üîé Testing image: real_01__exit_phone_room.mp4_frame0.jpg
Prediction: Real

üîé Testing image: real_01__kitchen_pan.mp4_frame0.jpg
Prediction: Real

üîé Testing image: real_01__kitchen_still.mp4_frame1.jpg
Prediction: Real

üîé Testing image: real__meeting_serious.mp4_frame11.jpg
Prediction: Manipulated

üîé Testing image: real__outside_talking_pan_laughing.mp4_frame7.jpg
Prediction: Manipulated

üîé Testing image: real__outside_talking_pan_laughing.mp4_frame9.jpg
Prediction: Real

üîé Testing image: real__meeting_serious.mp4_frame15.jpg
Prediction: Real

üîé Testing image: real_01__meeting_serious.mp4_frame7.jpg
Prediction: Real


In [None]:
# Evaluate the "manipulated" test folder; frame_skip=15 sends only every
# 15th frame of each video through the model.
# NOTE: folder_path and all_results are rebound here, replacing the previous cell's values.
folder_path = "/content/drive/MyDrive/ModelTrain/DFD/Test/manipulated"
all_results = predict_files_in_folder(folder_path, model, transform, device, frame_skip=15)


üîé Testing video: 01_02__hugging_happy__YVGY8LOK.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 578/578 [02:06<00:00,  4.57frame/s]



Total Frames Processed: 578
üü• Manipulated Frames (class 0): 37
üü© Real Frames (class 1): 1
‚ö†Ô∏è Final Result: Manipulated (Class 0)

üîé Testing video: 01_02__exit_phone_room__YVGY8LOK.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 210/210 [00:47<00:00,  4.42frame/s]



Total Frames Processed: 210
üü• Manipulated Frames (class 0): 12
üü© Real Frames (class 1): 2
‚ö†Ô∏è Final Result: Manipulated (Class 0)

üîé Testing video: 01_02__outside_talking_still_laughing__YVGY8LOK.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 727/727 [02:37<00:00,  4.61frame/s]



Total Frames Processed: 727
üü• Manipulated Frames (class 0): 44
üü© Real Frames (class 1): 4
‚ö†Ô∏è Final Result: Manipulated (Class 0)

üîé Testing video: 01_02__talking_against_wall__YVGY8LOK.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 841/841 [03:09<00:00,  4.44frame/s]



Total Frames Processed: 841
üü• Manipulated Frames (class 0): 56
üü© Real Frames (class 1): 0
‚ö†Ô∏è Final Result: Manipulated (Class 0)

üîé Testing video: 01_02__meeting_serious__YVGY8LOK.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1044/1044 [03:49<00:00,  4.55frame/s]



Total Frames Processed: 1044
üü• Manipulated Frames (class 0): 46
üü© Real Frames (class 1): 23
‚ö†Ô∏è Final Result: Manipulated (Class 0)

üîé Testing video: 01_02__secret_conversation__YVGY8LOK.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 905/905 [03:16<00:00,  4.60frame/s]



Total Frames Processed: 905
üü• Manipulated Frames (class 0): 31
üü© Real Frames (class 1): 29
‚ö†Ô∏è Final Result: Manipulated (Class 0)

üîé Testing video: 01_02__talking_angry_couch__YVGY8LOK.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1455/1455 [05:22<00:00,  4.52frame/s]



Total Frames Processed: 1455
üü• Manipulated Frames (class 0): 89
üü© Real Frames (class 1): 8
‚ö†Ô∏è Final Result: Manipulated (Class 0)

üîé Testing video: 01_02__walk_down_hall_angry__YVGY8LOK.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 217/217 [00:45<00:00,  4.74frame/s]



Total Frames Processed: 217
üü• Manipulated Frames (class 0): 10
üü© Real Frames (class 1): 4
‚ö†Ô∏è Final Result: Manipulated (Class 0)

üîé Testing video: 01_02__walking_and_outside_surprised__YVGY8LOK.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 808/808 [03:00<00:00,  4.47frame/s]



Total Frames Processed: 808
üü• Manipulated Frames (class 0): 51
üü© Real Frames (class 1): 2
‚ö†Ô∏è Final Result: Manipulated (Class 0)

üîé Testing video: 01_02__walking_down_indoor_hall_disgust__YVGY8LOK.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 773/773 [02:53<00:00,  4.45frame/s]


Total Frames Processed: 773
üü• Manipulated Frames (class 0): 36
üü© Real Frames (class 1): 15
‚ö†Ô∏è Final Result: Manipulated (Class 0)





In [None]:
# Evaluate the "real" test folder again, this time with frame_skip=15 so only
# every 15th frame of each video is sent through the model.
# NOTE: folder_path and all_results are rebound here, replacing the previous cell's values.
folder_path = "/content/drive/MyDrive/ModelTrain/DFD/Test/real"
all_results = predict_files_in_folder(folder_path, model, transform, device, frame_skip=15)


üîé Testing video: 01__exit_phone_room.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 305/305 [01:06<00:00,  4.56frame/s]



Total Frames Processed: 305
üü• Manipulated Frames (class 0): 9
üü© Real Frames (class 1): 11
‚úÖ Final Result: Real (Class 1)

üîé Testing video: 01__kitchen_pan.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 560/560 [02:09<00:00,  4.33frame/s]



Total Frames Processed: 560
üü• Manipulated Frames (class 0): 9
üü© Real Frames (class 1): 28
‚úÖ Final Result: Real (Class 1)

üîé Testing video: 01__meeting_serious.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1045/1045 [03:47<00:00,  4.58frame/s]



Total Frames Processed: 1045
üü• Manipulated Frames (class 0): 32
üü© Real Frames (class 1): 37
‚úÖ Final Result: Real (Class 1)

üîé Testing video: 01__kitchen_still.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 800/800 [02:54<00:00,  4.57frame/s]



Total Frames Processed: 800
üü• Manipulated Frames (class 0): 5
üü© Real Frames (class 1): 48
‚úÖ Final Result: Real (Class 1)

üîé Testing video: 01__outside_talking_pan_laughing.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 626/626 [02:17<00:00,  4.57frame/s]



Total Frames Processed: 626
üü• Manipulated Frames (class 0): 28
üü© Real Frames (class 1): 13
‚ö†Ô∏è Final Result: Manipulated (Class 0)

üîé Testing video: 01__hugging_happy.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 787/787 [02:58<00:00,  4.41frame/s]



Total Frames Processed: 787
üü• Manipulated Frames (class 0): 40
üü© Real Frames (class 1): 12
‚ö†Ô∏è Final Result: Manipulated (Class 0)

üîé Testing video: 01__secret_conversation.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 965/965 [03:33<00:00,  4.51frame/s]



Total Frames Processed: 965
üü• Manipulated Frames (class 0): 19
üü© Real Frames (class 1): 45
‚úÖ Final Result: Real (Class 1)

üîé Testing video: 01__outside_talking_still_laughing.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 851/851 [03:05<00:00,  4.59frame/s]



Total Frames Processed: 851
üü• Manipulated Frames (class 0): 40
üü© Real Frames (class 1): 16
‚ö†Ô∏è Final Result: Manipulated (Class 0)

üîé Testing video: 01__podium_speech_happy.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 902/902 [03:20<00:00,  4.50frame/s]



Total Frames Processed: 902
üü• Manipulated Frames (class 0): 22
üü© Real Frames (class 1): 38
‚úÖ Final Result: Real (Class 1)

üîé Testing video: 01__talking_against_wall.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 860/860 [03:10<00:00,  4.51frame/s]


Total Frames Processed: 860
üü• Manipulated Frames (class 0): 0
üü© Real Frames (class 1): 57
‚úÖ Final Result: Real (Class 1)



