In [1]:
# Colab-only step: mount Google Drive at /content/drive so that the paths
# under /content/drive/MyDrive used further down in this notebook resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import torch
import torch.nn as nn
from torchvision import transforms
from PIL import Image
import cv2
import timm
from tqdm import tqdm   # progress bar

# Device configuration: run on the GPU when one is available, else on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Preprocessing applied to every frame/image before inference
# (per the original note, the same pipeline that was used during training).
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Build the architecture only (pretrained=False); the two-class weights come
# from the fine-tuned checkpoint loaded below.
model = timm.create_model('vit_large_patch16_224.orig_in21k', pretrained=False, num_classes=2)
model_path = '/content/drive/MyDrive/ModelTrain/DFD/checkpoints/best_model.pth'

# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint cannot execute arbitrary code while being loaded. The
# loaded object is passed straight to load_state_dict, i.e. it is expected to
# be a plain state_dict, which this mode supports.
model.load_state_dict(
    torch.load(model_path, map_location=device, weights_only=True),
    strict=True,  # fail loudly on any missing or unexpected parameter name
)
model.to(device)
model.eval()  # switch layers such as dropout to inference behaviour

def predict_video(video_path, model, transform, device, frame_skip=1):
    """Classify a video as "Real" or "Manipulated" by majority vote over frames.

    Frames are read sequentially. Every frame whose 1-based index is a multiple
    of ``frame_skip`` is converted from BGR to RGB, preprocessed with
    ``transform``, given a batch dimension and classified by ``model``.
    Predicted class index 0 counts as a "manipulated" vote, any other index as
    a "real" vote. A progress bar and the final tallies are printed.

    The model is used as-is; callers should already have put it in eval mode.

    Args:
        video_path: Path of the video file, handed to ``cv2.VideoCapture``.
        model: Callable classifier returning per-class scores for a
            single-frame batch.
        transform: Callable turning a PIL image into a tensor.
        device: Device the frame tensor is moved to before inference.
        frame_skip: Analyse every ``frame_skip``-th frame; must be >= 1.

    Returns:
        "Real" when strictly more analysed frames voted real than manipulated,
        otherwise "Manipulated" (a tie therefore yields "Manipulated").

    Raises:
        ValueError: If ``frame_skip`` is smaller than 1.
        IOError: If the video cannot be opened.
        RuntimeError: If no frame was analysed (nothing could be decoded, or
            ``frame_skip`` exceeds the number of frames), since a verdict
            without any votes would be meaningless.
    """
    # Validate up front: frame_skip == 0 would otherwise surface as a
    # ZeroDivisionError on the first decoded frame.
    if frame_skip < 1:
        raise ValueError(f"frame_skip must be a positive integer, got {frame_skip!r}")

    frame_count = 0
    real_count = 0
    manipulated_count = 0

    cap = cv2.VideoCapture(video_path)
    try:
        # An unopenable file would otherwise fall through the loop untouched
        # and be reported as "Manipulated" on a 0-0 vote.
        if not cap.isOpened():
            raise IOError(f"Could not open video: {video_path}")

        # The reported frame count can be missing (0 or negative); let tqdm
        # run without a total in that case instead of showing a bogus one.
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        progress_total = total_frames if total_frames > 0 else None

        with tqdm(total=progress_total, desc="Processing Video", unit="frame") as pbar:
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                frame_count += 1
                pbar.update(1)

                # Optional sub-sampling: only every frame_skip-th frame votes.
                if frame_count % frame_skip != 0:
                    continue

                # OpenCV delivers BGR arrays; the transform expects an RGB PIL image.
                image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                image = transform(image).unsqueeze(0).to(device)  # add batch dimension

                with torch.no_grad():
                    predicted = model(image).argmax(dim=1).item()

                if predicted == 0:
                    manipulated_count += 1
                else:
                    real_count += 1
    finally:
        # Release the capture handle even if decoding or inference raised.
        cap.release()

    if real_count + manipulated_count == 0:
        raise RuntimeError(
            f"No frames were analysed for {video_path} "
            f"(frames read: {frame_count}, frame_skip: {frame_skip})"
        )

    # Final decision: majority vote across the analysed frames.
    print(f"\nTotal Frames: {frame_count}")
    print(f"Detected Real: {real_count}, Detected Manipulated: {manipulated_count}")

    if real_count > manipulated_count:
        print("‚úÖ Result: Real video")
        return "Real"

    print("‚ö†Ô∏è Result: Manipulated video")
    return "Manipulated"

In [None]:
def predict_image(image_path, model, transform, device):
    """Classify one image file and return its label.

    The image is opened as RGB, passed through ``transform``, given a leading
    batch axis and moved to ``device``. The index of the model's highest score
    is then translated to "Manipulated" (index 0) or "Real" (index 1). The
    label is printed and returned.
    """
    index_to_label = {0: "Manipulated", 1: "Real"}

    # Load, preprocess and batch the single image.
    rgb_image = Image.open(image_path).convert("RGB")
    batch = transform(rgb_image).unsqueeze(0).to(device)

    # Forward pass without gradient tracking; keep only the arg-max indices.
    with torch.no_grad():
        top_index = torch.max(model(batch), 1)[1]

    label = index_to_label[top_index.item()]
    print(f"Prediction: {label}")
    return label

In [3]:
import os

def predict_videos_in_folder(folder_path, model, transform, device, frame_skip=1):
    """Run ``predict_video`` on every video file directly inside a folder.

    Entries are selected by extension (.mp4, .avi, .mov, .mkv, compared
    case-insensitively) and processed in sorted name order, so repeated runs
    visit the files in the same sequence. Sub-directories are not descended
    into, and an entry that is not a regular file is skipped even when its
    name ends in a video extension.

    Args:
        folder_path: Directory whose entries are scanned.
        model, transform, device: Forwarded unchanged to ``predict_video``.
        frame_skip: Forwarded to ``predict_video`` for every file.

    Returns:
        dict mapping each processed file name to the label returned by
        ``predict_video`` for it.
    """
    video_extensions = (".mp4", ".avi", ".mov", ".mkv")
    results = {}

    # os.listdir yields entries in arbitrary order; sort for reproducible runs.
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.lower().endswith(video_extensions):
            continue

        video_path = os.path.join(folder_path, file_name)
        if not os.path.isfile(video_path):
            continue  # e.g. a directory that happens to be named "clips.mp4"

        print(f"\nüîé Testing video: {file_name}")
        results[file_name] = predict_video(video_path, model, transform, device, frame_skip=frame_skip)

    return results

In [None]:
# Classify a single test video; frame_skip is left at its default of 1,
# so every frame is analysed. The returned label is kept in `result`.
video_path = '/content/drive/MyDrive/ModelTrain/DFD/Test/r.mp4'
result = predict_video(video_path, model, transform, device)

In [None]:
# Classify a single image; predict_image prints the label and returns it.
# NOTE: this rebinds `result`, which the single-video cell above also assigns.
img_path = "/content/sample_image.jpg"
result = predict_image(img_path, model, transform, device)

In [5]:
# Evaluate every video in the "manipulated" test folder, analysing every 5th frame.
# The verdicts get a dedicated name because the later run over the "real"
# folder also assigns `all_results` and would otherwise discard this (slow to
# recompute) result; `all_results` is still bound for code that expects it.
folder_path = "/content/drive/MyDrive/ModelTrain/DFD/Test/manipulated"
manipulated_results = predict_videos_in_folder(folder_path, model, transform, device, frame_skip=5)
all_results = manipulated_results


üîé Testing video: 01_02__hugging_happy__YVGY8LOK.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 578/578 [04:48<00:00,  2.00frame/s]



Total Frames: 578
Detected Real: 3, Detected Manipulated: 112
‚ö†Ô∏è Result: Manipulated video

üîé Testing video: 01_02__exit_phone_room__YVGY8LOK.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 210/210 [01:43<00:00,  2.03frame/s]



Total Frames: 210
Detected Real: 7, Detected Manipulated: 35
‚ö†Ô∏è Result: Manipulated video

üîé Testing video: 01_02__outside_talking_still_laughing__YVGY8LOK.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 727/727 [06:39<00:00,  1.82frame/s]



Total Frames: 727
Detected Real: 14, Detected Manipulated: 131
‚ö†Ô∏è Result: Manipulated video

üîé Testing video: 01_02__talking_against_wall__YVGY8LOK.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 841/841 [07:46<00:00,  1.80frame/s]



Total Frames: 841
Detected Real: 2, Detected Manipulated: 166
‚ö†Ô∏è Result: Manipulated video

üîé Testing video: 01_02__meeting_serious__YVGY8LOK.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1044/1044 [09:38<00:00,  1.80frame/s]



Total Frames: 1044
Detected Real: 1, Detected Manipulated: 207
‚ö†Ô∏è Result: Manipulated video

üîé Testing video: 01_02__secret_conversation__YVGY8LOK.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 905/905 [08:17<00:00,  1.82frame/s]



Total Frames: 905
Detected Real: 9, Detected Manipulated: 172
‚ö†Ô∏è Result: Manipulated video

üîé Testing video: 01_02__talking_angry_couch__YVGY8LOK.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1455/1455 [13:24<00:00,  1.81frame/s]



Total Frames: 1455
Detected Real: 18, Detected Manipulated: 273
‚ö†Ô∏è Result: Manipulated video

üîé Testing video: 01_02__walk_down_hall_angry__YVGY8LOK.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 217/217 [01:59<00:00,  1.81frame/s]



Total Frames: 217
Detected Real: 30, Detected Manipulated: 13
‚úÖ Result: Real video

üîé Testing video: 01_02__walking_and_outside_surprised__YVGY8LOK.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 808/808 [07:30<00:00,  1.79frame/s]



Total Frames: 808
Detected Real: 7, Detected Manipulated: 154
‚ö†Ô∏è Result: Manipulated video

üîé Testing video: 01_02__walking_down_indoor_hall_disgust__YVGY8LOK.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 773/773 [07:11<00:00,  1.79frame/s]


Total Frames: 773
Detected Real: 2, Detected Manipulated: 152
‚ö†Ô∏è Result: Manipulated video





In [6]:
# Evaluate every video in the "real" test folder, analysing every 5th frame.
# The verdicts are kept under a dedicated name so they stay identifiable;
# `all_results` is rebound as before for code that expects it.
folder_path = "/content/drive/MyDrive/ModelTrain/DFD/Test/real"
real_results = predict_videos_in_folder(folder_path, model, transform, device, frame_skip=5)
all_results = real_results


üîé Testing video: 01__exit_phone_room.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 305/305 [02:49<00:00,  1.80frame/s]



Total Frames: 305
Detected Real: 24, Detected Manipulated: 37
‚ö†Ô∏è Result: Manipulated video

üîé Testing video: 01__kitchen_pan.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 560/560 [05:11<00:00,  1.80frame/s]



Total Frames: 560
Detected Real: 52, Detected Manipulated: 60
‚ö†Ô∏è Result: Manipulated video

üîé Testing video: 01__meeting_serious.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1045/1045 [09:39<00:00,  1.80frame/s]



Total Frames: 1045
Detected Real: 5, Detected Manipulated: 204
‚ö†Ô∏è Result: Manipulated video

üîé Testing video: 01__kitchen_still.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 800/800 [07:24<00:00,  1.80frame/s]



Total Frames: 800
Detected Real: 119, Detected Manipulated: 41
‚úÖ Result: Real video

üîé Testing video: 01__outside_talking_pan_laughing.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 626/626 [05:44<00:00,  1.82frame/s]



Total Frames: 626
Detected Real: 32, Detected Manipulated: 93
‚ö†Ô∏è Result: Manipulated video

üîé Testing video: 01__hugging_happy.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 787/787 [07:15<00:00,  1.81frame/s]



Total Frames: 787
Detected Real: 18, Detected Manipulated: 139
‚ö†Ô∏è Result: Manipulated video

üîé Testing video: 01__secret_conversation.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 965/965 [08:57<00:00,  1.80frame/s]



Total Frames: 965
Detected Real: 12, Detected Manipulated: 181
‚ö†Ô∏è Result: Manipulated video

üîé Testing video: 01__outside_talking_still_laughing.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 851/851 [07:49<00:00,  1.81frame/s]



Total Frames: 851
Detected Real: 51, Detected Manipulated: 119
‚ö†Ô∏è Result: Manipulated video

üîé Testing video: 01__podium_speech_happy.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 902/902 [08:17<00:00,  1.81frame/s]



Total Frames: 902
Detected Real: 104, Detected Manipulated: 76
‚úÖ Result: Real video

üîé Testing video: 01__talking_against_wall.mp4


Processing Video: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 860/860 [07:56<00:00,  1.80frame/s]


Total Frames: 860
Detected Real: 149, Detected Manipulated: 23
‚úÖ Result: Real video



