<a href="https://colab.research.google.com/github/SaNadira/videodetection/blob/main/videodetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install facenet-pytorch opencv-python torchvision



In [None]:
# Mount Google Drive so the reference images and the input video, which are
# addressed by paths under /content/drive later in this notebook, are readable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import cv2
import torch
import numpy as np
from PIL import Image
import torch.nn.functional as F
from facenet_pytorch import MTCNN, InceptionResnetV1
from torchvision import transforms
import os

In [None]:
from google.colab.patches import cv2_imshow

In [None]:
# Run the models on the GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cpu


In [None]:
# Face detector / cropper. keep_all=True makes it return every detected face
# rather than a single one; crops are 160x160 with a 20 px margin.
mtcnn = MTCNN(
    image_size=160,
    margin=20,
    keep_all=True,
    device=device,
)

# Face-embedding network with VGGFace2 weights, moved to the selected device
# and switched to inference mode.
resnet = InceptionResnetV1(pretrained='vggface2').to(device).eval()

In [None]:
def normalize_color(frame):
    """Equalize the local contrast of a BGR frame.

    The frame is converted to LAB, CLAHE (clip limit 2.0, 8x8 tiles) is
    applied to the lightness channel only, and the result is converted back
    to BGR. The two colour channels are passed through unchanged.

    Args:
        frame: BGR image as accepted by ``cv2.cvtColor``.

    Returns:
        A new BGR image with the equalized lightness channel.
    """
    lightness, chroma_a, chroma_b = cv2.split(cv2.cvtColor(frame, cv2.COLOR_BGR2LAB))
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    equalized_lab = cv2.merge((equalizer.apply(lightness), chroma_a, chroma_b))
    return cv2.cvtColor(equalized_lab, cv2.COLOR_LAB2BGR)

In [None]:
def robust_preprocess(frame):
    """Denoise a frame and then normalize its contrast.

    A bilateral filter (d=9, sigmaColor=75, sigmaSpace=75) is applied first,
    and its output is passed through ``normalize_color``.

    Args:
        frame: BGR image to clean up.

    Returns:
        The filtered, contrast-normalized BGR image.
    """
    return normalize_color(
        cv2.bilateralFilter(frame, d=9, sigmaColor=75, sigmaSpace=75)
    )

In [None]:
def augment_image(pil_img, num_aug=3):
    """Produce randomly perturbed copies of an image.

    Each copy goes through a random horizontal flip (p=0.5), a random
    rotation of up to 10 degrees, and a colour jitter.

    Args:
        pil_img: Image to augment (callers in this notebook pass PIL images).
        num_aug: Number of augmented copies to generate.

    Returns:
        A list of ``num_aug`` augmented images.
    """
    pipeline = transforms.Compose([
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=10),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    ])
    # The pipeline is re-applied for every copy so each one gets fresh random draws.
    return [pipeline(pil_img) for _ in range(num_aug)]

In [None]:
def _embed_first_face(pil_img):
    """Return the embedding of the first face MTCNN yields for ``pil_img``, or None if no face is found."""
    face_tensor = mtcnn(pil_img)
    if face_tensor is None:
        return None
    # A 4-D result is a stack of several face crops; only the first is used.
    if face_tensor.ndim == 4:
        face_tensor = face_tensor[0]
    # Inference only: no_grad avoids building an autograd graph that would be
    # thrown away immediately.
    with torch.no_grad():
        return resnet(face_tensor.unsqueeze(0).to(device)).cpu()[0]


def preprocess_and_get_embedding(image_path, augment=False, num_aug=3):
    """Compute one averaged face embedding for an image file.

    The image is embedded as-is and, when ``augment`` is true, ``num_aug``
    randomly augmented copies are embedded as well. Copies in which no face
    is detected are skipped. All embeddings obtained are averaged.

    Args:
        image_path: Path of the image to load.
        augment: Whether to also embed augmented copies of the image.
        num_aug: Number of augmented copies to generate when ``augment`` is true.

    Returns:
        The mean embedding as a CPU tensor, or ``None`` if the image could not
        be opened or no face was detected in the image or any of its copies.
    """
    try:
        img = Image.open(image_path).convert('RGB')
    except Exception as e:
        print(f"Error opening {image_path}: {e}")
        return None

    original_embedding = _embed_first_face(img)
    if original_embedding is None:
        # Not fatal: an augmented copy may still yield a detectable face.
        print(f"No face detected in {image_path}")

    candidates = [original_embedding]
    if augment:
        candidates.extend(
            _embed_first_face(aug_img)
            for aug_img in augment_image(img, num_aug=num_aug)
        )

    embeddings = [emb for emb in candidates if emb is not None]
    if not embeddings:
        return None
    return torch.stack(embeddings).mean(0)

In [None]:
# Reference gallery: maps each person's display name to the image files used
# to build that person's reference embedding. The name is what gets drawn on
# the video when a face matches.
known_people = {
    "Walter Bishop" : [
        '/content/drive/MyDrive/walterbishop/images.jfif',
        '/content/drive/MyDrive/walterbishop/fringe-john-noble-season-4.jpg',
        '/content/drive/MyDrive/walterbishop/d678jbz-c1c97c28-55f4-4af1-a8a2-4abe037f2e9e.jpg',
        '/content/drive/MyDrive/walterbishop/YyZMr.jpg',
        '/content/drive/MyDrive/walterbishop/John-Noble-Walter-Bishop.jpg',
        '/content/drive/MyDrive/walterbishop/HkjAj.jpg'
    ],
    "Olivia Dunham" : [
        '/content/drive/MyDrive/oliviadunham/olivia-dunham-16.jpg',
        '/content/drive/MyDrive/oliviadunham/images (4).jfif',
        '/content/drive/MyDrive/oliviadunham/images (3).jfif',
        '/content/drive/MyDrive/oliviadunham/images (2).jfif',
        '/content/drive/MyDrive/oliviadunham/images (1).jfif',
        '/content/drive/MyDrive/oliviadunham/MV5BNmFmMmE4M2EtOTFiMC00MGU2LWI0ZjYtZmE0NDMxMDQxYjdlXkEyXkFqcGc@._V1_.jpg'
    ]
    }
# The reference embeddings include randomly augmented copies of each image.
# Seeding torch's global RNG (which current torchvision transforms draw from)
# makes the gallery, and therefore the match scores, repeatable across runs.
AUGMENTATION_SEED = 0
torch.manual_seed(AUGMENTATION_SEED)

# Maps person name -> single reference embedding, averaged over that person's images.
known_faces = {}
for name, img_paths in known_people.items():
    per_image = (
        preprocess_and_get_embedding(path, augment=True, num_aug=3)
        for path in img_paths
    )
    # Images that could not be opened or contained no detectable face yield None.
    embeddings = [emb for emb in per_image if emb is not None]
    if not embeddings:
        print(f"No valid embeddings were obtained for {name}; skipping.")
        continue
    # Average embeddings from all images (including augmentations)
    known_faces[name] = torch.stack(embeddings).mean(0)
    print(f"Stored embedding for {name} using {len(embeddings)} processed images (with augmentations).")

Stored embedding for Walter Bishop using 6 processed images (with augmentations).
Stored embedding for Olivia Dunham using 6 processed images (with augmentations).


In [None]:
video_path = '/content/drive/MyDrive/bfa7166fb1e6c49b39aeed11ae09dc145657606-360p.mp4'

# Open the input video. Fail loudly if that does not work: carrying on with an
# unopened capture would process zero frames and then encode and copy whatever
# happens to be left over from an earlier run.
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    cap.release()
    raise RuntimeError("Error: Could not open the input video file!")

total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
print("Total frames in input video:", total_frames)

frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps or fps <= 0:
    # The frame rate is passed to ffmpeg later; a zero/unknown rate would
    # make the encode fail, so fall back to a conventional value.
    print("Warning: input video reports no valid FPS; defaulting to 25.")
    fps = 25.0
print(f"Original video dimensions: {frame_width}x{frame_height}, FPS: {fps}")

# Create a temporary directory to save processed frames.
temp_frames_dir = "/content/temp_frames"
os.makedirs(temp_frames_dir, exist_ok=True)
# Remove frames left behind by an earlier run; otherwise a previous, longer
# video would leave extra frame_*.png files that end up in the encoded output.
for stale_name in os.listdir(temp_frames_dir):
    if stale_name.startswith("frame_") and stale_name.endswith(".png"):
        os.remove(os.path.join(temp_frames_dir, stale_name))

# Minimum cosine similarity for a face to be labelled with a known name.
cosine_threshold = 0.50
# Set to True to print per-frame detection/similarity details. Off by default:
# these messages produce several lines per frame and swamp the cell output.
log_frame_details = False

frame_idx = 0
print("Processing video frames and saving to temporary directory...")

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame = robust_preprocess(frame)

        frame_idx += 1
        if frame_idx % 50 == 0:
            print(f"Processed {frame_idx} frames...")

        # MTCNN is fed an RGB PIL image; OpenCV frames are BGR arrays.
        pil_frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        boxes, _ = mtcnn.detect(pil_frame)
        if boxes is None:
            if log_frame_details:
                print(f"Frame {frame_idx}: No faces detected.")
        else:
            # Crop the faces from the boxes that were just detected instead of
            # calling mtcnn(pil_frame), which would run the detector a second
            # time. Relies on mtcnn having keep_all=True, so the result is a
            # stacked (num_faces, C, H, W) tensor aligned with `boxes`.
            faces = mtcnn.extract(pil_frame, boxes, save_path=None)
            # Embed all faces of the frame in one forward pass, without
            # building an autograd graph.
            with torch.no_grad():
                face_embeddings = resnet(faces.to(device)).cpu()

            for (x1, y1, x2, y2), face_embedding in zip(boxes.astype(int), face_embeddings):
                best_match = "Unknown"
                highest_similarity = -1.0
                for name, ref_embedding in known_faces.items():
                    similarity = F.cosine_similarity(face_embedding.unsqueeze(0),
                                                     ref_embedding.unsqueeze(0))[0].item()
                    if log_frame_details:
                        print(f"Frame {frame_idx}: Similarity with {name} = {similarity:.2f}")
                    if similarity > highest_similarity:
                        highest_similarity = similarity
                        best_match = name
                # The best candidate still has to clear the threshold.
                if highest_similarity < cosine_threshold:
                    best_match = "Unknown"

                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0,255,0), 2)
                label = f"{best_match} ({highest_similarity*100:.1f}%)"
                # Keep the label inside the image when the face touches the top edge.
                label_y = max(y1 - 10, 15)
                cv2.putText(frame, label, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX,
                            0.7, (0,255,0), 2)

        out_frame_path = os.path.join(temp_frames_dir, f"frame_{frame_idx:05d}.png")
        cv2.imwrite(out_frame_path, frame)
finally:
    # Release the capture even if processing a frame raised.
    cap.release()

print(f"All {frame_idx} frames processed and saved to {temp_frames_dir}")

!ffmpeg -framerate {fps} -i /content/temp_frames/frame_%05d.png -c:v libx264 -pix_fmt yuv420p /content/output_video.mp4


drive_output_dir = "/content/drive/MyDrive/ColabVideos"
os.makedirs(drive_output_dir, exist_ok=True)
drive_output_path = os.path.join(drive_output_dir, "output_video.mp4")
!cp /content/output_video.mp4 "{drive_output_path}"
print(f"Video copied to Google Drive at: {drive_output_path}")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Frame 2047: Similarity with Walter Bishop = 0.07
Frame 2047: Similarity with Olivia Dunham = 0.03
Frame 2048: Similarity with Walter Bishop = 0.07
Frame 2048: Similarity with Olivia Dunham = -0.11
Frame 2048: Similarity with Walter Bishop = -0.03
Frame 2048: Similarity with Olivia Dunham = 0.03
Frame 2049: Similarity with Walter Bishop = -0.05
Frame 2049: Similarity with Olivia Dunham = 0.04
Processed 2050 frames...
Frame 2050: Similarity with Walter Bishop = 0.02
Frame 2050: Similarity with Olivia Dunham = -0.06
Frame 2050: Similarity with Walter Bishop = -0.01
Frame 2050: Similarity with Olivia Dunham = -0.00
Frame 2051: Similarity with Walter Bishop = 0.04
Frame 2051: Similarity with Olivia Dunham = -0.05
Frame 2051: Similarity with Walter Bishop = -0.14
Frame 2051: Similarity with Olivia Dunham = 0.06
Frame 2052: Similarity with Walter Bishop = -0.05
Frame 2052: Similarity with Olivia Dunham = -0.07
Frame 2052: Simila