In [3]:
import os
import cv2
import torch
import numpy as np
from facenet_pytorch import InceptionResnetV1
from PIL import Image
from sklearn.metrics.pairwise import cosine_similarity

# Check if GPU is available and set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load pre-trained FaceNet model for feature extraction
facenet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

# Initialize Haar Cascade for face detection
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

def preprocess_image(img):
    img = img.convert("RGB")  # Ensure image is in RGB format
    img_resized = img.resize((160, 160))
    img_tensor = torch.tensor(np.array(img_resized)).permute(2, 0, 1).float().div(255).unsqueeze(0).to(device)
    with torch.no_grad():
        img_embedding = facenet(img_tensor)
    return img_embedding.cpu().numpy().flatten()

# Load and preprocess the single input image
input_image_path = "image.png"  # Change this to the path of your input image
input_img = Image.open(input_image_path)
input_face_encoding = preprocess_image(input_img)

if input_face_encoding is None:
    print("No face detected in the input image.")
    exit()

def match_face(face_encoding, input_face_encoding, threshold=0.6):
    similarity = cosine_similarity([face_encoding], [input_face_encoding])[0][0]
    return similarity, similarity > threshold

input_video_path = "A Special Message For Goli - Taarak Mehta Ka Ooltah Chashmah - Full Episode - Ep 3899 - 11 Oct 2023.mp4"  # Change this to the path of your input video
output_video_path = "output.mp4"

cap = cv2.VideoCapture(input_video_path)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

frame_count = 0

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    frame_count += 1

    # Detect faces using Haar Cascade
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, 1.3, 5)

    if len(faces) > 0:
        matches_found = False
        print(f"Faces found in frame {frame_count}: {len(faces)}")
        
        for (x, y, w, h) in faces:
            face_image = frame[y:y+h, x:x+w]
            face_pil = Image.fromarray(cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)).resize((160, 160))
            face_encoding = preprocess_image(face_pil)

            if face_encoding is not None:
                similarity, is_match = match_face(face_encoding, input_face_encoding)
                print(f"Similarity: {similarity}")

                if is_match:
                    matches_found = True
                    print(f"Match found in frame {frame_count} (Similarity: {similarity:.2f})")
                    out.write(frame)  # Write the frame to the output video

        if not matches_found:
            print(f"No matches found in frame {frame_count}")
    else:
        print(f"No faces found in frame {frame_count}")

cap.release()
out.release()
print(f"Trimmed video saved as {output_video_path}")


No faces found in frame 1
No faces found in frame 2
No faces found in frame 3
No faces found in frame 4
No faces found in frame 5
No faces found in frame 6
No faces found in frame 7
No faces found in frame 8
No faces found in frame 9
No faces found in frame 10
Faces found in frame 11: 1
Similarity: 0.10142586380243301
No matches found in frame 11
Faces found in frame 12: 2
Similarity: 0.09125927090644836
Similarity: 0.15126590430736542
No matches found in frame 12
No faces found in frame 13
No faces found in frame 14
No faces found in frame 15
No faces found in frame 16
No faces found in frame 17
No faces found in frame 18
No faces found in frame 19
No faces found in frame 20
No faces found in frame 21
No faces found in frame 22
No faces found in frame 23
No faces found in frame 24
No faces found in frame 25
Faces found in frame 26: 1
Similarity: 0.04229355603456497
No matches found in frame 26
Faces found in frame 27: 1
Similarity: -0.013689108192920685
No matches found in frame 27
Fa

In [5]:
import os
import cv2
import torch
import numpy as np
from facenet_pytorch import InceptionResnetV1
from PIL import Image
import face_recognition
from sklearn.metrics.pairwise import cosine_similarity

# Check if GPU is available and set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load pre-trained FaceNet model for feature extraction
facenet = InceptionResnetV1(pretrained='vggface2').eval().to(device)

def preprocess_image(img):
    img = img.convert("RGB")  # Ensure image is in RGB format
    img_cropped = face_recognition.face_locations(np.array(img), model="cnn")
    if img_cropped:
        top, right, bottom, left = img_cropped[0]
        face_image = img.crop((left, top, right, bottom))
        face_image = face_image.resize((160, 160))
        face_tensor = torch.tensor(np.array(face_image) / 255.0).permute(2, 0, 1).unsqueeze(0).float().to(device)
        with torch.no_grad():
            img_embedding = facenet(face_tensor)
        return img_embedding.cpu().numpy().flatten()
    return None

# Load and preprocess the single input image
input_image_path = "image.png"  # Change this to the path of your input image
input_img = Image.open(input_image_path)
input_face_encoding = preprocess_image(input_img)

if input_face_encoding is None:
    print("No face detected in the input image.")
    exit()

def match_face(face_encoding, input_face_encoding, threshold=0.6):
    similarity = cosine_similarity([face_encoding], [input_face_encoding])[0][0]
    return similarity, similarity > threshold

input_video_path = "video.mp4"  # Change this to the path of your input video
output_video_path = "output_video.mp4"

cap = cv2.VideoCapture(input_video_path)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

frame_count = 0

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    frame_count += 1

    # Detect faces
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    face_locations = face_recognition.face_locations(frame_rgb, model="cnn")

    if face_locations:
        matches_found = False
        print(f"Faces found in frame {frame_count}: {len(face_locations)}")

        for (top, right, bottom, left) in face_locations:
            face_image = frame_rgb[top:bottom, left:right]
            face_pil = Image.fromarray(face_image).resize((160, 160))
            face_encoding = preprocess_image(face_pil)

            if face_encoding is not None:
                similarity, is_match = match_face(face_encoding, input_face_encoding)
                print(f"Similarity: {similarity}")

                if is_match:
                    matches_found = True
                    print(f"Match found in frame {frame_count} (Similarity: {similarity:.2f})")
                    out.write(frame)  # Write the frame to the output video

        if not matches_found:
            print(f"No matches found in frame {frame_count}")
    else:
        print(f"No faces found in frame {frame_count}")

cap.release()
out.release()
print(f"Trimmed video saved as {output_video_path}")


Faces found in frame 1: 2
Similarity: 0.1007562205195427
Similarity: 0.09313720464706421
No matches found in frame 1
Faces found in frame 2: 2
Similarity: 0.057690154761075974
Similarity: 0.10311184823513031
No matches found in frame 2
Faces found in frame 3: 2
Similarity: 0.08599691838026047
Similarity: 0.21366433799266815
No matches found in frame 3
Faces found in frame 4: 3
Similarity: 0.09528002887964249
Similarity: 0.21509265899658203
No matches found in frame 4
Faces found in frame 5: 3
Similarity: -0.058512814342975616
Similarity: 0.27407193183898926
No matches found in frame 5
Faces found in frame 6: 3
Similarity: -0.0586182102560997
Similarity: 0.23105354607105255
No matches found in frame 6
Faces found in frame 7: 3
Similarity: -0.053106509149074554
Similarity: 0.24376355111598969
No matches found in frame 7
Faces found in frame 8: 3
Similarity: 0.012370966374874115
Similarity: 0.2464390993118286
No matches found in frame 8
Faces found in frame 9: 3
Similarity: -0.04916087165

KeyboardInterrupt: 