<a href="https://colab.research.google.com/github/NareshKuppili/VIDEODEDUPLICATION/blob/main/Upgrad_Thesis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the necessary libraries (uncomment the lines below when running in Colab)
# !pip install imagehash
# !pip install moviepy
# !pip install tensorflow
# !pip install tensorflow-datasets

import os
import imagehash
from PIL import Image
from moviepy.editor import VideoFileClip
import tensorflow as tf
import tensorflow_datasets as tfds

# Function to extract frames from a video and calculate their hashes
def extract_frames_and_hashes(video_path, frame_rate=1):
    """Sample frames from a video file and compute an average hash for each.

    Args:
        video_path: Location of the video file, handed to ``VideoFileClip``.
        frame_rate: Step, in whole seconds, between sampled timestamps.
            Despite its name this is a sampling interval rather than a
            frames-per-second value; the default samples one frame per second.

    Returns:
        A dict mapping each sampled timestamp (int seconds) to the
        ``imagehash.average_hash`` of the frame at that timestamp.

    Raises:
        ValueError: If ``frame_rate`` is 0 (raised by ``range``).
    """
    video_clip = VideoFileClip(video_path)
    try:
        # Only whole seconds strictly below the clip duration are sampled, so a
        # clip shorter than one second produces an empty dict.
        return {
            t: imagehash.average_hash(Image.fromarray(video_clip.get_frame(t)))
            for t in range(0, int(video_clip.duration), frame_rate)
        }
    finally:
        # Close the clip even when a frame fails to decode or hash.
        video_clip.close()

# Function to find duplicate frames based on their hashes
def find_duplicate_frames(frames_hashes, hash_threshold=5):
    """Return pairs of consecutive timestamps whose hashes are nearly equal.

    Timestamps are visited in ascending order and each one is compared only
    with its immediate successor. A pair ``(earlier, later)`` is reported when
    subtracting the later hash from the earlier one gives a value strictly
    below ``hash_threshold``.
    """
    ordered = sorted(frames_hashes)
    return [
        (earlier, later)
        for earlier, later in zip(ordered, ordered[1:])
        if frames_hashes[earlier] - frames_hashes[later] < hash_threshold
    ]

# Load the UCF101 dataset
ds, ds_info = tfds.load('ucf101', split='train', with_info=True)

# Frames per second used to convert frame indices into whole seconds.
# NOTE(review): assumes the TFDS UCF101 clips are decoded at 25 fps -- confirm
# for the dataset config in use.
UCF101_FPS = 25

# Sample usage with the UCF101 dataset
for example in ds.take(1):  # Take only one example to demonstrate
    # example['video'] is a tensor of decoded frames, not a file path, so it
    # cannot be opened with VideoFileClip; hash one frame per second directly.
    # NOTE(review): assumes shape (num_frames, height, width, 3) with dtype
    # uint8 -- confirm against the dataset's feature spec.
    video_frames = example['video'].numpy()
    frames_hashes = {
        second: imagehash.average_hash(Image.fromarray(video_frames[index]))
        for second, index in enumerate(range(0, len(video_frames), UCF101_FPS))
    }
    duplicates = find_duplicate_frames(frames_hashes)

    # Output the duplicate frames
    print("Duplicate frames found:")
    for dup in duplicates:
        print(f"Frame at {dup[0]}s is similar to frame at {dup[1]}s")

# Note: extract_frames_and_hashes() expects the path of a video file on disk.
# Use it for videos stored as files; the dataset yields frame tensors, which
# are hashed directly in the loop above.
