# In [None]:
import cv2
from ultralytics import YOLO
import random
import pandas as pd

def _annotate_detections(frame, result):
    """Draw every detection of *result* onto *frame* and return the box areas.

    Args:
        frame: Image array for the current video frame; modified in place.
        result: First element of the list returned by ``model.predict``.

    Returns:
        A list with one area (width * height of the xyxy box) per detection,
        in the order the detections are reported.
    """
    boxes = result.boxes.xyxy.tolist()
    classes = result.boxes.cls.tolist()
    names = result.names
    confidences = result.boxes.conf.tolist()

    color = (0, 255, 0)  # Green color for bounding box
    areas = []
    for (x1, y1, x2, y2), cls, conf in zip(boxes, classes, confidences):
        class_name = names[int(cls)]
        cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
        cv2.putText(frame, f'{class_name} {conf:.2f}', (int(x1), int(y1 - 10)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
        areas.append((x2 - x1) * (y2 - y1))
    return areas


def process_video(video_path, output_video_path, threshold_seconds=1):
    """Annotate a video with detections and retrieve its non-duplicate frames.

    Every frame is run through the module-level ``model``; the detections are
    drawn on the frame and the annotated frame is written to
    ``output_video_path``. Per-frame bounding-box areas are then used to find
    runs of near-identical frames. Frames inside such runs are dropped, one
    random frame is kept for each run longer than ``threshold_seconds``, and
    the resulting frame numbers are handed to ``retrieve_frames``.

    Args:
        video_path: Path of the input video.
        output_video_path: Path of the annotated output video; also passed on
            to ``retrieve_frames``.
        threshold_seconds: Minimum duration of a duplicate run for it to
            contribute a representative frame.

    Raises:
        IOError: If ``video_path`` cannot be opened.
    """
    # Open video capture
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open video: {video_path}")

    out_video = None
    all_areas = []
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))

        # Create a VideoWriter object to write the output video
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out_video = cv2.VideoWriter(output_video_path, fourcc, fps, frame_size)

        frame_number = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_number += 1  # Frame numbers are 1-based

            # Make predictions, draw them and record the box areas
            results = model.predict(frame)
            areas = _annotate_detections(frame, results[0])
            all_areas.append((frame_number, areas))

            # Write the annotated frame to output video
            out_video.write(frame)
    finally:
        # Release resources even when prediction or writing fails
        cap.release()
        if out_video is not None:
            out_video.release()
        cv2.destroyAllWindows()

    # Create DataFrame with frame data
    sum_data = [(number, sum(areas), len(areas), areas) for number, areas in all_areas]
    Frame_Data_df = pd.DataFrame(
        sum_data,
        columns=['Frame', 'Total_Area', 'No_of_Bounding_Boxes', 'Individual_Areas'],
    )

    # Find duplicate frames; the helper may report "none found" as None
    duplicate_series = find_duplicate_frames(Frame_Data_df) or []

    # The runs are 0-based row positions, whereas the 'Frame' column is
    # 1-based, so translate them before deleting by frame number.
    frame_numbers = Frame_Data_df['Frame']
    duplicate_frame_ranges = [
        (int(frame_numbers.iloc[start]), int(frame_numbers.iloc[end]))
        for start, end in duplicate_series
    ]

    # Delete frames in duplicate series
    df_cleaned = delete_frames_in_duplicate_series(Frame_Data_df, duplicate_frame_ranges)

    # Select one representative frame for each sufficiently long run
    # (select_frames_with_threshold indexes rows by position)
    req_frames = select_frames_with_threshold(duplicate_series, Frame_Data_df, fps, threshold_seconds)

    # Concatenate cleaned DataFrame and required frames DataFrame
    parts = [df_cleaned]
    if req_frames is not None:
        parts.append(req_frames)
    Frames_to_Retrieve_df = pd.concat(parts)

    # Retrieve frames in chronological order
    Frames_to_Retrieve = sorted(Frames_to_Retrieve_df["Frame"].astype(int).to_list())
    # NOTE(review): retrieve_frames is not defined in this part of the file;
    # it must be available at module level when this function runs.
    retrieve_frames(video_path, Frames_to_Retrieve, output_video_path)

def find_duplicate_frames(df, area_tolerance=100):
    """Find runs of consecutive frames whose detections barely change.

    Two neighbouring rows count as duplicates when they have the same
    ``No_of_Bounding_Boxes`` and every pair of corresponding entries in
    ``Individual_Areas`` differs by less than ``area_tolerance``. A run ends
    as soon as a neighbouring pair is not a duplicate, whether because the
    box count changed or because an area moved by too much.

    Args:
        df: DataFrame with ``No_of_Bounding_Boxes`` and ``Individual_Areas``
            columns, one row per frame in chronological order.
        area_tolerance: Exclusive upper bound on the per-box area difference
            for two frames to be considered duplicates.

    Returns:
        A list of ``(start, end)`` tuples of 0-based row positions, both
        inclusive, one per run. The list is empty when no run is found.
    """
    # Work on plain lists so that positions, not index labels, drive the scan
    box_counts = df['No_of_Bounding_Boxes'].tolist()
    frame_areas = df['Individual_Areas'].tolist()
    rows = list(zip(box_counts, frame_areas))

    duplicate_series = []
    start_index = None

    for position, (previous, current) in enumerate(zip(rows, rows[1:]), start=1):
        prev_count, prev_areas = previous
        curr_count, curr_areas = current
        is_duplicate = curr_count == prev_count and all(
            abs(area_curr - area_prev) < area_tolerance
            for area_curr, area_prev in zip(curr_areas, prev_areas)
        )

        if is_duplicate:
            if start_index is None:
                start_index = position - 1
        elif start_index is not None:
            # Any non-duplicate pair closes the run at the previous row
            duplicate_series.append((start_index, position - 1))
            start_index = None

    # A run that reaches the last row is still open here
    if start_index is not None:
        duplicate_series.append((start_index, len(rows) - 1))

    if not duplicate_series:
        print("No Duplicated Frames Found")
    return duplicate_series

def delete_frames_in_duplicate_series(Frame_Data_df, duplicate_series, frame_column='Frame'):
    """Return a copy of the frame table without the frames in the given ranges.

    Each ``(start, end)`` pair in ``duplicate_series`` is an inclusive range
    that is matched against the values stored in ``frame_column`` (not
    against row positions). The input DataFrame is left untouched.

    Args:
        Frame_Data_df: DataFrame holding one row per frame.
        duplicate_series: Iterable of ``(start, end)`` integer pairs.
        frame_column: Name of the column whose values are compared.

    Returns:
        A DataFrame containing only the rows whose ``frame_column`` value
        lies outside every range.
    """
    remaining = Frame_Data_df.copy()

    # Gather every frame value covered by any range, then filter once
    doomed = [
        frame
        for first, last in duplicate_series
        for frame in range(first, last + 1)
    ]
    if doomed:
        keep_mask = ~remaining[frame_column].isin(doomed)
        remaining = remaining[keep_mask]
    return remaining

def select_frames_with_threshold(duplicate_series, Frame_Data_df, fps, threshold_seconds):
    """Pick one random row from every duplicate run longer than a threshold.

    Args:
        duplicate_series: Iterable of ``(start, end)`` inclusive row-position
            pairs, or ``None``/empty when there are no duplicate runs.
        Frame_Data_df: DataFrame the positions refer to (accessed via
            ``iloc``).
        fps: Frames per second, used to convert the threshold to frames.
        threshold_seconds: A run is used only if ``end - start`` exceeds this
            many seconds' worth of frames.

    Returns:
        A DataFrame with one randomly chosen row per qualifying run, or
        ``None`` when no run qualifies.
    """
    # Nothing to sample from; treat a missing series like an empty one
    if not duplicate_series:
        return None

    threshold_frames = int(threshold_seconds * fps)  # Convert threshold to frames

    frames = [
        Frame_Data_df.iloc[random.randint(start, end)]  # randint is inclusive on both ends
        for start, end in duplicate_series
        if end - start > threshold_frames
    ]

    return pd.DataFrame(frames) if frames else None
    
# Load the trained detector; process_video reads it through the module-level name `model`.
model = YOLO(
    "/Users/jatavathpavannaik/Documents/python/Computer_Vision/YOLOV8/runs/detect/train_cpu/weights/best.pt"
)

# Run the pipeline on the test clip (threshold_seconds keeps its default).
process_video(
    video_path='/Users/jatavathpavannaik/Documents/python/Computer_Vision/test_case_2.mov',
    output_video_path='output_video.mp4',
)
