# In [4]:
import cv2
import numpy as np
import pytesseract
import json

# Tell pytesseract where the Tesseract OCR executable lives. This is a
# machine-specific Windows path; adjust it if Tesseract is installed elsewhere.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"


def detect_shot_cuts(video_path, threshold=0.3):
    """Count abrupt shot changes in a video using grayscale histograms.

    Every frame is converted to grayscale and summarised as a normalised
    256-bin intensity histogram. Each histogram is compared with the one
    from the previous frame using the Bhattacharyya distance; a distance
    above ``threshold`` is counted as one cut.

    Args:
        video_path: Path (or other source) accepted by ``cv2.VideoCapture``.
        threshold: Minimum histogram distance between consecutive frames
            for the transition to be counted as a cut.

    Returns:
        The number of detected cuts as an ``int``. If no frame can be read
        from the source, the result is 0.
    """
    cap = cv2.VideoCapture(video_path)
    prev_hist = None
    cuts = 0

    # try/finally guarantees the capture handle is released even when a
    # frame fails to convert or compare part-way through the video.
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            hist = cv2.calcHist([gray], [0], None, [256], [0, 256])
            hist = cv2.normalize(hist, hist).flatten()

            # The first frame has nothing to be compared against.
            if prev_hist is not None:
                diff = cv2.compareHist(prev_hist, hist, cv2.HISTCMP_BHATTACHARYYA)
                if diff > threshold:
                    cuts += 1
            prev_hist = hist
    finally:
        cap.release()

    return int(cuts)


def analyze_motion(video_path):
    """Estimate the average amount of motion in a video via dense optical flow.

    Farneback optical flow is computed between every pair of consecutive
    grayscale frames. For each pair the flow magnitudes of all pixels are
    summed, and the sums are averaged over the number of frame pairs.

    Note that the per-pair value is a sum over pixels, not a mean, so the
    result grows with frame resolution as well as with motion.

    Args:
        video_path: Path (or other source) accepted by ``cv2.VideoCapture``.

    Returns:
        The average summed flow magnitude per frame pair as a ``float``;
        0.0 when the video yields fewer than two frames.
    """
    cap = cv2.VideoCapture(video_path)

    # try/finally releases the capture on every exit path, including the
    # early return when the very first frame cannot be read.
    try:
        ret, prev = cap.read()
        if not ret:
            return 0.0

        prev_gray = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY)
        total_magnitude = 0.0
        frame_count = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            # Positional arguments after the output placeholder (None):
            # pyr_scale, levels, winsize, iterations, poly_n, poly_sigma, flags.
            flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None,
                                                0.5, 3, 15, 3, 5, 1.2, 0)
            # flow[..., 0] / flow[..., 1] are the x / y displacement fields.
            magnitude, _ = cv2.cartToPolar(flow[..., 0], flow[..., 1])
            total_magnitude += float(np.sum(magnitude))
            frame_count += 1
            prev_gray = gray
    finally:
        cap.release()

    avg_motion = total_magnitude / frame_count if frame_count else 0.0
    return float(avg_motion)

def detect_text_ratio(video_path, min_chars=10, frame_step=1):
    """Return the fraction of sampled frames in which OCR finds text.

    Each sampled frame is converted to grayscale and passed to Tesseract
    through ``pytesseract.image_to_string``. A frame counts as containing
    text when the stripped OCR output is longer than ``min_chars``.

    Args:
        video_path: Path (or other source) accepted by ``cv2.VideoCapture``.
        min_chars: A frame is a "text frame" only if its stripped OCR output
            has more than this many characters. Defaults to 10.
        frame_step: Run OCR on every ``frame_step``-th frame, starting with
            the first. The default of 1 examines every frame; larger values
            trade accuracy for speed, since OCR dominates the run time.

    Returns:
        ``text_frames / sampled_frames`` as a ``float``, or 0.0 when no
        frame could be read.

    Raises:
        ValueError: If ``frame_step`` is smaller than 1.
    """
    if frame_step < 1:
        raise ValueError("frame_step must be >= 1")

    cap = cv2.VideoCapture(video_path)
    text_frames = 0
    sampled_frames = 0
    frame_index = 0

    # try/finally guarantees the capture handle is released even if OCR
    # or colour conversion raises part-way through the video.
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            is_sampled = frame_index % frame_step == 0
            frame_index += 1
            if not is_sampled:
                continue

            sampled_frames += 1
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            text = pytesseract.image_to_string(gray)
            if len(text.strip()) > min_chars:
                text_frames += 1
    finally:
        cap.release()

    ratio = text_frames / sampled_frames if sampled_frames else 0.0
    return float(ratio)

def extract_features(video_path):
    """Run every feature extractor on one video and collect the results.

    Args:
        video_path: Path of the video handed to each extractor.

    Returns:
        A dict with the keys ``"shot_cuts"``, ``"avg_motion"`` and
        ``"text_present_ratio"``, in that order, each holding the value
        returned by the corresponding extractor.
    """
    # (output key, extractor) pairs; the order fixes both the call order
    # and the key order of the resulting dict.
    extractors = (
        ("shot_cuts", detect_shot_cuts),
        ("avg_motion", analyze_motion),
        ("text_present_ratio", detect_text_ratio),
    )
    return {key: extractor(video_path) for key, extractor in extractors}

# Script entry point: extract the features of one video and print them as
# indented JSON.
if __name__ == "__main__":
    # Hard-coded, machine-specific location of the input video; edit this
    # path to analyse a different file.
    video_path = r"C:\Users\leela\OneDrive\OneDrive - hindustanuniv.ac.in\Desktop\white panda\videoos.mp4"

    features = extract_features(video_path)
    # All feature values are plain int/float, so json can serialise them directly.
    print(json.dumps(features, indent=2))


# Output recorded from a run of the cell above:
# {
#   "shot_cuts": 10,
#   "avg_motion": 193685.38956299567,
#   "text_present_ratio": 0.016324062877871828
# }
