In [None]:
# Refresh the apt package index, then install the Tesseract OCR engine
# (system package) and the pytesseract Python package.
!apt-get update
!apt-get install -y tesseract-ocr
!pip install pytesseract

In [None]:
import cv2
import pytesseract
import numpy as np
import os
from pathlib import Path

# Tell pytesseract which Tesseract executable to invoke.
pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'

def extract_text_from_frame(frame):
    """
    Run Tesseract OCR on one video frame and return the recognised text.

    The frame is converted from BGR to grayscale and binarised with Otsu's
    method before being handed to Tesseract with the English language model.

    Args:
        frame: Image accepted by cv2.cvtColor with COLOR_BGR2GRAY.

    Returns:
        str: OCR output with surrounding whitespace stripped (may be empty).
    """
    grayscale = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # THRESH_OTSU lets OpenCV choose the threshold itself, so the 0 passed
    # as the threshold value is only a placeholder.
    otsu_flags = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    _, binarized = cv2.threshold(grayscale, 0, 255, otsu_flags)

    recognized = pytesseract.image_to_string(binarized, lang='eng')
    return recognized.strip()

def process_video(video_path, output_dir='output', frame_interval=30):
    """
    Process a video file, extract text from frames, and save to a text file.

    Every ``frame_interval``-th frame (starting with frame 0) is passed to
    extract_text_from_frame. Non-empty results are collected as
    "Frame <n>: <text>" entries and written, newline-separated, to
    ``extracted_text.txt`` inside ``output_dir``.

    Args:
        video_path (str): Path to the input video file.
        output_dir (str): Directory to save extracted text (created if missing).
        frame_interval (int): Process every nth frame to reduce computation.
            Must be at least 1.

    Returns:
        list of str, or None: The collected "Frame <n>: <text>" entries, or
        None when the video could not be opened.

    Raises:
        ValueError: If frame_interval is less than 1.
    """
    # Reject a non-positive interval up front: 0 would otherwise surface as a
    # ZeroDivisionError from the modulo below, after the video was opened.
    if frame_interval < 1:
        raise ValueError(
            f"frame_interval must be a positive integer, got {frame_interval!r}"
        )

    # Create the output directory before the (potentially long) OCR pass so an
    # unwritable destination fails immediately rather than after all the work.
    os.makedirs(output_dir, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Error: Could not open video file {video_path}")
            return None

        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        print(f"Video loaded: {frame_count} frames, {fps} FPS")

        extracted_texts = []
        frame_number = 0

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No more frames could be read.
                break

            if frame_number % frame_interval == 0:
                text = extract_text_from_frame(frame)
                if text:
                    extracted_texts.append(f"Frame {frame_number}: {text}")

            frame_number += 1
    finally:
        # Release the capture handle even if decoding or OCR raised.
        cap.release()

    output_file = os.path.join(output_dir, 'extracted_text.txt')
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write('\n'.join(extracted_texts))

    print(f"Text extracted and saved to {output_file}")
    return extracted_texts

def main():
    """Run the OCR pipeline on the hard-coded Kaggle input video."""
    source_video = '/kaggle/input/ai-video/ai video.mp4'  # Replace with your video path
    results_dir = '/kaggle/working/output'

    # process_video writes the text file itself; its return value is not
    # needed here.
    process_video(source_video, results_dir)

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()