# Closed Captioning
Implements a closed-captioning feature using LangChain's image_captions.py and audio_speech_to_text.py to produce .srt files. The system provides both subtitles (from speech-to-text) and visual scene descriptions (from image captions), which together make up the closed captions.

## Imports

In [3]:
# imports for closed captioning
import cv2
import numpy as np
import os
import transformers
transformers.logging.set_verbosity_error()

from langchain.document_loaders import AssemblyAIAudioTranscriptLoader
from langchain.document_loaders import ImageCaptionLoader

## Audio Speech to Text

In [8]:
audio_file = "test_data/test.mp3"

# Read the AssemblyAI key from the environment instead of embedding it in the
# notebook: a key committed in a notebook is exposed to everyone who can read
# the file, and any key that was ever hardcoded here should be revoked.
assemblyai_api_key = os.environ.get("ASSEMBLYAI_API_KEY")
if not assemblyai_api_key:
    raise RuntimeError(
        "Set the ASSEMBLYAI_API_KEY environment variable before running this cell."
    )

# Transcribe the audio file; the loader returns the transcript as documents.
loader = AssemblyAIAudioTranscriptLoader(file_path=audio_file, api_key=assemblyai_api_key)

docs = loader.load()

# Show the transcript text of the first returned document.
print(docs[0].page_content)

That I aspire to be.


## Video Split to Frames

In [15]:
def frame_difference(prev_frame, curr_frame, threshold=30):
    """Report whether two frames differ noticeably in at least one pixel.

    Args:
        prev_frame: The reference frame (the callers in this notebook pass
            grayscale images).
        curr_frame: The frame to compare against ``prev_frame``.
        threshold: Per-pixel absolute difference above which a pixel counts
            as changed.

    Returns:
        A truthy value if any pixel's absolute difference exceeds
        ``threshold``, otherwise a falsy value.
    """
    # Binarise the per-pixel absolute difference: changed pixels become 255
    # (white), everything else 0. Only the mask (second return value) is needed.
    changed_mask = cv2.threshold(
        cv2.absdiff(prev_frame, curr_frame),
        threshold,
        255,
        cv2.THRESH_BINARY,
    )[1]
    # A single white pixel is enough to call the frames different.
    return np.any(changed_mask)

# Extract "key" frames from the video: the first frame is always saved, and
# every later frame is saved only if it differs from the last saved frame.
video_path = 'test_data/video_test.mp4'
output_dir = 'test_data/output_frames'

# cv2.imwrite returns False (without raising) when the target directory is
# missing, so make sure it exists before writing anything.
os.makedirs(output_dir, exist_ok=True)

# Initialize the video capture
capture = cv2.VideoCapture(video_path)
if not capture.isOpened():
    raise IOError(f"Could not open video file: {video_path}")

frameNr = 0  # index of the next saved frame (not the position in the video)
prev_frame_gray = None  # grayscale copy of the most recently saved frame

try:
    while True:
        ret, frame = capture.read()
        if not ret:
            # End of stream (or a read failure): nothing more to process.
            break

        # Convert to grayscale for comparison
        frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Always keep the very first frame; afterwards keep a frame only when
        # it differs from the last frame that was saved.
        if prev_frame_gray is None or frame_difference(prev_frame_gray, frame_gray):
            frame_path = os.path.join(output_dir, f'frame_{frameNr}.jpg')
            if not cv2.imwrite(frame_path, frame):
                raise IOError(f"Could not write frame to: {frame_path}")
            prev_frame_gray = frame_gray
            frameNr += 1
finally:
    # Release the video capture object even if processing fails midway.
    capture.release()

## Image Captions

In [4]:
# Define the path to the "output_frames" folder
folder_path = "test_data/output_frames/"


def _frame_sort_key(file_name):
    """Sort key that orders files by their trailing number (frame_2 before frame_10).

    Files whose name does not end in a number after the last underscore are
    placed after the numbered ones, in alphabetical order.
    """
    stem = os.path.splitext(file_name)[0]
    suffix = stem.rsplit("_", 1)[-1]
    if suffix.isdecimal():
        return (0, int(suffix), file_name)
    return (1, 0, file_name)


# List all .jpg files in the folder. os.listdir() returns entries in arbitrary
# order, so sort them by frame number to keep the captions in frame order and
# make the result reproducible across runs and machines.
image_files = [
    os.path.join(folder_path, file)
    for file in sorted(os.listdir(folder_path), key=_frame_sort_key)
    if file.endswith(".jpg")
]

# Create an instance of the ImageCaptionLoader
loader = ImageCaptionLoader(images=image_files)

# Load captions for the images (one document per image)
list_docs = loader.load()
list_docs

Downloading pytorch_model.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]



[Document(page_content='an image of a panda bear eating bamboo [SEP]', metadata={'image_path': 'test_data/output_frames/test_img.jpg'})]