In [None]:
import cv2
import numpy as np
import os

def remove_horizontal_lines(image):
    """Return a white canvas with the detected wide structures blacked out.

    The input is binarised with Otsu's method and inverted.  The inverted
    mask is eroded 3 times and dilated 5 times with a wide rectangular
    kernel, so only regions large enough to survive the erosion remain (and
    end up slightly grown, because dilation runs more iterations).  That mask
    is then subtracted from an all-white image shaped like the input.

    Args:
        image: 8-bit single-channel image (Otsu thresholding requires this);
            the caller in this notebook passes a Canny edge map.

    Returns:
        Array with the same shape as ``image``: 255 everywhere except where
        the morphological filter kept a structure.
    """
    # With THRESH_OTSU the threshold is computed from the image, so the 128
    # passed here is not the value actually used; the returned threshold is
    # not needed either.
    _, image_bin = cv2.threshold(image, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    # Invert the image to change white to black and vice versa
    image_inv = 255 - image_bin

    # Kernel width is 1% of the image width.  Clamp to 1 so that images
    # narrower than 100 px do not request a zero-width structuring element.
    kernel_len = max(1, np.asarray(image).shape[1] // 100)
    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_len, 20))

    # Keep only regions that survive erosion with the wide kernel
    eroded = cv2.erode(image_inv, horizontal_kernel, iterations=3)
    horizontal_lines = cv2.dilate(eroded, horizontal_kernel, iterations=5)

    # Subtract the mask from a white canvas (not from the input image itself)
    return cv2.subtract(np.full_like(image, 255), horizontal_lines)

# Path to the recorded video file.
# NOTE(review): video_path is not passed to cv2.VideoCapture below, which is
# given index 0 instead -- confirm which source is intended.
video_path = "C:\\Users\\User\\Downloads\\TheAware.AI\\mp4_converted_video_journal.mp4"

# Create an "output" folder if it doesn't exist
output_folder = "output"
os.makedirs(output_folder, exist_ok=True)

# Open capture source 0 (an index, not the file path above)
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print("Could not open the video source")

# Previous bounding box, used to smooth the box position between frames
prev_x, prev_y, prev_w, prev_h = 50, 50, 50, 50
frame_count = 0

try:
    while cap.isOpened():
        # Read a frame from the video
        ret, frame = cap.read()

        if not ret:
            print("Finished")
            break

        # Absolute first-order gradients along y and x, converted to 8-bit
        sobel_y = cv2.convertScaleAbs(cv2.Sobel(frame, cv2.CV_64F, 0, 1, ksize=1))
        sobel_x = cv2.convertScaleAbs(cv2.Sobel(frame, cv2.CV_64F, 1, 0, ksize=1))
        # Saturating add: `+` on uint8 arrays wraps past 255, which would
        # turn the strongest edges into weak ones.
        sobel = cv2.add(sobel_x, sobel_y)

        # Apply Canny edge detection to the combined gradient image
        edges = cv2.Canny(sobel, 90, 150)

        # Remove horizontal lines
        new_image = remove_horizontal_lines(edges)

        # Find contours in the binary image
        contours, _ = cv2.findContours(new_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Draw a single rectangle around the largest contour, provided it
        # exceeds the area threshold
        if contours:
            max_contour = max(contours, key=cv2.contourArea)
            if cv2.contourArea(max_contour) > 20000:
                x, y, w, h = cv2.boundingRect(max_contour)

                # Stabilize the bounding box: 90% previous box, 10% new box
                x = int(0.9 * prev_x + 0.1 * x)
                y = int(0.9 * prev_y + 0.1 * y)
                w = int(0.9 * prev_w + 0.1 * w)
                h = int(0.9 * prev_h + 0.1 * h)

                # Unique filename for this frame (saving is currently disabled)
                frame_count += 1
                filename = os.path.join(output_folder, f"frame_{frame_count}.jpg")
                # cv2.imwrite(filename, frame)
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                # Add text "Text Region" on the bounding box
                cv2.putText(frame, "Text Region", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

                # Update the previous position
                prev_x, prev_y, prev_w, prev_h = x, y, w, h

        # Display the processed frame
        cv2.imshow("Processed Frame", frame)

        # Break the loop if 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and close all windows even if a frame raised
    cap.release()
    cv2.destroyAllWindows()


In [None]:
import os
import cv2
import re

# Input and output folders
input_folder = "C:\\Users\\User\\Downloads\\TheAware.AI\\output"
output_folder = "C:\\Users\\User\\Downloads\\TheAware.AI\\output_final"

# Ensure the output folder exists (exist_ok avoids a check-then-create race)
os.makedirs(output_folder, exist_ok=True)

# Read all frames from the input folder and sort them by the first number in
# the filename.  .jpg files without any digit cannot be ordered and are skipped.
frames = sorted(
    (f for f in os.listdir(input_folder) if f.endswith('.jpg') and re.search(r'\d+', f)),
    key=lambda x: int(re.search(r'\d+', x).group()),
)

# Select the 5th, 15th, 25th, and 30th frames for every 30 frames
selected_frames_indices = [4, 14, 24, 29]  # 0-based indices

# Save the selected frames for every 30 frames
for i in range(0, len(frames), 30):
    for selected_frame_idx in selected_frames_indices:
        current_frame_idx = i + selected_frame_idx
        if current_frame_idx >= len(frames):
            # The last window may hold fewer than 30 frames
            continue
        selected_frame = cv2.imread(os.path.join(input_folder, frames[current_frame_idx]))
        if selected_frame is None:
            # imread returns None for unreadable files; imwrite would raise on it
            print(f"Skipping unreadable frame: {frames[current_frame_idx]}")
            continue
        cv2.imwrite(os.path.join(output_folder, f"selected_frame_{current_frame_idx + 1}.jpg"), selected_frame)


In [None]:
import cv2
import os
import re

def extract_keyframes_sift(directory, output_directory, frame_interval=10, ratio_threshold=0.7):
    """Save frames that do not match any previously saved keyframe.

    Numbered ``.jpg`` files in ``directory`` are visited in numeric order,
    taking every ``frame_interval``-th file.  Each visited frame is compared,
    via SIFT descriptors and a ratio test, against every keyframe kept so
    far.  A frame is considered a duplicate when the number of good matches
    with some keyframe exceeds 20% of the frame's own keypoint count;
    otherwise it is written to ``output_directory`` as ``KeyFrame_<name>``.

    Args:
        directory: Folder containing the input ``.jpg`` frames.
        output_directory: Folder for the selected keyframes (created if missing).
        frame_interval: Step between visited frames.
        ratio_threshold: A match is "good" when its distance is below this
            fraction of the second-best match's distance.

    Returns:
        None.  Keyframes are written to disk.
    """
    # Create output directory if not exists
    os.makedirs(output_directory, exist_ok=True)

    # Files without a number in their name cannot be ordered, so skip them
    # instead of failing inside the sort key.
    frames = sorted(
        (f for f in os.listdir(directory) if f.endswith('.jpg') and re.search(r'\d+', f)),
        key=lambda x: int(re.search(r'\d+', x).group()),
    )
    if not frames:
        return

    # The detector and matcher are stateless across frames: build them once
    sift = cv2.SIFT_create()
    bf = cv2.BFMatcher()

    # Descriptors of the keyframes selected so far.  Caching them avoids
    # re-running SIFT on every keyframe for every new frame.
    keyframe_descriptors = []

    for i in range(0, len(frames), frame_interval):
        filename = frames[i]
        current_frame = cv2.imread(os.path.join(directory, filename), cv2.IMREAD_GRAYSCALE)

        # imread returns None for unreadable files
        if current_frame is None:
            continue

        # Detect keypoints and descriptors
        kp1, des1 = sift.detectAndCompute(current_frame, None)

        # Skip if no keypoints found
        if len(kp1) == 0:
            continue

        # Flag to check if the frame matches an already selected keyframe
        has_matches = False

        for des2 in keyframe_descriptors:
            matches = bf.knnMatch(des1, des2, k=2)

            # Ratio test.  knnMatch can return fewer than two neighbours when
            # the keyframe has fewer than two descriptors; such entries
            # cannot be ratio-tested and are ignored.
            good_matches = [
                pair[0]
                for pair in matches
                if len(pair) == 2 and pair[0].distance < ratio_threshold * pair[1].distance
            ]

            if len(good_matches) > 0.2 * len(kp1):
                has_matches = True
                break

        # If no keyframe matched, this frame becomes a new keyframe
        if not has_matches:
            output_path = os.path.join(output_directory, f'KeyFrame_{filename}')
            cv2.imwrite(output_path, current_frame)
            keyframe_descriptors.append(des1)

if __name__ == "__main__":
    # Sampled frames go in; SIFT-selected keyframes come out.
    input_directory = 'C:\\Users\\User\\Downloads\\TheAware.AI\\output_final'
    output_directory = 'C:\\Users\\User\\Downloads\\TheAware.AI\\keyframes_sift_output_final_n'

    # Visit every 5th frame; the ratio threshold keeps its default.
    extract_keyframes_sift(
        directory=input_directory,
        output_directory=output_directory,
        frame_interval=5,
    )


In [None]:
import os
import cv2
import shutil
import numpy as np
import re
from skimage.metrics import structural_similarity as ssim

# Function to calculate structural similarity index between two images
def calculate_ssim(image1, image2):
    """Return the structural similarity of two BGR images, compared in grayscale."""
    grayscale_pair = [cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) for img in (image1, image2)]
    return ssim(*grayscale_pair)

# Function to select key frames based on structural similarity
def select_key_frames(input_directory, output_directory, threshold=0.5):
    """Keep frames that differ enough from the last kept frame.

    Numbered ``.jpg`` files in ``input_directory`` are visited in numeric
    order.  The first readable frame is always kept; each later frame is kept
    when its SSIM against the most recently kept frame is below
    ``threshold``.  Kept frames are written to
    ``<output_directory>/keyframes_ssim_new`` as ``KeyFrame_<n>.jpg``,
    numbered from 1.

    Args:
        input_directory: Folder containing the candidate ``.jpg`` frames.
        output_directory: Parent folder for the ``keyframes_ssim_new`` folder.
        threshold: SSIM value below which a frame counts as a new key frame.

    Returns:
        None.  Key frames are written to disk.
    """
    # Create the output directory for key frames
    keyframes_directory = os.path.join(output_directory, 'keyframes_ssim_new')
    os.makedirs(keyframes_directory, exist_ok=True)

    # Files without a number in their stem cannot be ordered, so skip them
    # instead of failing inside the sort key.
    image_files = sorted(
        (
            f for f in os.listdir(input_directory)
            if f.endswith('.jpg') and re.search(r'\d+', os.path.splitext(f)[0])
        ),
        key=lambda x: int(re.search(r'\d+', os.path.splitext(x)[0]).group()),
    )
    if not image_files:
        # Nothing to select from; avoids indexing image_files[0] on an empty list
        return

    key_frames = []
    prev_frame = None

    for image_file in image_files:
        current_frame = cv2.imread(os.path.join(input_directory, image_file))
        if current_frame is None:
            # imread returns None for unreadable files
            continue

        # The first readable frame seeds the selection; later frames are kept
        # only when they are dissimilar to the last kept frame.
        if prev_frame is None or calculate_ssim(prev_frame, current_frame) < threshold:
            key_frames.append(current_frame)
            prev_frame = current_frame

    # Save the key frames to the new directory
    for idx, key_frame in enumerate(key_frames, start=1):
        output_path = os.path.join(keyframes_directory, f'KeyFrame_{idx}.jpg')
        cv2.imwrite(output_path, key_frame)

# SIFT-selected keyframes go in; SSIM-filtered keyframes come out.
input_directory = 'C:\\Users\\User\\Downloads\\TheAware.AI\\keyframes_sift_output_final_n'
output_directory = 'C:\\Users\\User\\Downloads\\TheAware.AI\\final_frames_ssim'

# Keep only frames whose SSIM against the last kept frame is below 0.5
select_key_frames(
    input_directory=input_directory,
    output_directory=output_directory,
    threshold=0.5,
)


In [None]:
!pip install google-cloud-vision

In [None]:
import os
from google.cloud import vision
import re
# NOTE(review): machine-specific absolute path to a service-account key file;
# consider setting GOOGLE_APPLICATION_CREDENTIALS outside the notebook.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'C:\\Users\\User\\Downloads\\TheAware.AI\\mirror-theaware-ai-07817b987170.json'

vision_client = vision.ImageAnnotatorClient()
image = vision.Image()

# Specify the local file path of the image
IMAGE_PATH = 'C:\\Users\\User\\Downloads\\TheAware.AI\\final_frames_ssim\\keyframes_ssim_new\\KeyFrame_2.jpg'

# Read the image file and set it as the content of the 'image' object
with open(IMAGE_PATH, 'rb') as image_file:
    image_content = image_file.read()
    image.content = image_content

# Perform text detection on the image
response = vision_client.text_detection(image=image)

# A failed annotation is reported in response.error rather than raised, so
# check it explicitly instead of mistaking a failure for "no text".
if response.error.message:
    raise RuntimeError(f"Vision API text detection failed: {response.error.message}")

# Extract text annotations from the response
text_annotations = response.text_annotations

if text_annotations:
    # Use the description of the first text annotation
    text = text_annotations[0].description
    print(text)
else:
    # Keep `text` defined so the cell neither raises NameError nor prints a
    # stale value left over from an earlier run.
    text = ""
    print("No text detected in the image.")

In [None]:
import os
from google.cloud import vision
import re
# Point the Google client libraries at the service-account key file
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'C:\\Users\\User\\Downloads\\TheAware.AI\\mirror-theaware-ai-07817b987170.json'

vision_client = vision.ImageAnnotatorClient()
image = vision.Image()

# Local image to analyse
IMAGE_PATH = 'C:\\Users\\User\\Downloads\\TheAware.AI\\animaldoodles-1.jpg'

# Load the raw bytes, then attach them to the request image
with open(IMAGE_PATH, 'rb') as image_file:
    image_content = image_file.read()
image.content = image_content

# Run object localization and collect the annotations
response = vision_client.object_localization(image=image)
objects = response.localized_object_annotations

# Report name, confidence and normalized bounding-box corners per object
for obj in objects:
    print(f"Object name: {obj.name}")
    print(f"Confidence: {obj.score:.2%}")
    print(f"Bounding box vertices:")
    for vertex in obj.bounding_poly.normalized_vertices:
        print(f"  - ({vertex.x}, {vertex.y})")


Document AI for Text Detection

In [None]:
from typing import Optional, Sequence

from google.api_core.client_options import ClientOptions
from google.cloud import documentai


In [None]:
!pip install --upgrade google-cloud-documentai

In [None]:
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

def convert_text_to_pdf(text, output_pdf):
    """Write ``text`` to a letter-sized PDF, one PDF line per line of text.

    ``drawString`` renders exactly one line and does not break on newline
    characters, so the text is split on ``"\\n"`` and each piece is drawn on
    its own baseline.  A new page is started when the baseline would fall
    below the bottom margin.  Single-line text is drawn at (100, 700), as
    before.

    Args:
        text: Text to render; may contain newlines.
        output_pdf: Path of the PDF file to create (overwritten if present).

    Returns:
        None.  The PDF is written to ``output_pdf`` and a confirmation is printed.
    """
    left_margin, top_y, bottom_y, line_height = 100, 700, 50, 12

    # Create a PDF file
    with open(output_pdf, 'wb') as pdf_file:
        # Create a PDF canvas
        pdf = canvas.Canvas(pdf_file, pagesize=letter)

        y_position = top_y
        for line in text.split('\n'):
            if y_position < bottom_y:
                # Page is full: finish it and continue at the top of a new one
                pdf.showPage()
                y_position = top_y
            pdf.drawString(left_margin, y_position, line)
            y_position -= line_height

        # Save the PDF file
        pdf.save()

    print(f"Text converted to PDF successfully: {output_pdf}")

# Specify the output PDF file for the converted text.
# NOTE(review): nothing in the visible notebook reads this module-level value;
# process_document_ocr_sample assigns its own local output_text_pdf.
output_text_pdf = 'C:\\Users\\User\\Downloads\\TheAware.AI\\Digital_text.pdf'




In [None]:
def process_document_ocr_sample(
    project_id: str,
    location: str,
    processor_id: str,
    processor_version: str,
    file_path: str,
    mime_type: str,
) -> None:
    """Run Document AI OCR on one file and print a summary of the result.

    The request enables style info, native PDF parsing, image quality scores
    and symbol detection.  The full text and the page count are printed,
    followed by per-page details produced by the ``print_*`` helpers, and
    finally the detected text styles when there are any.

    Args:
        project_id: Google Cloud project that owns the processor.
        location: Processor location, forwarded to ``process_document``.
        processor_id: Identifier of the processor.
        processor_version: Processor version to invoke.
        file_path: Local path of the file to process.
        mime_type: MIME type of that file.
    """
    # Optional OCR settings; see https://cloud.google.com/document-ai/docs/document-ocr
    ocr_config = documentai.OcrConfig(
        compute_style_info=True,
        enable_native_pdf_parsing=True,
        enable_image_quality_scores=True,
        enable_symbol=True,
    )
    process_options = documentai.ProcessOptions(ocr_config=ocr_config)

    # Online processing request to Document AI
    document = process_document(
        project_id,
        location,
        processor_id,
        processor_version,
        file_path,
        mime_type,
        process_options=process_options,
    )
    text = document.text

    def convert_text_to_pdf(lines, output_pdf):
        """Draw each entry of ``lines`` on its own baseline, 12 units apart from y=700."""
        with open(output_pdf, 'wb') as pdf_file:
            pdf = canvas.Canvas(pdf_file, pagesize=letter)
            for line_number, line in enumerate(lines):
                pdf.drawString(100, 700 - 12 * line_number, line)
            pdf.save()
        print(f"Text converted to PDF successfully: {output_pdf}")

    # Inputs for the PDF export; the export call itself is currently disabled.
    text_lines = text.split('\n')
    output_text_pdf = 'C:\\Users\\User\\Downloads\\TheAware.AI\\Digital_text_2.pdf'
    # convert_text_to_pdf(text_lines, output_text_pdf)

    print(f"Full document text: {text}\n")
    print(f"There are {len(document.pages)} page(s) in this document.\n")

    for page in document.pages:
        print(f"Page {page.page_number}:")
        print_page_dimensions(page.dimension)
        print_detected_langauges(page.detected_languages)

        # Layout elements, from coarsest to finest
        print_blocks(page.blocks, text)
        print_paragraphs(page.paragraphs, text)
        print_lines(page.lines, text)
        print_tokens(page.tokens, text)

        # Symbols and quality scores are only reported when present
        if page.symbols:
            print_symbols(page.symbols, text)
        if page.image_quality_scores:
            print_image_quality_scores(page.image_quality_scores)

    if document.text_styles:
        print_styles(document.text_styles, text)


def print_page_dimensions(dimension: documentai.Document.Page.Dimension) -> None:
    """Print the width and height of a page, one value per line."""
    width, height = dimension.width, dimension.height
    print(f"    Width: {width}")
    print(f"    Height: {height}")


def print_detected_langauges(
    detected_languages: Sequence[documentai.Document.Page.DetectedLanguage],
) -> None:
    """Print each detected language code with its confidence as a percentage."""
    print("    Detected languages:")
    for detected in detected_languages:
        code, confidence = detected.language_code, detected.confidence
        print(f"        {code} ({confidence:.1%} confidence)")


def print_blocks(blocks: Sequence[documentai.Document.Page.Block], text: str) -> None:
    """Print the block count and the text of the first and last block.

    Args:
        blocks: Blocks of one page; may be empty.
        text: Full document text that the block layouts index into.
    """
    print(f"    {len(blocks)} blocks detected:")
    if len(blocks) == 0:
        # A page without blocks has no first/last element to index
        return
    first_block_text = layout_to_text(blocks[0].layout, text)
    print(f"        First text block: {repr(first_block_text)}")
    last_block_text = layout_to_text(blocks[-1].layout, text)
    print(f"        Last text block: {repr(last_block_text)}")


def print_paragraphs(
    paragraphs: Sequence[documentai.Document.Page.Paragraph], text: str
) -> None:
    """Print the paragraph count and the text of the first and last paragraph.

    Args:
        paragraphs: Paragraphs of one page; may be empty.
        text: Full document text that the paragraph layouts index into.
    """
    print(f"    {len(paragraphs)} paragraphs detected:")
    if len(paragraphs) == 0:
        # A page without paragraphs has no first/last element to index
        return
    first_paragraph_text = layout_to_text(paragraphs[0].layout, text)
    print(f"        First paragraph text: {repr(first_paragraph_text)}")
    last_paragraph_text = layout_to_text(paragraphs[-1].layout, text)
    print(f"        Last paragraph text: {repr(last_paragraph_text)}")


def print_lines(lines: Sequence[documentai.Document.Page.Line], text: str) -> None:
    """Print the line count and the text of the first and last line.

    Args:
        lines: Lines of one page; may be empty.
        text: Full document text that the line layouts index into.
    """
    print(f"    {len(lines)} lines detected:")
    if len(lines) == 0:
        # A page without lines has no first/last element to index
        return
    first_line_text = layout_to_text(lines[0].layout, text)
    print(f"        First line text: {repr(first_line_text)}")
    last_line_text = layout_to_text(lines[-1].layout, text)
    print(f"        Last line text: {repr(last_line_text)}")


def print_tokens(tokens: Sequence[documentai.Document.Page.Token], text: str) -> None:
    """Print the token count plus text and break type of the first and last token.

    Args:
        tokens: Tokens of one page; may be empty.
        text: Full document text that the token layouts index into.
    """
    print(f"    {len(tokens)} tokens detected:")
    if len(tokens) == 0:
        # A page without tokens has no first/last element to index
        return
    first_token_text = layout_to_text(tokens[0].layout, text)
    first_token_break_type = tokens[0].detected_break.type_.name
    print(f"        First token text: {repr(first_token_text)}")
    print(f"        First token break type: {repr(first_token_break_type)}")
    last_token_text = layout_to_text(tokens[-1].layout, text)
    last_token_break_type = tokens[-1].detected_break.type_.name
    print(f"        Last token text: {repr(last_token_text)}")
    print(f"        Last token break type: {repr(last_token_break_type)}")


def print_symbols(
    symbols: Sequence[documentai.Document.Page.Symbol], text: str
) -> None:
    """Print the symbol count and the text of the first and last symbol.

    Indexes ``symbols[0]`` and ``symbols[-1]``, so the sequence must be
    non-empty.
    """
    print(f"    {len(symbols)} symbols detected:")
    leading = layout_to_text(symbols[0].layout, text)
    print(f"        First symbol text: {leading!r}")
    trailing = layout_to_text(symbols[-1].layout, text)
    print(f"        Last symbol text: {trailing!r}")


def print_image_quality_scores(
    image_quality_scores: documentai.Document.Page.ImageQualityScores,
) -> None:
    """Print the overall quality score, then each detected defect with its confidence."""
    overall = image_quality_scores.quality_score
    print(f"    Quality score: {overall:.1%}")
    print("    Detected defects:")

    for defect in image_quality_scores.detected_defects:
        print(f"        {defect.type_}: {defect.confidence:.1%}")


def print_styles(styles: Sequence[documentai.Document.Style], text: str) -> None:
    """Print the style count and the attributes of the first style.

    Indexes ``styles[0]``, so the sequence must be non-empty.
    """
    print(f"    {len(styles)} styles detected:")
    first_style = styles[0]
    styled_text = layout_to_text(first_style.layout, text)
    print(f"        First style text: {styled_text!r}")
    print(f"           Color: {first_style.color}")
    print(f"           Background Color: {first_style.background_color}")
    print(f"           Font Weight: {first_style.font_weight}")
    print(f"           Text Style: {first_style.text_style}")
    print(f"           Text Decoration: {first_style.text_decoration}")
    print(f"           Font Size: {first_style.font_size.size}{first_style.font_size.unit}")
    print(f"           Font Family: {first_style.font_family}")


def process_document(
    project_id: str,
    location: str,
    processor_id: str,
    processor_version: str,
    file_path: str,
    mime_type: str,
    process_options: Optional[documentai.ProcessOptions] = None,
) -> documentai.Document:
    """Send one local file to a Document AI processor version and return the result.

    Args:
        project_id: Google Cloud project that owns the processor.
        location: Processor location; also used to build the API endpoint.
        processor_id: Identifier of the processor.
        processor_version: Processor version to invoke.
        file_path: Local path of the file to upload.
        mime_type: MIME type of that file.
        process_options: Optional extra processing options (only supported
            for the Document OCR processor).

    Returns:
        The ``document`` field of the processing response.
    """
    # The endpoint is derived from the location; it must be set explicitly
    # when the location is anything other than "us".
    endpoint = f"{location}-documentai.googleapis.com"
    client = documentai.DocumentProcessorServiceClient(
        client_options=ClientOptions(api_endpoint=endpoint)
    )

    # Full resource name of the processor version:
    # projects/{project_id}/locations/{location}/processors/{processor_id}/processorVersions/{processor_version_id}
    # The processor must already exist.
    version_name = client.processor_version_path(
        project_id, location, processor_id, processor_version
    )

    # Load the whole file into memory
    with open(file_path, "rb") as source_file:
        raw_bytes = source_file.read()

    raw_document = documentai.RawDocument(content=raw_bytes, mime_type=mime_type)
    request = documentai.ProcessRequest(
        name=version_name,
        raw_document=raw_document,
        process_options=process_options,
    )

    # Document attributes: https://cloud.google.com/document-ai/docs/reference/rest/v1/Document
    return client.process_document(request=request).document


def layout_to_text(layout: documentai.Document.Page.Layout, text: str) -> str:
    """
    Resolve a layout's text anchor into the string it refers to.

    Document AI identifies text in different parts of the document by offsets
    into the document's full text.  A layout that spans several lines is
    stored as several text segments; their slices are concatenated in order.
    """
    pieces = []
    for segment in layout.text_anchor.text_segments:
        start, end = int(segment.start_index), int(segment.end_index)
        pieces.append(text[start:end])
    return "".join(pieces)

# TODO(developer): Edit these variables before running the sample.
project_id = "theaware-ai"
# Format is 'us' or 'eu'
location = "us"
# Create processor before running sample
processor_id = "7"
processor_version = "pretrained-ocr-v2.0-2023-06-02"
file_path = "C:\\Users\\User\\Downloads\\TheAware.AI\\final_frames_ssim\\keyframes_ssim_new\\KeyFrame_10.jpg"
# Refer to https://cloud.google.com/document-ai/docs/file-types for supported file types
mime_type = "image/jpeg"


# Run OCR on the selected key frame and print the summary
process_document_ocr_sample(
    project_id=project_id,
    location=location,
    processor_id=processor_id,
    processor_version=processor_version,
    file_path=file_path,
    mime_type=mime_type,
)