In [1]:
# Load environment variables and read the images directory path
import os
import dotenv

# Locate the .env file, load it, then read the images directory from
# IMAGES_PATH (None when the variable is unset).
env_file = dotenv.find_dotenv()
dotenv.load_dotenv(env_file)
path = os.environ.get("IMAGES_PATH")

In [2]:
from PIL import Image
import pytesseract

def extract_text(filepath):
    """Run OCR on an image file and return the recognized text.

    Args:
        filepath: Path of the candidate file. Only names ending in
            ``.png``, ``.jpg`` or ``.jpeg`` (case-insensitive) are processed.

    Returns:
        The string produced by ``pytesseract.image_to_string``, or ``None``
        when the file name does not have a supported image extension.
    """
    # Compare the extension case-insensitively so "SCAN.PNG" is not skipped.
    if not filepath.lower().endswith((".png", ".jpg", ".jpeg")):
        return None

    # The context manager closes the image file once OCR is done, which
    # matters when this function is called for every file in a directory.
    with Image.open(filepath) as img:
        return pytesseract.image_to_string(img)

In [2]:
# Smoke-test extract_text with an empty path and print whatever it returns
sample_text = extract_text('')
print(sample_text)

KeyboardInterrupt: 

In [None]:
# Run OCR on every entry of the images directory and keep the results,
# keyed by file name, instead of discarding them.
extracted_texts = {}
for filename in os.listdir(path):
    # os.path.join picks the platform separator instead of a hard-coded '\\'
    extracted_texts[filename] = extract_text(os.path.join(path, filename))

In [None]:
import cv2
import math

def preprocess_image(image_path, hough_threshold=100, min_line_length=1000, max_line_gap=50):
    """Split an image into horizontal strips at the rows of detected lines.

    The image is converted to grayscale, Canny edges are computed and line
    segments are detected with the probabilistic Hough transform. The first
    y-coordinate of every detected segment is used as a split row.

    Args:
        image_path: Path of the image to load with ``cv2.imread``.
        hough_threshold: Accumulator threshold passed to ``cv2.HoughLinesP``.
        min_line_length: ``minLineLength`` passed to ``cv2.HoughLinesP``.
        max_line_gap: ``maxLineGap`` passed to ``cv2.HoughLinesP``.

    Returns:
        A list of grayscale row slices, one per pair of consecutive distinct
        split rows. Empty when no lines (or fewer than two distinct rows)
        were found.

    Raises:
        OSError: If OpenCV could not read ``image_path``.
    """
    img = cv2.imread(image_path)
    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise surface as an obscure error inside cvtColor.
    if img is None:
        raise OSError(f"Could not read image: {image_path!r}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    lines = cv2.HoughLinesP(
        edges,
        1,
        math.pi / 180,
        hough_threshold,
        minLineLength=min_line_length,
        maxLineGap=max_line_gap,
    )

    if lines is None:
        return []

    # Several segments often share the same row; de-duplicating the rows
    # avoids zero-height slices between identical split points.
    # NOTE(review): segments are not filtered by orientation, so a long
    # non-horizontal segment also contributes its first y-coordinate.
    split_points = sorted({int(line[0][1]) for line in lines})

    return [gray[top:bottom] for top, bottom in zip(split_points, split_points[1:])]

# Example: split one image into strips ("..." is a placeholder path)
thumbnails = preprocess_image("...")

# Write every non-empty strip to disk and report the empty ones
for i, thumb in enumerate(thumbnails):
    if thumb.size <= 0:
        print(f"Thumbnail {i} is empty")
        continue
    cv2.imwrite(f"thumbnail_{i}.png", thumb)
    # Further processing like OCR can go here

In [None]:
import cv2

# Cut and preprocess the images
def cutImage(image_path, min_area=5000):
    """Cut an image into full-width strips around large, wide contours.

    The image is binarized with an inverted adaptive threshold, external
    contours are extracted, and every contour whose area exceeds ``min_area``
    and whose bounding box is wider than tall yields one strip covering the
    rows of that bounding box.

    Args:
        image_path: Path of the image to load with ``cv2.imread``.
        min_area: Contours with an area at or below this value are ignored.

    Returns:
        A list of row slices of the loaded image, ordered top to bottom.

    Raises:
        OSError: If OpenCV could not read ``image_path``.
    """
    img = cv2.imread(image_path)
    # cv2.imread reports failure by returning None instead of raising.
    if img is None:
        raise OSError(f"Could not read image: {image_path!r}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Make the image binary and find the contours
    adaptive_thresh = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2
    )
    contours, _ = cv2.findContours(adaptive_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Compute each bounding box once, then order the boxes by their top row.
    boxes = [cv2.boundingRect(cnt) for cnt in contours if cv2.contourArea(cnt) > min_area]
    boxes.sort(key=lambda box: box[1])

    videos = []
    # boundingRect yields four values (x, y, w, h); unpacking only three,
    # as the previous code did, raises ValueError on the first contour.
    for _x, y, w, h in boxes:
        if w > h:
            videos.append(img[y:y + h])

    return videos

In [None]:
# NOTE(review): `test_path` is not assigned in any cell shown here; it must be
# defined before this cell runs.
cut_dir = os.path.join(path, 'cut')
# exist_ok avoids the separate existence check (and its race) before creating
os.makedirs(cut_dir, exist_ok=True)

for i, video_item in enumerate(cutImage(test_path)):
    # Join each component separately rather than embedding '/' in the name
    t_path = os.path.join(cut_dir, f'video_item_{i}.png')
    # cv2.imwrite returns False on failure instead of raising, so report it
    if not cv2.imwrite(t_path, video_item):
        print(f"Failed to write {t_path}")