# Preprocessing images

In [3]:
import cv2
import numpy as np
from PIL import Image, ImageFilter, ImageEnhance, ImageOps
import matplotlib.pyplot as plt
import os


In [4]:
def preprocess_image(image_path, factor=4, contrast=5.0, kernel_size=2, invert=True):
    """Prepare a screenshot for OCR and return it as a grayscale NumPy array.

    The pipeline is: upscale (Lanczos) -> sharpen -> boost contrast ->
    grayscale -> optional inversion -> morphological opening.

    Args:
        image_path: Path of the image file to open with Pillow.
        factor: Enlargement factor applied to both width and height.
        contrast: Factor passed to ``ImageEnhance.Contrast.enhance``;
            1.0 leaves the image unchanged, larger values increase contrast.
        kernel_size: Side length, in pixels, of the square structuring
            element used for the opening.
        invert: When True (default), invert the grayscale image before the
            opening.

    Returns:
        The processed image as a NumPy array (single-channel, converted from
        a Pillow "L" image), ready to be written with ``cv2.imwrite``.

    Raises:
        ValueError: If ``factor`` is not positive or ``kernel_size`` < 1.
    """
    if factor <= 0:
        raise ValueError(f"factor must be positive, got {factor!r}")
    if kernel_size < 1:
        raise ValueError(f"kernel_size must be >= 1, got {kernel_size!r}")

    # The context manager guarantees the underlying file handle is released;
    # resize() returns an independent image that outlives the `with` block.
    with Image.open(image_path) as img:
        width, height = img.size
        # round() keeps integer factors exact and lets fractional ones work too
        new_size = (
            max(1, round(width * factor)),
            max(1, round(height * factor)),
        )
        # Resize with quality interpolation
        resized_img = img.resize(new_size, Image.LANCZOS)

    # Apply a sharpening filter
    resized_img = resized_img.filter(ImageFilter.SHARPEN)

    # Increase contrast (1.0 = unchanged; the default of 5.0 is a strong boost)
    resized_img = ImageEnhance.Contrast(resized_img).enhance(contrast)

    # Convert to grayscale
    gray_img = resized_img.convert("L")

    # Swap light and dark pixels when requested
    if invert:
        gray_img = ImageOps.invert(gray_img)

    # Convert Pillow -> NumPy for OpenCV
    gray_np = np.array(gray_img)

    # Morphological "opening" (erosion followed by dilation) with a square kernel
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size))
    opened_np = cv2.morphologyEx(gray_np, cv2.MORPH_OPEN, kernel)

    return opened_np

In [5]:
# Input folders containing the screenshots to preprocess.
original_folders = ["../1_videos_to_relevant_screenshots/relevant_screenshots/cours_3",
                    "../1_videos_to_relevant_screenshots/relevant_screenshots/cours_5",
                    "../1_videos_to_relevant_screenshots/relevant_screenshots/cours_7",
                    "../1_videos_to_relevant_screenshots/relevant_screenshots/cours_9",
                    "../1_videos_to_relevant_screenshots/relevant_screenshots/cours_11"]

# Every processed image is written into this single flat folder.
destination_folder = "./relevant_screenshots_processed"
os.makedirs(destination_folder, exist_ok=True)

for folder in original_folders:
    # basename of the normalised path copes with trailing slashes and with
    # OS-specific separators, unlike a manual split on "/".
    folder_name = os.path.basename(os.path.normpath(folder))

    # os.listdir order is arbitrary; sort so re-runs process files identically.
    for image in sorted(os.listdir(folder)):
        if not image.endswith(".jpg"):
            continue

        processed_image = preprocess_image(os.path.join(folder, image))

        # Prefix with the folder name so equally named screenshots from
        # different folders do not overwrite each other.
        output_path = os.path.join(destination_folder, f"{folder_name}_{image}")

        # cv2.imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(output_path, processed_image):
            print(f"Failed to write {output_path}")

# OCR with Google Vision API

In [20]:
from google.cloud import vision
import os
import json



In [28]:
def process_images_with_vision(folder_path):
    """Run Google Cloud Vision text detection on every ``.jpg`` file of a folder.

    Args:
        folder_path: Directory whose ``.jpg`` files are sent to the API.

    Returns:
        A dict mapping each image file name to the full detected text, or to
        an empty string when no text was detected. Images for which the API
        response carries an error message are reported on stdout and left out
        of the dict. Keys are inserted in sorted file-name order.
    """
    client = vision.ImageAnnotatorClient()
    results = {}

    # os.listdir returns entries in arbitrary order; sorting makes the result
    # (and anything serialised from it) reproducible across runs and machines.
    for image_name in sorted(os.listdir(folder_path)):
        if not image_name.endswith(".jpg"):
            continue

        image_path = os.path.join(folder_path, image_name)
        print(f"Processing {image_name}...")

        with open(image_path, "rb") as image_file:
            content = image_file.read()

        image = vision.Image(content=content)
        response = client.text_detection(image=image)

        # Errors come back inside the response; skip the image but keep going.
        if response.error.message:
            print(f"Error processing {image_name}: {response.error.message}")
            continue

        annotations = response.text_annotations
        if annotations:
            # The first annotation contains the whole detected text
            full_text = annotations[0].description
            print(f"OCR result for {image_name}: {full_text}")
            results[image_name] = full_text
        else:
            results[image_name] = ""

    return results

In [29]:
def ocr_with_google_vision(folder_path="./relevant_screenshots_processed",
                           output_path="ocr_results.json"):
    """OCR every image of a folder and save the results as a JSON file.

    Args:
        folder_path: Directory handed to ``process_images_with_vision``.
        output_path: Destination of the JSON file; it holds the mapping
            returned by ``process_images_with_vision``.

    Returns:
        None. The results are written to ``output_path`` and a one-line
        summary is printed.
    """
    # Process all images
    results = process_images_with_vision(folder_path)

    # Save the results in a JSON file; ensure_ascii=False keeps non-ASCII
    # characters readable instead of \u-escaping them.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)

    print(f"Processed {len(results)} images. Results saved to {output_path}")

In [30]:
# Run OCR on the processed screenshots; the results are written to ocr_results.json.
ocr_with_google_vision()

Processing cours_7_screenshot_77.jpg...
OCR result for cours_7_screenshot_77.jpg: L. Marec, L. Quetel, et al. Insitu optical fibre sensors for temperature and salinity monitoring, OCS 2005
Processing cours_3_screenshot_66.jpg...
OCR result for cours_3_screenshot_66.jpg: K.J. Nelson et al. et al. Journal of the electrochemical society, 165(3) 2018
Processing cours_3_screenshot_72.jpg...
OCR result for cours_3_screenshot_72.jpg: G. Yan et al. Journal of the electrochemical society, 165(2) 2018
Processing cours_9_screenshot_27.jpg...
OCR result for cours_9_screenshot_27.jpg: Willets et al., Annu. Rev. Phys. Chem., 2007, 58, 267-97.
Processing cours_11_screenshot_98.jpg...
OCR result for cours_11_screenshot_98.jpg: L Gold et al.. J. Power Sources. 343 536-544 (2017).
Processing cours_9_screenshot_35.jpg...
OCR result for cours_9_screenshot_35.jpg: J. N. Anker et al., Nature Materials, 2008. 7.
Processing cours_7_screenshot_70.jpg...
OCR result for cours_7_screenshot_70.jpg: T. Guao, H.W Ta