In [1]:
import os
import re
import csv
import easyocr
import cv2

In [None]:
import os
import re
import csv
import easyocr
import cv2


def preprocess_image(image_path):
    """
    Prepare an image for OCR and write the result to disk.

    The image is converted to grayscale, smoothed with a 5x5 Gaussian blur
    and binarized with Otsu's threshold. The result is written to
    'preprocessed_image.jpg' in the current working directory, replacing
    whatever an earlier call left there.

    Args:
        image_path (str): Path to the image file.
    Returns:
        str: Path to the preprocessed image file.
    Raises:
        FileNotFoundError: If the image cannot be read.
        OSError: If the preprocessed image cannot be written.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising;
    # without this check the error only surfaces later inside cvtColor.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    denoised = cv2.GaussianBlur(gray, (5, 5), 0)

    # The threshold value 0 is a placeholder: THRESH_OTSU picks the value.
    _, thresholded = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    preprocessed_path = 'preprocessed_image.jpg'
    # A failed write must not go unnoticed: the output path is reused for
    # every image, so the caller would otherwise read a stale file.
    if not cv2.imwrite(preprocessed_path, thresholded):
        raise OSError(f"Could not write preprocessed image: {preprocessed_path}")

    return preprocessed_path


def parse_recipe(text):
    """
    Split OCR text into a recipe title, ingredient list and instructions.

    The text is read line by line. A line containing an ingredients keyword
    ("ingredients", "what is in it") or an instructions keyword
    ("instructions", "yapılışı", "directions", "method") switches the
    current section and is itself discarded. The first non-empty line seen
    before any section header becomes the title; other lines outside a
    section are ignored.

    Args:
        text (str): Raw recipe text, one item per line.
    Returns:
        dict: 'title' (str or None), 'ingredients' (list of str) and
        'instructions' (str, the instruction lines joined by spaces).
    """
    # NOTE: the ingredients pattern must not contain an empty alternative
    # ("...|)"): it matches at every word boundary, which would turn every
    # line of text into a section header.
    ingredients_header = re.compile(r'\b(ingredients|what is in it)\b', re.IGNORECASE)
    instructions_header = re.compile(r'\b(instructions|yapılışı|directions|method)\b', re.IGNORECASE)

    title = None
    ingredients = []
    instructions = []

    in_ingredients = False
    in_instructions = False

    for line in (text or '').split('\n'):
        line = line.strip()

        if not line:
            continue

        # Detect "Ingredients" section
        if ingredients_header.search(line):
            in_ingredients = True
            in_instructions = False
            continue

        # Detect "Instructions" section
        if instructions_header.search(line):
            in_instructions = True
            in_ingredients = False
            continue

        if in_ingredients:
            ingredients.append(line)
        elif in_instructions:
            instructions.append(line)
        elif title is None:
            # First non-empty line before any section header
            title = line

    # Lines were stripped above, so no further cleanup is needed here
    return {
        'title': title,
        'ingredients': ingredients,
        'instructions': ' '.join(instructions)
    }


# Shared EasyOCR reader, created on first use. Building a Reader loads the
# recognition models, so it is done once instead of once per image.
_EASYOCR_READER = None


def _get_easyocr_reader():
    """Return the shared English/Turkish EasyOCR reader, creating it if needed."""
    global _EASYOCR_READER
    if _EASYOCR_READER is None:
        _EASYOCR_READER = easyocr.Reader(['en', 'tr'], gpu=True)
    return _EASYOCR_READER


def extract_text_with_easyocr(image_path):
    """
    Extract text from an image using EasyOCR.

    The reader is configured for English and Turkish and is reused across
    calls.

    Args:
        image_path (str): Path to the image file.
    Returns:
        str: Extracted text from the image, one detected text fragment
        per line.
    """
    reader = _get_easyocr_reader()

    # detail=0 makes readtext return only the recognized strings
    result = reader.readtext(image_path, detail=0)

    # Combine all detected text
    return '\n'.join(result)


def save_to_csv(parsed_recipes, output_csv_path):
    """
    Write parsed recipes to a CSV file, one row per recipe.

    The file is created (or overwritten) as UTF-8 with a header row of
    'title', 'ingredients' and 'instructions'. Each recipe's ingredient
    list is flattened into a single cell, separated by '; '.

    Args:
        parsed_recipes (list of dict): Recipes with 'title', 'ingredients'
            (list of str) and 'instructions' keys.
        output_csv_path (str): Path to the output CSV file.
    """
    columns = ['title', 'ingredients', 'instructions']

    with open(output_csv_path, 'w', encoding='utf-8', newline='') as out_file:
        table = csv.DictWriter(out_file, fieldnames=columns)
        table.writeheader()
        table.writerows(
            {
                'title': entry['title'],
                'ingredients': '; '.join(entry['ingredients']),
                'instructions': entry['instructions'],
            }
            for entry in parsed_recipes
        )


def process_recipes_from_folder(image_folder, output_csv_path):
    """
    Run the OCR pipeline over every image in a folder and write one CSV.

    Files whose names end in .png, .jpeg or .jpg (case-insensitive) are
    preprocessed, read with EasyOCR and parsed into recipes; all other
    entries in the folder are skipped. The parsed recipes are saved
    together once every image has been handled.

    Args:
        image_folder (str): Path to the folder containing recipe images.
        output_csv_path (str): Path to the output CSV file.
    """
    image_suffixes = ('.png', '.jpeg', '.jpg')
    collected = []

    for filename in os.listdir(image_folder):
        # Skip anything that is not a supported image file
        if not filename.lower().endswith(image_suffixes):
            continue

        print(f"Processing {filename}...")

        # Clean up the image, OCR the cleaned copy, then structure the text
        cleaned_path = preprocess_image(os.path.join(image_folder, filename))
        raw_text = extract_text_with_easyocr(cleaned_path)
        collected.append(parse_recipe(raw_text))

    # Write everything in one go after the whole folder has been processed
    save_to_csv(collected, output_csv_path)
    print(f"All recipes processed and saved to {output_csv_path}.")


# Input folder holding the recipe images, and destination of the CSV output
image_folder = 'C:/Users/oyku_/Desktop/image-pro/im/recipe_pic_keras_test'
output_csv_path = 'C:/Users/oyku_/Desktop/parsed_recipes.csv'

# Run the pipeline: OCR every image in the folder, then write the CSV
process_recipes_from_folder(
    image_folder=image_folder,
    output_csv_path=output_csv_path,
)
