In [1]:
import os
import re
import csv
import easyocr
import cv2

from spellchecker import SpellChecker


In [None]:
# def preprocess_image(image_path):
#     image = cv2.imread(image_path)
#     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
#     denoised = cv2.GaussianBlur(gray, (5, 5), 0)
#     _, thresholded = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
#     preprocessed_path = 'preprocessed_image.jpg'
#     cv2.imwrite(preprocessed_path, thresholded)
#     return preprocessed_path

def extract_text_with_easyocr(image_path):
    reader = easyocr.Reader(['en'], gpu=True)
    result = reader.readtext(image_path, detail=0)
    return '\n'.join(result)

def parse_recipe(text):
    title, ingredients, instructions = [], [], []
    lines = text.split('\n')
    in_ingredients, in_instructions = False, False
    for line in lines:
        line = line.strip()
        if not line:
            continue
        if re.search(r'\b(ingredients|what is in it)\b', line, re.IGNORECASE):
            in_ingredients, in_instructions = True, False
            continue
        if re.search(r'\b(instructions|documents|directions|method)\b', line, re.IGNORECASE):
            in_instructions, in_ingredients = True, False
            continue
        if not in_ingredients and not in_instructions and len(title) < 3:
            title.append(line)
        if in_ingredients:
            ingredients.append(line)
        if in_instructions:
            instructions.append(line)
    return {
        'title': ' '.join(title).strip() if title else "N/A",
        'ingredients': [item.strip() for item in ingredients] if ingredients else ["N/A"],
        'instructions': ' '.join([item.strip() for item in instructions]) if instructions else "N/A"
    }

def calculate_spelling_accuracy(text):
    spell = SpellChecker()
    words = text.split()
    misspelled = spell.unknown(words)
    total_words = len(words)
    correct_words = total_words - len(misspelled)
    accuracy = correct_words / total_words if total_words > 0 else 0
    print(f"Total Words: {total_words}")
    print(f"Correct Words: {correct_words}")
    print(f"Misspelled Words: {len(misspelled)}")
    print(f"Misspelled Words List: {', '.join(misspelled)}")
    print(f"Spelling Accuracy: {accuracy:.2%}")
    return accuracy

def save_to_csv(parsed_recipes, output_csv_path):
    with open(output_csv_path, 'w', encoding='utf-8', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=['title', 'ingredients', 'instructions', 'accuracy'])
        writer.writeheader()
        for recipe in parsed_recipes:
            writer.writerow({
                'title': recipe['title'],
                'ingredients': '; '.join(recipe['ingredients']),
                'instructions': recipe['instructions'],
                'accuracy': f"{recipe['accuracy']:.2%}"
            })

def process_recipes_from_folder(image_folder, output_csv_path):
    parsed_recipes = []
    for filename in os.listdir(image_folder):
        if filename.lower().endswith(('.png', '.jpeg', '.jpg')):
            image_path = os.path.join(image_folder, filename)
            print(f"\nProcessing {filename}...")
            ocr_text = extract_text_with_easyocr(image_path)
            if not ocr_text.strip():
                print(f"Warning: No text extracted from {filename}. Skipping.")
                continue
            print(f"OCR Output for {filename}:\n{ocr_text}\n")
            accuracy = calculate_spelling_accuracy(ocr_text)
            parsed_recipe = parse_recipe(ocr_text)
            parsed_recipe['accuracy'] = accuracy
            if (parsed_recipe['title'] == "N/A" and
                parsed_recipe['ingredients'] == ["N/A"] and
                parsed_recipe['instructions'] == "N/A"):
                print(f"Skipping empty recipe from {filename}.")
                continue
            parsed_recipes.append(parsed_recipe)
    save_to_csv(parsed_recipes, output_csv_path)
    print(f"\nAll recipes processed and saved to {output_csv_path}.")

image_folder = '../recipe_pics'
output_csv_path = 'parsed_recipes.csv'
process_recipes_from_folder(image_folder, output_csv_path)



Processing 1c30062c9802965bb70b65d5f00c5255.jpg...
OCR Output for 1c30062c9802965bb70b65d5f00c5255.jpg:
Tablea
Cupcakes
AAs
Dooky

Total Words: 4
Correct Words: 1
Misspelled Words: 3
Misspelled Words List: dooky, aas, tablea
Spelling Accuracy: 25.00%

Processing 1c659613eff2a71acd6a8126b672a93f.jpg...
OCR Output for 1c659613eff2a71acd6a8126b672a93f.jpg:
Forily
Pasta Carbonara

Total Words: 3
Correct Words: 1
Misspelled Words: 2
Misspelled Words List: carbonara, forily
Spelling Accuracy: 33.33%

Processing 1c7ea2edc0c5d5a4cb5a2f33b96114b1.jpg...
OCR Output for 1c7ea2edc0c5d5a4cb5a2f33b96114b1.jpg:
tasty Recipes
4d
Chicken Bacon Ranch Roll-Ups
Ingredients:
6 slices of mozzarella cheese
1 1/2 cups cooked shredded or
chicken
1/4 cup cooked bacon (2-3 slices)
tablespoon low-carb ranch dressing
1 teaspoon finely sliced green onion
Instructions:
1. Combine shredded chicken; cooked bacon; ranch dressing; and
green onions in a bowl. Set aside.
2 Preheat the oven to 350PF and line a large bakin

In [3]:
def calculate_spelling_accuracy(text_file):

    spell = SpellChecker()

    with open(text_file, 'r', encoding='utf-8') as f:
        text_content = f.read()

    words = text_content.split()

    misspelled = spell.unknown(words)

    total_words = len(words)
    correct_words = total_words - len(misspelled)
    accuracy = correct_words / total_words if total_words > 0 else 0

    print(f"Total Words: {total_words}")
    print(f"Correct Words: {correct_words}")
    print(f"Misspelled Words: {len(misspelled)}")
    print(f"Spelling Accuracy: {accuracy:.2%}")

    return accuracy

text_file = 'parsed_recipes.csv'
calculate_spelling_accuracy(text_file)

Total Words: 17371
Correct Words: 10354
Misspelled Words: 7017
Spelling Accuracy: 59.61%


0.5960508894133901