In [2]:
!which python

/Users/beszabo/codes/progress_til_beadas/.venv/bin/python


In [3]:
import math, keras_ocr,re, os
from ocr.brands_list import brands
from groq import Groq
from pathlib import Path

GROQ_API_KEY = os.getenv('GROQ_API_KEY')

In [4]:
# Function to get distances for detections
def get_distance(predictions):
    x0, y0 = 0, 0
    detections = []
    for group in predictions:
        top_left_x, top_left_y = group[1][0]
        bottom_right_x = group[1][1][0]
        bottom_right_y = group[1][3][1] # correct bottom-right coordinates
        center_x = (top_left_x + bottom_right_x) / 2
        center_y = (top_left_y + bottom_right_y) / 2
        distance_from_origin = math.dist([x0, y0], [center_x, center_y])
        distance_y = center_y - y0
        detections.append({
            "text": group[0],
            "center_x": center_x,
            "center_y": center_y,
            "distance_from_origin": distance_from_origin,
            "distance_y": distance_y,
        })
    return detections

# Function to distinguish rows
def distinguish_rows(lst, thresh=15):
    sublists = []
    for i in range(len(lst) - 1):
        if lst[i + 1]["distance_y"] - lst[i]["distance_y"] <= thresh:
            if lst[i] not in sublists:
                sublists.append(lst[i])
            sublists.append(lst[i + 1])
        else:
            yield sublists
            sublists = [lst[i + 1]]
    yield sublists
    
# Check for brand keywords
def contains_expression(word_list, expressions):
    result_list = []
    for key in expressions:
        key_lower = re.escape(key.lower())
        # Check if the key exists as a complete word
        if re.search(rf'\b{key_lower}\b', word_list):
            result_list.append(key)
        # Check if any of the associated values exist as complete words
        for value in expressions[key]:
            value_lower = re.escape(value.lower())
            if re.search(rf'\b{value_lower}\b', word_list):
                result_list.append(key)
    if(not result_list):
        return None
    return result_list

In [5]:
def initialize_ocr_pipeline():
    """Initialize and return keras-ocr pipeline"""
    print("[DEBUG] Initializing keras-ocr pipeline")
    return keras_ocr.pipeline.Pipeline()

def read_image_safely(image_path):
    """Read image with error handling"""
    try:
        print(f"[DEBUG] Attempting to read image: {image_path}")
        read_image = keras_ocr.tools.read(image_path)
        if read_image is None or read_image.size == 0:
            raise ValueError("Image is empty or None")
        print("[DEBUG] Successfully read image")
        return read_image
    except Exception as e:
        print(f"[DEBUG] Failed to read image: {image_path}. Error: {e}")
        return None

def check_brand_with_llm(text, brand, api_key):
    """Check ambiguous brand mentions using LLM"""
    print(f"[DEBUG] Checking ambiguous brand '{brand}' with LLM")
    system_message = f"you decide if the sentence is about {brand} as a brand or the context says otherwise. Your answers can be: 'yes, no, cannot decide'"
    
    client = Groq(api_key=api_key)
    print("[DEBUG] Making LLM API call")
    chat_completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": text}
        ],
        model="llama3-70b-8192",
    )
    
    response_content = chat_completion.choices[0].message.content
    print(f"[DEBUG] LLM response: {response_content}")
    return "yes" in response_content.lower()

def process_single_image(image_path, pipeline, brands, brands_dict, api_key, ambiguous_brands):
    """Process a single image and update brands dictionary"""
    print(f"\n[DEBUG] Processing image: {image_path}")
    
    # Read image
    read_image = read_image_safely(image_path)
    if read_image is None:
        print("[DEBUG] Skipping image due to read failure")
        return brands_dict
        
    # Get predictions
    print("[DEBUG] Running OCR prediction")
    prediction_groups = pipeline.recognize([read_image])
    predictions = get_distance(prediction_groups[0])
    print(f"[DEBUG] Raw predictions: {predictions}")
    
    # Process predictions
    predictions = distinguish_rows(predictions)
    if predictions is None:
        predictions = []
    
    predictions = list(filter(lambda x: x != [], predictions))
    ordered_preds = [each["text"] for row in predictions for each in sorted(row, key=lambda x: x["distance_from_origin"])]
    ordered_text = " ".join(ordered_preds).lower()
    print(f"[DEBUG] Processed text: {ordered_text}")

    # Check for brands
    result_list = contains_expression(ordered_text, brands)
    print(f"[DEBUG] Detected brands: {result_list}")

    if result_list:
        filename = os.path.basename(image_path)

        for brand in result_list:
            print(f"[DEBUG] Processing brand: {brand}")
            if brand in ambiguous_brands:
                print(f"[DEBUG] {brand} is ambiguous, checking with LLM")
                if check_brand_with_llm(ordered_text, brand, api_key):
                    if filename not in brands_dict[brand]:
                        print(f"[DEBUG] Adding {filename} to {brand}")
                        brands_dict[brand].append(filename)
            else:
                if brand is not None and filename not in brands_dict[brand]:
                    print(f"[DEBUG] Adding {filename} to {brand}")
                    brands_dict[brand].append(filename)
                    
    return brands_dict

def process_images_in_directory(directory, brands, api_key, ambiguous_brands):
    """Process all images in directory and detect brands"""
    print(f"\n[DEBUG] Starting directory processing: {directory}")
    # Initialize OCR pipeline once
    pipeline = initialize_ocr_pipeline()
    brands_dict = {brand: [] for brand in brands}
    print(f"[DEBUG] Initialized brands dictionary with {len(brands)} brands")

    # Process files directly in the directory
    for filename in os.listdir(directory):
        image_path = os.path.join(directory, filename)
        if os.path.isfile(image_path):
            print(f"\n[DEBUG] Processing file: {filename}")
            brands_dict = process_single_image(
                image_path,
                pipeline,
                brands,
                brands_dict,
                api_key,
                ambiguous_brands
            )
    
    print("[DEBUG] Finished processing all images")
    return brands_dict


In [6]:

# Example usage
ambiguous_brands = ["apple", "oracle", "amazon", "tesla", "oracle", "visa", "zara", "ge", "ford", "corona", "intel", "linkedin", "hp", "hermes"]
print("[DEBUG] Starting brand detection with ambiguous brands:", ambiguous_brands)
results = process_images_in_directory(
    Path('ocr/brands_images'), 
    brands, 
    GROQ_API_KEY, 
    ambiguous_brands
)

[DEBUG] Starting brand detection with ambiguous brands: ['apple', 'oracle', 'amazon', 'tesla', 'oracle', 'visa', 'zara', 'ge', 'ford', 'corona', 'intel', 'linkedin', 'hp', 'hermes']

[DEBUG] Starting directory processing: ocr/brands_images
[DEBUG] Initializing keras-ocr pipeline
Looking for /Users/beszabo/.keras-ocr/craft_mlt_25k.h5
Instructions for updating:
Use `tf.image.resize(...method=ResizeMethod.BILINEAR...)` instead.
Looking for /Users/beszabo/.keras-ocr/crnn_kurapan.h5
[DEBUG] Initialized brands dictionary with 100 brands

[DEBUG] Processing file: .DS_Store

[DEBUG] Processing image: ocr/brands_images/.DS_Store
[DEBUG] Attempting to read image: ocr/brands_images/.DS_Store
[DEBUG] Failed to read image: ocr/brands_images/.DS_Store. Error: OpenCV(4.11.0) /Users/xperience/GHA-Actions-OpenCV/_work/opencv-python/opencv-python/opencv/modules/imgproc/src/color.cpp:199: error: (-215:Assertion failed) !_src.empty() in function 'cvtColor'

[DEBUG] Skipping image due to read failure

[DEB

  return lib.oriented_envelope(geometry, **kwargs)
  return lib.oriented_envelope(geometry, **kwargs)


[DEBUG] Raw predictions: [{'text': 'lq', 'center_x': 105.0, 'center_y': 97.0, 'distance_from_origin': 142.9475428260311, 'distance_y': 97.0}, {'text': 'airbnb', 'center_x': 118.99998474121094, 'center_y': 189.99998474121094, 'distance_from_origin': 224.1896308263806, 'distance_y': 189.99998474121094}]
[DEBUG] Processed text: airbnb
[DEBUG] Detected brands: ['airbnb']
[DEBUG] Processing brand: airbnb
[DEBUG] Adding airbnb_example.jpg to airbnb
[DEBUG] Finished processing all images


In [7]:
non_empty_brands_dict = {key: value for key, value in results.items() if value}

for key, value in non_empty_brands_dict.items():
    print(f"{key}: {len(value)}")

airbnb: 1
