In [5]:
import os
import glob
import xml.etree.ElementTree as ET
import shutil

SOURCE_DIR = "food-product-image-dataset-master/data/img"

IMAGES_OUTPUT_DIR = "dataset/images/train"
LABELS_OUTPUT_DIR = "dataset/labels/train"

os.makedirs(IMAGES_OUTPUT_DIR, exist_ok=True)
os.makedirs(LABELS_OUTPUT_DIR, exist_ok=True)

class_name_to_id = {}

In [4]:
import pandas as pd

df_products_english = pd.read_csv('product_information_english.csv')

# Convert GTIN to string, in case it’s int64
df_products_english['GTIN'] = df_products_english['GTIN'].astype(str)

# Create a set of all GTINs you care about
allowed_prefixes = set(df_products_english['GTIN'].unique())
print("Allowed prefixes:", allowed_prefixes)

  from . import _distributor_init


Allowed prefixes: {'4061458221337', '4061458072465', '4009286154359', '5600821261717', '4023900544869', '4061458000888', '4061458252645', '4061458079525', '4061458004206', '76121043', '4061458252669', '4061458003889', '4061458144827', '4060800300324', '4014500517407', '4061458006897', '5000112548167', '4061458000970', '22151179', '4316268591867', '90162565', '4104420021242', '4005500087151', '4008452031067', '5000159471510', '5707196160192', '4011932236012', '4061458040716', '4305615662886', '90433627', '4061458016933'}


In [7]:
def register_class_name(name):
    if name not in class_name_to_id:
        class_name_to_id[name] = len(class_name_to_id)
        

def convert_bbox_to_yolo(size, xmin, xmax, ymin, ymax):
    dw = 1.0/size[0]
    dh = 1.0/size[1]
    
    x_center = (xmin + xmax)/2.0
    y_center = (ymin + ymax)/ 2.0
    w = xmax - xmin
    h = ymax - ymin
    
    x_center = x_center * dw
    y_center = y_center * dh
    
    w = w * dw
    h = h * dh
    
    return (x_center, y_center, w, h)

In [9]:
## Iterate over the .xml files in allowed prefixes
xml_files = glob.glob(os.path.join(SOURCE_DIR, "*.xml"))

for xml_file in xml_files:
    base_xml_name = os.path.splitext(os.path.basename(xml_file))[0]
    
    product_prefix = base_xml_name.split("_")[0]
    
    if product_prefix not in allowed_prefixes:
        continue
    
    tree = ET.parse(xml_file)
    root = tree.getroot()
    
    filename = root.find('filename').text
    src_img_path = os.path.join(SOURCE_DIR, filename)
    
    size_node = root.find("size")
    img_width = int(size_node.find('width').text)
    img_height = int(size_node.find('height').text)
    
    yolo_bboxes = []
    
    for obj in root.findall('object'):
        class_name = obj.find('name').text
        register_class_name(class_name)
        
        class_id = class_name_to_id[class_name]
        
        bndbox = obj.find('bndbox')
        xmin = float(bndbox.find('xmin').text)
        xmax = float(bndbox.find('xmax').text)
        ymin = float(bndbox.find('ymin').text)
        ymax = float(bndbox.find('ymax').text)
        
        x_center, y_center, w, h = convert_bbox_to_yolo((img_height, img_width), xmin, xmax, ymin, ymax)
        yolo_bboxes.append(f"{class_id} {x_center} {y_center} {w} {h}")
        
    # Write YOLO bboxes to .txt
    yolo_label_path = os.path.join(LABELS_OUTPUT_DIR, base_xml_name + ".txt")
    with open(yolo_label_path, "w") as f:
        f.write("\n".join(yolo_bboxes))

    # Copy the .JPG into IMAGES_OUTPUT_DIR
    dst_img_path = os.path.join(IMAGES_OUTPUT_DIR, filename)
    shutil.copy2(src_img_path, dst_img_path)

# After processing, print the class map
print("Class to ID mapping:")
for cname, cid in class_name_to_id.items():
    print(f"{cid} -> {cname}")

# Optionally save classes to a file
with open("classes.txt", "w") as f:
    for cname in sorted(class_name_to_id, key=lambda x: class_name_to_id[x]):
        f.write(f"{cname}\n")
        

Class to ID mapping:
0 -> logo
1 -> variantDescription
2 -> productName
3 -> barcode
4 -> brandName
5 -> product
6 -> packagingRecyclingProcessType
7 -> netContent
8 -> drainedWeight
9 -> detailedProductName
10 -> ingredientStatement
11 -> address
12 -> nutritionTable
13 -> bestBeforeDate
14 -> hasBatchLotNumber
15 -> packagingMarkedLabelAccreditation
16 -> allergenStatement
17 -> countryOfOrigin
18 -> instructions
19 -> energyPerNutrientBasis
20 -> hasReturnablePackageDeposit
21 -> identityMark
22 -> organicClaim
23 -> qrCode
24 -> manufacturer
25 -> nutriScore
26 -> isFrozen


In [11]:
for key in class_name_to_id.keys():
    print(" - ", key)

 -  logo
 -  variantDescription
 -  productName
 -  barcode
 -  brandName
 -  product
 -  packagingRecyclingProcessType
 -  netContent
 -  drainedWeight
 -  detailedProductName
 -  ingredientStatement
 -  address
 -  nutritionTable
 -  bestBeforeDate
 -  hasBatchLotNumber
 -  packagingMarkedLabelAccreditation
 -  allergenStatement
 -  countryOfOrigin
 -  instructions
 -  energyPerNutrientBasis
 -  hasReturnablePackageDeposit
 -  identityMark
 -  organicClaim
 -  qrCode
 -  manufacturer
 -  nutriScore
 -  isFrozen


In [1]:
import os
import cv2
import pandas as pd
import easyocr

In [2]:
VAL_IMAGES_DIR = "dataset/images/val"
VAL_LABELS_DIR = "dataset/labels/val"

In [3]:
reader = easyocr.Reader(['de'], gpu=True, verbose=True)

results_list = []

In [5]:
for image_name in os.listdir(VAL_IMAGES_DIR):
    if not image_name.lower().endswith(".jpg"):
        continue
    print("Here1")
    image_path = os.path.join(VAL_IMAGES_DIR, image_name)
    
    label_name = os.path.splitext(image_name)[0] + ".txt"
    label_path = os.path.join(VAL_LABELS_DIR, label_name)
    
    if not os.path.exists(label_path):
        continue
    
    img = cv2.imread(image_path)
    
    if img is None:
        continue
    
    h, w, _ = img.shape
    
    with open(label_path, "r") as f:
        lines = f.read().strip().split("\n")
    print("here2")
    for line in lines:
        parts = line.strip().split()
        if len(parts) != 5:
            continue
        class_id = int(parts[0])
        x_center = float(parts[1])
        y_center = float(parts[2])
        bbox_width = float(parts[3])
        bbox_height = float(parts[4])
        
        abs_xmin = int((x_center - bbox_width/2) * w)
        abs_xmax = int((x_center + bbox_width/2) * w)
        abs_ymin = int((y_center - bbox_height/2) * h)
        abs_ymax = int((y_center + bbox_height/2) * h)
        
        roi = img[abs_ymin:abs_ymax, abs_xmin: abs_xmax]
        
        ocr_result = reader.readtext(roi)
        
        print(ocr_result)
        
        extracted_text = " ".join([r[1] for r in ocr_result]) if ocr_result else ""
        
        
        results_list.append({
            "image_name": image_name,
            "class_id": class_id,
            "xmin": abs_xmin,
            "ymin": abs_ymin,
            "xmax": abs_xmax,
            "ymax": abs_ymax,
            "recognized_text": extracted_text
        })
        


# Convert list of dicts to DataFrame
df_ocr = pd.DataFrame(results_list)
df_ocr.to_csv("ocr_results.csv", index=False)

print("OCR results saved to ocr_results.csv")

Here1
here2
[([[0, 0], [1017, 0], [1017, 408], [0, 408]], '@ccoot', 0.15487295891299785)]
[]
[([[0, 0], [542, 0], [542, 141], [0, 141]], 'ligBt taate', 0.6016568148945319)]
[([[0, 136], [24, 136], [24, 196], [0, 196]], '9', 0.4262082610795268)]
[([[335, 1589], [818, 1589], [818, 1651], [335, 1651]], 'OHNE KALORIEN', 0.9981811362819365), ([[32, 1644], [1160, 1644], [1160, 2162], [32, 2162]], '@ccGa', 0.13605321102236279), ([[287, 2135], [867, 2135], [867, 2315], [287, 2315]], 'ligft taate', 0.6551361025733373)]
Here1
here2
[([[9, 4], [370, 4], [370, 93], [9, 93]], 'D Eementienes', 0.429177894350446), ([[394, 0], [802, 0], [802, 77], [394, 77]], 'Gzntee Gatränk', 0.3786884406237547), ([[833, 13], [917, 13], [917, 77], [833, 77]], 'mit', 0.9877313826746232), ([[938, 20], [1080, 20], [1080, 94], [938, 94]], 'Iq', 0.6842534730003275), ([[0, 68], [1112, 68], [1112, 180], [0, 180]], "[Zionengeschmack   Zutaten: Wasser (1), Rohruicer' @", 0.12309595910917445), ([[7, 150], [1008, 150], [1008, 2