<a href="https://colab.research.google.com/github/Snathika25/Safety-Scan/blob/main/backend.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [45]:
import re
import io
import json
import numpy as np
from datetime import datetime, date
from PIL import Image
import easyocr
import dateparser

# Shared EasyOCR reader, created once at import time and reused by
# robust_ocr(). Configured for the 'en' language list with gpu=False.
reader = easyocr.Reader(['en'], gpu=False)


def robust_ocr(image_bytes: bytes) -> str:
    """Run OCR on an encoded image and return the recognised text.

    The bytes are decoded with Pillow, converted to RGB, and passed to the
    module-level ``reader`` as a NumPy array. The fragments returned by
    ``readtext(..., detail=0)`` are joined with single spaces.
    """
    buffer = io.BytesIO(image_bytes)
    rgb_image = Image.open(buffer).convert("RGB")
    pixels = np.array(rgb_image)
    fragments = reader.readtext(pixels, detail=0)
    return " ".join(fragments)


def clean_ocr_text(text: str) -> str:
    """Normalise separators and whitespace in raw OCR output.

    Commas become dots, each semicolon (together with any whitespace around
    it) becomes a single dot, runs of whitespace collapse to one space, and
    the result is stripped at both ends.
    """
    dotted = text.replace(",", ".")
    # A semicolon swallows its surrounding whitespace so "17 ; 06" -> "17.06".
    dotted = re.sub(r"\s*;\s*", ".", dotted)
    single_spaced = re.sub(r"\s+", " ", dotted)
    return single_spaced.strip()


def parse_date(date_str: str):
    """Parse a date string with ``dateparser`` using day-month-year order.

    Returns whatever ``dateparser.parse`` returns for the given string;
    callers in this file treat a falsy result as "not a date".
    """
    parser_settings = {"DATE_ORDER": "DMY", "STRICT_PARSING": False}
    return dateparser.parse(date_str, settings=parser_settings)


def extract_dates_with_inference(text: str):
    """Extract dates from OCR text, inferring a year for day/month pairs.

    Full dates (day, month and a 2-4 digit year separated by ``.``, ``/`` or
    ``-``) are parsed with ``parse_date``. When exactly one full date parses
    and at least one day/month pair is found, each pair is turned into a
    date whose year is two years before the full date's year.

    Day/month pairs that do not form a real calendar date (for example OCR
    noise such as ``45.99``, or ``29.02`` in a non-leap inferred year) are
    skipped instead of raising ``ValueError``.

    Returns:
        A sorted list of ``datetime`` objects: the inferred dates plus the
        full date when there is exactly one full date, every parsed full
        date when there are two or more, and an empty list otherwise.
    """
    full_pattern = r"\d{1,2}[./-]\d{1,2}[./-]\d{2,4}"
    partial_pattern = r"\b\d{1,2}[./-]\d{1,2}\b"

    full_dates = []
    for candidate in re.findall(full_pattern, text):
        parsed = parse_date(candidate)
        if parsed:
            full_dates.append(parsed)

    # NOTE: the partial pattern also matches the leading "day.month" of a
    # full date, so a lone full date still yields one inferred date.
    partial_dates = re.findall(partial_pattern, text)

    # Case: 1 full date + 1 (or more) partial dates
    if len(full_dates) == 1 and partial_dates:
        full_date = full_dates[0]
        inferred_dates = []
        for partial in partial_dates:
            day, month = re.split(r"[./-]", partial)
            try:
                inferred = datetime(
                    year=full_date.year - 2,
                    month=int(month),
                    day=int(day),
                )
            except ValueError:
                # Digits only look like a date (bad day/month, or a day that
                # does not exist in the inferred year); ignore this match.
                continue
            inferred_dates.append(inferred)

        inferred_dates.append(full_date)
        return sorted(inferred_dates)

    if len(full_dates) >= 2:
        return sorted(full_dates)

    return []


def check_product_safety(expiry_date: date | None) -> str:
    """
    Determines whether the product is safe to use.
    """
    if not expiry_date:
        return "Unknown (expiry date not detected)"

    today = date.today()

    if today <= expiry_date:
        return "Safe to use"
    else:
        return "Not safe to use (expired)"


def extract_mfg_exp_from_text(text: str) -> dict:
    """Build the manufacturing/expiry/safety report for one OCR text.

    The text is cleaned with ``clean_ocr_text`` and scanned with
    ``extract_dates_with_inference``. When at least two dates come back, the
    first is taken as the manufacturing date and the last as the expiry
    date; otherwise both stay ``None``.

    Returns:
        A dict with the keys "Manufacturing Date" and "Expiry Date" (ISO
        strings or ``None``) and "Safety Status" (the message from
        ``check_product_safety``).
    """
    candidates = extract_dates_with_inference(clean_ocr_text(text))

    manufactured = expires = None
    if len(candidates) >= 2:
        manufactured = candidates[0].date()
        expires = candidates[-1].date()

    status = check_product_safety(expires)
    manufactured_iso = manufactured.isoformat() if manufactured else None
    expires_iso = expires.isoformat() if expires else None

    return {
        "Manufacturing Date": manufactured_iso,
        "Expiry Date": expires_iso,
        "Safety Status": status,
    }




In [43]:
def process_images(image_inputs: dict) -> dict:
    """Run OCR and date extraction on every image in ``image_inputs``.

    Each value is passed to ``robust_ocr`` and the resulting text to
    ``extract_mfg_exp_from_text``. The returned dict maps each original key
    to its extraction result, in the same order as the input.
    """
    return {
        filename: extract_mfg_exp_from_text(robust_ocr(image_bytes))
        for filename, image_bytes in image_inputs.items()
    }


In [44]:
from google.colab import files

# Colab-only driver: prompt for file uploads, process them, print the report.
# process_images() iterates the upload result as (filename, image bytes)
# pairs -- presumably that is what files.upload() returns; confirm against
# the Colab documentation.
uploaded = files.upload()
results = process_images(uploaded)
# Pretty-print the per-file results as indented JSON.
print(json.dumps(results, indent=2))


Saving Screenshot 2025-09-27 142817.png to Screenshot 2025-09-27 142817 (8).png




{
  "Screenshot 2025-09-27 142817 (8).png": {
    "manufacturing_date": "2024-06-17",
    "expiry_date": "2026-06-17",
    "safety_status": "Safe to use",
    "raw_text": "Yalamy dall 17.06.26 17.06 EVH142 HEalenti :"
  }
}
