# Google Cloud Vision API for OCR

In [9]:
import os
from google.cloud import vision
import cv2

# Authentication: expose the service-account key file through the
# GOOGLE_APPLICATION_CREDENTIALS environment variable before the client is built.
_credentials_path = r"C:\Point Detection\signature-429903-6c362955b89e.json"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = _credentials_path

# Module-level Vision client; detect_text() below issues its requests through it.
client = vision.ImageAnnotatorClient()

def detect_text(image_path, language_hints=("zh", "en")):
    """Run Google Cloud Vision text detection (OCR) on a local image file.

    Args:
        image_path: Path of the image to analyse; the file is read in binary mode.
        language_hints: Iterable of language codes forwarded to the API as
            ``image_context["language_hints"]``. Defaults to Chinese and
            English, the values that used to be hard-coded. Pass an empty
            sequence (or ``None``) to send no hints.

    Returns:
        The ``text_annotations`` field of the API response.

    Raises:
        RuntimeError: If the response carries a non-empty error message.
    """
    with open(image_path, "rb") as image_file:
        content = image_file.read()

    image = vision.Image(content=content)
    response = client.text_detection(
        image=image,
        # Materialise as a list so tuples/generators are accepted too.
        image_context={"language_hints": list(language_hints or ())},
    )
    if response.error.message:
        # RuntimeError is still caught by callers using `except Exception`.
        raise RuntimeError(f"OCR error: {response.error.message}")
    return response.text_annotations

if __name__ == "__main__":
    # Run OCR on one receipt image and persist every annotation's text.
    image_path = r"C:\Point Detection\preprocessing\raw\84829a3c746d149f192b1750b37df334.png"
    texts = detect_text(image_path)

    # 1. Make sure the output folder exists.
    #    In a raw string backslashes are literal, so they must not be doubled;
    #    the previous r"C:\\..." form produced a path containing "\\".
    out_dir = r"C:\Point Detection\preprocessing\ocr_output"
    os.makedirs(out_dir, exist_ok=True)

    # 2. Write OCR output to a text file in that folder.
    #    NOTE(review): in the recorded run the first annotation held the whole
    #    text and the following ones the individual tokens, so the file contains
    #    the receipt text twice - confirm that is what downstream cells expect.
    output_file = os.path.join(out_dir, "ocr_output.txt")
    if not texts:
        # Make an empty OCR result visible instead of silently writing an empty file.
        print(f"Warning: no text detected in {image_path}")
    with open(output_file, 'w', encoding='utf-8') as file:
        for text in texts:
            print(text.description)
            file.write(text.description + "\n")

    print(f"Text has been written to {output_file}")


Holiland
Travel
26-07-02 18:43:28
|20000043250702184258
品名
零食价 数量
金额
餐饮美
玉米芝士三明治
15.00
1
15.00
焦香梛髒奶酥包
19.00
1
19.00
商品共3
半熟芝士-糕点(奥巧口味)
140.00
1
40.00
总数:
3
总计:
74.00
实收:
74.00
美团点评:
40.00
支付宝:
34.00
正大广场店 021-61090592
发票查询流水号:220179251830060571
申请开发票二维码:
Holiland
Travel
26-07-02
18:43:28
|
20000043250702184258
品名
零食
价
数量
金额
餐饮
美
玉米
芝士
三明治
15.00
1
15.00
焦
香
梛
髒
奶酥
包
19.00
1
19.00
商品
共
3
半熟
芝士
-
糕点
(
奥
巧
口味
)
140.00
1
40.00
总数
:
3
总计
:
74.00
实
收
:
74.00
美
团
点评
:
40.00
支付
宝
:
34.00
正大
广场
店
021-61090592
发票
查询
流水号
:
220179251830060571
申请
开发
票
二维
码
:
Text has been written to C:\\Point Detection\\preprocessing\\ocr_output\ocr_output.txt


# Use AI to find the date and time and recognize the brand
Set up the model

In [12]:
import os

from dotenv import dotenv_values
from langchain.chat_models import init_chat_model

# Load the contents of the project's .env file into `config`.
config = dotenv_values(r"C:\Point Detection\.env")

# Look up the OpenAI key; without it the model cannot be created, so stop here.
api_key = config.get("OPENAI_API_KEY")
if api_key:
    # Publish the key through os.environ for the rest of the notebook.
    os.environ["OPENAI_API_KEY"] = api_key
else:
    raise EnvironmentError("OPENAI_API_KEY not found in .env")

# Chat model handle (gpt-4o-mini served by the OpenAI provider).
model = init_chat_model("gpt-4o-mini", model_provider="openai")


In [11]:
from dotenv import load_dotenv
import os, json
import openai

# 1) Load env & set API key
load_dotenv(r"C:\Point Detection\.env")
openai.api_key = os.getenv("OPENAI_API_KEY")
if not openai.api_key:
    # Fail before any file or network work instead of erroring inside the API call.
    raise EnvironmentError("OPENAI_API_KEY is not set (checked the environment and .env)")

# 2) Read your OCR output
ocr_path = r"C:\Point Detection\preprocessing\ocr_output\ocr_output.txt"
with open(ocr_path, encoding="utf-8") as f:
    ocr_text = f.read()

# 3) Define the function schema the model must fill in
functions = [
    {
        "name": "parse_summary",
        "description": "Extract store_name, receipt_number, date_time, total_price",
        "parameters": {
            "type": "object",
            "properties": {
                "store_name":     {"type": "string"},
                "receipt_number": {"type": "string"},
                "date_time":      {"type": "string", "format": "date-time"},
                "total_price":    {"type": "number"}
            },
            "required": ["store_name", "receipt_number", "date_time", "total_price"]
        }
    }
]

# 4) Call the Chat Completions API.
#    `functions` / `function_call` are deprecated request parameters; the same
#    schema is sent through `tools`, and `tool_choice` forces the parse_summary call.
tools = [{"type": "function", "function": spec} for spec in functions]
resp = openai.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role":"system", "content":"Extract key fields from a Chinese/English receipt."},
        {"role":"user",   "content":ocr_text}
    ],
    tools=tools,
    tool_choice={"type": "function", "function": {"name": "parse_summary"}}
)

# 5) Pull out the JSON arguments of the forced tool call
tool_calls = resp.choices[0].message.tool_calls
if not tool_calls:
    raise RuntimeError("Model response did not contain a parse_summary tool call")
call = tool_calls[0].function  # exposes .name and .arguments (a JSON string)
data = json.loads(call.arguments)

# 6) (Optional) save to file
#    Raw string: backslashes are literal and must not be doubled. The target
#    folder is created first so a fresh checkout does not hit FileNotFoundError.
out_path = r"C:\Point Detection\preprocessing\processed\parsed_summary.json"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
with open(out_path, "w", encoding="utf-8") as out:
    json.dump(data, out, ensure_ascii=False, indent=2)

# 7) Print the result
print(json.dumps(data, ensure_ascii=False, indent=2))


{
  "store_name": "Holiland",
  "receipt_number": "20000043250702184258",
  "date_time": "2026-07-02T18:43:28",
  "total_price": 74
}
