In [1]:
import json
import os
import re

import google.generativeai as genai
import pandas as pd
from PIL import Image
from sentence_transformers import SentenceTransformer, util

# Gemini API configuration.
# The key is read from the GOOGLE_API_KEY environment variable so that the
# credential is never stored in the notebook or in version control.
# NOTE(review): any key that was ever committed in this notebook should be
# treated as leaked and revoked/rotated.
_gemini_api_key = os.environ.get("GOOGLE_API_KEY")
if not _gemini_api_key:
    # Fail here with a clear message instead of a remote 400 later on.
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable before running this notebook."
    )
genai.configure(api_key=_gemini_api_key)


In [2]:
def parse_receipt(image_path, model_name="gemini-2.5-flash"):
    """Send a receipt image to Gemini and return the raw text of its reply.

    The prompt (in Indonesian) asks the model to extract the receipt fields
    as JSON only. The reply is returned unparsed; pass it to
    ``clean_json_response`` to obtain a Python object.

    Args:
        image_path: Path of the receipt image to open with PIL.
        model_name: Name of the Gemini model to query. Defaults to the model
            that was previously hard-coded, so existing callers are unaffected.

    Returns:
        The ``text`` attribute of the model response.
    """
    prompt = '''
    Ini adalah struk belanja. Tolong ekstrak informasinya dalam format JSON dengan field:
    - invoice_number
    - Phone
    - alamat
    - email
    - nama_toko
    - tanggal
    - daftar_barang (array of objects: nama, qty, harga_satuan, subtotal)
    - total_belanja
    Hanya kembalikan JSON-nya saja, tanpa penjelasan atau markdown formatting.
    '''
    model = genai.GenerativeModel(model_name)
    # Use the image as a context manager so the underlying file handle is
    # released once the request has been made, instead of leaking it.
    with Image.open(image_path) as img:
        response = model.generate_content([prompt, img])
    return response.text

def clean_json_response(text):
    """Parse the JSON payload out of a model reply.

    Handles three reply shapes:
    - bare JSON;
    - JSON wrapped in a Markdown code fence, with or without a ``json`` tag
      in any letter case;
    - JSON surrounded by extra prose, in which case the first JSON object
      found in the text is decoded.

    Args:
        text: Raw reply text.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If no valid JSON can be decoded from ``text``.
    """
    cleaned = text.strip()
    # Remove a leading fence (optionally tagged "json") and a trailing fence.
    cleaned = re.sub(r'\A```(?:json)?\s*', '', cleaned, flags=re.IGNORECASE)
    cleaned = re.sub(r'\s*```\Z', '', cleaned)
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        # The reply may contain prose around the payload; decode the first
        # object and ignore whatever follows it.
        start = cleaned.find('{')
        if start == -1:
            raise
        payload, _ = json.JSONDecoder().raw_decode(cleaned[start:])
        return payload


In [3]:
# Load the central price list: the first nine columns of the sheet, skipping
# the two leading rows, and drop every row that contains a missing value.
df = pd.read_excel(
    'data.xlsx',
    sheet_name='All NIVEA PL Mei 2024',
    skiprows=2,
    usecols=range(9),
).dropna()

# Lower-cased product descriptions and their prices, kept as two parallel
# lists so that the same position refers to the same product.
data_pusat = [str(deskripsi).lower() for deskripsi in df['Description']]
harga_pusat = df['Harga Jual ke Konsumen yg Disarankan'].to_list()


In [4]:
# Load the four sentence-embedding models that are compared against each other.
model_minilm, model_bge, model_gte, model_labse = [
    SentenceTransformer(nama_model)
    for nama_model in (
        "all-MiniLM-L6-v2",
        "BAAI/bge-small-en-v1.5",
        "thenlper/gte-small",
        "sentence-transformers/LaBSE",
    )
]

# Embed the price-list descriptions once per model; the resulting tensors are
# reused for every receipt item that is matched later.
vektor_minilm, vektor_bge, vektor_gte, vektor_labse = [
    encoder.encode(data_pusat, convert_to_tensor=True)
    for encoder in (model_minilm, model_bge, model_gte, model_labse)
]


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [6]:
# Send the receipt image to Gemini and decode the JSON it returns.
image_path = "img/Aeon 1.jpg"  # change to match your own path
result_text = parse_receipt(image_path)
result_data = clean_json_response(result_text)

# Item table built from the 'daftar_barang' entry of the decoded reply.
df_barang = pd.DataFrame(result_data['daftar_barang'])

# Single-row table with the receipt-level fields, in this column order.
df_info = pd.DataFrame({
    kolom: [result_data[kolom]]
    for kolom in (
        'nama_toko',
        'tanggal',
        'total_belanja',
        'invoice_number',
        'Phone',
        'alamat',
        'email',
    )
})


InvalidArgument: 400 API key not valid. Please pass a valid API key. [reason: "API_KEY_INVALID"
domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
, locale: "en-US"
message: "API key not valid. Please pass a valid API key."
]

In [None]:
# Minimum cosine similarity for a receipt item to count as a match with a
# price-list entry; anything below is reported as "Tidak ada".
threshold = 0.59
hasil = []
data_struk = df_barang['nama'].astype(str).str.lower().tolist()

# Quantities come from model-generated JSON and may arrive as strings.
# Coerce them to numbers for the price arithmetic (unparseable values become
# NaN) while still reporting the original value in the "Qty" column.
qty_asli = df_barang['qty'].tolist()
qty_numerik = pd.to_numeric(df_barang['qty'], errors='coerce').tolist()

# Loop-invariant values, computed once instead of on every iteration.
nama_toko = df_info['nama_toko'][0]
daftar_model = [
    ("MiniLM", model_minilm, vektor_minilm),
    ("BGE", model_bge, vektor_bge),
    ("GTE", model_gte, vektor_gte),
    ("LaBSE", model_labse, vektor_labse),
]

# Iterate the parallel lists directly rather than indexing df_barang by
# position, which only works when its index is 0..n-1.
for item, qty, qty_angka in zip(data_struk, qty_asli, qty_numerik):
    hasil_item = {"Nama Struk": item, "Qty": qty, "Nama Toko": nama_toko}
    for model_name, model, vektor in daftar_model:
        # Similarity of this receipt item against every price-list entry.
        vektor_item = model.encode(item, convert_to_tensor=True)
        similarity = util.cos_sim(vektor_item, vektor)
        # Convert the argmax tensor to a plain int before indexing the lists.
        idx_best = int(similarity.argmax())
        skor = similarity[0][idx_best].item()
        if skor >= threshold:
            nama_pusat = data_pusat[idx_best]
            harga_jual = harga_pusat[idx_best]
            total = harga_jual * qty_angka
        else:
            nama_pusat = "Tidak ada"
            harga_jual = None
            total = None
        hasil_item[f'Cocok {model_name}'] = nama_pusat
        hasil_item[f'Skor {model_name}'] = round(skor, 3)
        hasil_item[f'Harga {model_name}'] = harga_jual
        hasil_item[f'Total {model_name}'] = total
    hasil.append(hasil_item)

# One row per receipt item, with match/score/price/total columns per model.
df_hasil = pd.DataFrame(hasil)


In [None]:
# Preview the first 10 rows of the matching results.
df_hasil.head(10)
