# Setting up the PostgreSQL database (pgAdmin 4)

In [None]:
from dotenv import load_dotenv
import os, psycopg2
from openai import OpenAI

load_dotenv(r"C:\Point Detection\.env")


def _redact_dsn(dsn):
    """Return a printable form of *dsn* that never contains the password.

    - Unset/empty values are returned unchanged.
    - URL-style DSNs (``scheme://user:password@host/db``) keep everything
      except the password, which is replaced by ``***``.
    - Any other form (e.g. libpq ``key=value`` strings, which may embed
      ``password=...``) is replaced wholesale by a placeholder.
    """
    if not dsn:
        return dsn
    if "://" not in dsn:
        return "<non-URL DSN, not shown>"
    scheme, rest = dsn.split("://", 1)
    # Credentials can only live in the authority part, i.e. before the first "/".
    authority, slash, tail = rest.partition("/")
    userinfo, at, hostport = authority.rpartition("@")
    user, colon, _ = userinfo.partition(":")
    if not (at and colon):
        return dsn  # no password present, nothing to hide
    return f"{scheme}://{user}:***@{hostport}{slash}{tail}"


# Debug: show which DSN we're about to use, without leaking the password
# into the saved notebook output.
dsn = os.getenv("DATABASE_URL")
print("Using DATABASE_URL:", repr(_redact_dsn(dsn)))

try:
    conn = psycopg2.connect(dsn)
    print("✅ Connected to Postgres!")
except Exception as e:
    print("❌ Connection failed:", e)
    raise

# If we get here, round-trip a trivial query. The cursor is closed by the
# `with` block and the connection by `finally`, even if the query raises.
try:
    with conn.cursor() as cur:
        cur.execute("SELECT 1;")
        print("Postgres replied:", cur.fetchone())
finally:
    conn.close()


Using DATABASE_URL: 'postgres://myapp_user:superbrandmall@localhost:5432/myapp_db'
✅ Connected to Postgres!
Postgres replied: (1,)


# Fuzzy-matching store_name against store_categories

In [None]:
import os
import json
import difflib
import psycopg2
from dotenv import load_dotenv

# ─── 0) Load environment ────────────────────────────────────────────
load_dotenv(r"C:\Point Detection\.env")

# ─── 1) Load the parsed summary JSON ────────────────────────────────
# Done before touching the database so a missing file or key cannot leave
# a connection open.
summary_path = r"C:\Point Detection\preprocessing\processed\parsed_summary.json"
with open(summary_path, encoding="utf-8") as f:
    store_name = json.load(f)["store_name"]

# ─── 2) Fetch your in-DB map ─────────────────────────────────────────
# The connection is only needed for this one query, so it is opened and
# closed right here; `finally` guarantees the close even if the query fails.
conn = psycopg2.connect(os.getenv("DATABASE_URL"))
try:
    with conn.cursor() as cur:
        cur.execute("SELECT store_name, bucket FROM store_categories;")
        store_map = dict(cur.fetchall())  # {store_name: bucket}
finally:
    conn.close()
store_keys = list(store_map.keys())

# ─── 3) Fuzzy-match strictly ────────────────────────────────────────
# Keep only the single best candidate whose similarity ratio is >= cutoff.
cutoff  = 0.6
matches = difflib.get_close_matches(store_name, store_keys, n=1, cutoff=cutoff)
if not matches:
    raise ValueError(f"No match in store_categories for: {store_name!r}")

best   = matches[0]
bucket = store_map[best]

# ─── 4) Print your result ───────────────────────────────────────────
print(f"Extracted store_name: {store_name!r}")
print(f"Matched to:           {best!r} → Category: {bucket!r}")


Extracted store_name: 'Holiland'
Matched to:           'Holiland Travel' → Category: '餐饮'


# Accuracy check

In [None]:
# Relies on store_name, best and store_map already being defined
# by the fuzzy-matching cell.

from difflib import SequenceMatcher

# Category of the matched key, and how similar the extracted name is to it
# (SequenceMatcher ratio, 0.0–1.0, with no junk filter).
bucket = store_map[best]
ratio = SequenceMatcher(a=store_name, b=best).ratio()

print(
    f"Extracted store_name: {store_name!r}",
    f"Best match:           {best!r} → Category: {bucket!r}",
    f"Match rate:           {ratio:.1%}",
    sep="\n",
)


NameError: name 'best_bucket' is not defined