# 商品详情页 价格×数量 提取与可视化

- 参考 GUI ‘OCR调参’ 的思路：多预处理变体 + Tesseract OCR。
- 从一张类似截图的图片中，按行提取左侧价格（可能带 K）与条末数量，形成 (price, qty) 列表。
- 按你的要求绘制暗色、低对比、单色、无标题/图例/轴标题的横向条形图：左侧为价格刻度，条末标注数量。

使用前准备：
- 需要安装系统级 Tesseract 可执行程序，并在 PATH 中（或位于 Windows 常见安装路径）。
- 需要 Python 包：opencv-python, pillow, pytesseract, numpy, matplotlib；ipywidgets 为可选（仅最后的交互式控件单元需要，未安装时会自动跳过）。

建议：把要处理的截图放到仓库根目录（默认演示使用 `image.png`）。

In [None]:
import os, re, math, json, sys
from dataclasses import dataclass
from typing import List, Tuple, Optional, Dict

import numpy as np
import cv2
from PIL import Image
import pytesseract
import matplotlib.pyplot as plt

# Point pytesseract at a usable Tesseract binary.
# pytesseract ships with tesseract_cmd set to the bare name 'tesseract', which
# is always truthy, so a plain truthiness test would never reach the Windows
# fallbacks below. Instead, check whether the configured command actually
# resolves to an executable, and only if it does not, try the common Windows
# install locations.
import shutil

_cmd = getattr(pytesseract.pytesseract, 'tesseract_cmd', None)
if not _cmd or shutil.which(_cmd) is None:
    for _p in (
        r'C:\Program Files\Tesseract-OCR\tesseract.exe',
        r'C:\Program Files (x86)\Tesseract-OCR\tesseract.exe',
    ):
        if os.path.exists(_p):
            pytesseract.pytesseract.tesseract_cmd = _p
            break

# Report which command will be used for OCR.
print('Tesseract:', getattr(pytesseract.pytesseract, 'tesseract_cmd', 'from PATH'))


In [None]:
# —— 预处理：参考 gui/price_reader 的多变体策略，聚焦弱对比数字 ——
def preprocess_variants_for_digits(pil_img: Image.Image) -> List[np.ndarray]:
    """Build a list of binarized variants of ``pil_img`` to feed to OCR.

    The image is converted with ``COLOR_RGB2BGR``, so it is expected to be an
    RGB PIL image. Three single-channel views (grayscale, the green channel and
    the HSV value channel) are upscaled 2x, contrast-enhanced with CLAHE and
    thresholded in several ways; two colour-distance masks are added on top.
    Every variant is then lightly cleaned, has wide flat blobs blanked out, and
    the list is de-duplicated before being returned.

    Returns:
        A list of 2-D uint8 masks. Variants derived from the upscaled channels
        are twice the input resolution; the two colour-aware masks are computed
        from the non-resized ``bgr`` array and keep the input resolution.
    """
    bgr = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
    gchan = bgr[:, :, 1]  # index 1 of a BGR array is the green channel
    hsv = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)
    vchan = hsv[:, :, 2]  # V (value) channel of HSV

    chans = [gray, gchan, vchan]
    # Upscaling helps OCR
    chans = [cv2.resize(c, None, fx=2.0, fy=2.0, interpolation=cv2.INTER_CUBIC) for c in chans]

    # Local contrast enhancement on each channel
    clahe = cv2.createCLAHE(clipLimit=2.2, tileGridSize=(8, 8))
    chans = [clahe.apply(c) for c in chans]

    k2 = np.ones((2, 2), np.uint8)
    k3 = np.ones((3, 3), np.uint8)

    variants: List[np.ndarray] = []
    def add(v):
        # Normalise to a strict 0/255 mask and keep both polarities, since it is
        # not known in advance whether the text ends up white or black.
        if v is None:
            return
        v = (v > 0).astype(np.uint8) * 255
        variants.append(v)
        variants.append(255 - v)

    for c in chans:
        # Global Otsu threshold
        _, otsu = cv2.threshold(c, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        add(otsu)
        # Gaussian adaptive threshold (block size 31, constant 2); best effort
        try:
            adp = cv2.adaptiveThreshold(c, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 2)
            add(adp)
        except Exception:
            pass
        # Top-hat with a 3x3 kernel followed by Otsu; best effort
        try:
            top = cv2.morphologyEx(c, cv2.MORPH_TOPHAT, k3, iterations=1)
            _, th_top = cv2.threshold(top, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
            add(th_top)
        except Exception:
            pass
        # Black-hat with a 3x3 kernel followed by Otsu; best effort
        try:
            blk = cv2.morphologyEx(c, cv2.MORPH_BLACKHAT, k3, iterations=1)
            _, th_blk = cv2.threshold(blk, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
            add(th_blk)
        except Exception:
            pass

    # Colour-aware variants tuned for roughly (#070708 background, #606867 text)
    try:
        bg_bgr = np.array([8, 7, 7], dtype=np.float32)       # #070708 in BGR order
        txt_bgr = np.array([103, 104, 96], dtype=np.float32)  # #606867 in BGR order
        V = txt_bgr - bg_bgr
        Vn = float(np.dot(V, V)) or 1.0  # squared length of the bg->text vector; guard against 0
        diff_bg = bgr.astype(np.float32) - bg_bgr
        diff_txt = bgr.astype(np.float32) - txt_bgr
        d_bg = np.sqrt(np.maximum(0.0, np.sum(diff_bg * diff_bg, axis=2)))
        d_txt = np.sqrt(np.maximum(0.0, np.sum(diff_txt * diff_txt, axis=2)))
        # Mask 1: pixels closer to the text colour than to the background (margin 5)
        # and within distance 220 of the text colour.
        m_close = (d_txt + 5.0 < d_bg)
        m_close &= (d_txt < 220.0)
        m1 = (m_close.astype(np.uint8)) * 255
        variants.append(m1)
        # Mask 2: position of each pixel along the bg->text axis, clipped to [0, 1],
        # scaled to 8 bits and thresholded with Otsu.
        proj = np.sum((bgr.astype(np.float32) - bg_bgr) * V, axis=2) / Vn
        proj = np.clip(proj, 0.0, 1.0)
        proj8 = (proj * 255.0).astype(np.uint8)
        _, thp = cv2.threshold(proj8, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        variants.append(thp)
    except Exception:
        pass

    # Light open/close plus a light dilation, and blank out blobs that look like
    # progress bars (wide with a very large width/height ratio)
    out: List[np.ndarray] = []
    # NOTE(review): W is the width of the non-resized image, while most variants
    # are 2x upscaled, so the width threshold below is relative to the original
    # resolution - confirm this is intended. H is not used afterwards.
    H, W = bgr.shape[:2]
    for v in variants:
        try:
            x = cv2.morphologyEx(v, cv2.MORPH_OPEN, k2, iterations=1)
            x = cv2.morphologyEx(x, cv2.MORPH_CLOSE, k2, iterations=1)
            x = cv2.dilate(x, k2, iterations=1)
            cnts, _ = cv2.findContours((x > 0).astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            for c in cnts:
                rx, ry, rw, rh = cv2.boundingRect(c)
                # Bounding box at least max(60, W // 6) wide and at least 8x wider than tall
                if rw >= max(60, W // 6) and rh > 0 and (rw / max(1, rh)) >= 8.0:
                    cv2.rectangle(x, (rx, ry), (rx + rw, ry + rh), 0, -1)
            out.append(x)
        except Exception:
            # If cleanup fails, fall back to the untouched variant
            out.append(v)

    # De-duplicate on (height, width, pixel sum modulo 1,000,000) to keep the list small
    seen = set()
    uniq: List[np.ndarray] = []
    for v in out:
        try:
            key = (v.shape[0], v.shape[1], int(v.sum()) % 1_000_000)
        except Exception:
            key = None  # no key could be computed: the variant is kept unconditionally
        if key and key in seen:
            continue
        if key:
            seen.add(key)
        uniq.append(v)
    return uniq


In [None]:
# —— OCR：返回每个 token 的边框与文本 ——
@dataclass
class Token:
    """A single OCR result: recognized text, its bounding box and confidence."""

    text: str
    left: int
    top: int
    width: int
    height: int
    conf: float

    @property
    def cx(self) -> float:
        """Horizontal centre of the bounding box."""
        half_width = self.width / 2.0
        return self.left + half_width

    @property
    def cy(self) -> float:
        """Vertical centre of the bounding box."""
        half_height = self.height / 2.0
        return self.top + half_height

def ocr_tokens(pil_img: Image.Image, whitelist: str = '0123456789Kk,.', psm_list=(6,7,11,13)) -> List[Token]:
    """Run Tesseract over ``pil_img`` once per page-segmentation mode.

    Args:
        pil_img: Image handed to ``pytesseract.image_to_data``.
        whitelist: Characters passed as ``tessedit_char_whitelist``.
        psm_list: Page-segmentation modes to try; results of all modes are pooled.

    Returns:
        Tokens ordered by descending confidence. A token is dropped when a
        higher-ranked token has the same text and a centre within 2 px on
        both axes.
    """
    box_columns = ('left', 'top', 'width', 'height')
    collected: List[Token] = []
    for mode in psm_list:
        config = f'--oem 3 --psm {mode} -c tessedit_char_whitelist={whitelist}'
        try:
            result = pytesseract.image_to_data(pil_img, config=config, output_type=pytesseract.Output.DICT)
        except Exception:
            # A failing mode is skipped; the remaining modes are still tried.
            continue
        for idx, raw in enumerate(result.get('text', [])):
            word = (raw or '').strip()
            if not word:
                continue
            try:
                x, y, w, h = (int(result.get(col, [0])[idx]) for col in box_columns)
                score = float(result.get('conf', [0])[idx] or 0)
            except Exception:
                # Missing or malformed box data: ignore this entry.
                continue
            collected.append(Token(word, x, y, w, h, score))

    # Highest confidence first, so the first token seen at a position wins.
    unique: List[Token] = []
    for cand in sorted(collected, key=lambda t: -t.conf):
        already_kept = any(
            abs(cand.cx - kept.cx) <= 2 and abs(cand.cy - kept.cy) <= 2 and cand.text == kept.text
            for kept in unique
        )
        if not already_kept:
            unique.append(cand)
    return unique

def parse_price(text: str) -> Optional[int]:
    """Parse an OCR price such as '2,224K', '2.224K', '2224K' or '2224'.

    Commas are discarded. A trailing 'K'/'k' multiplies the value by 1000.
    Text with exactly one dot and at most three characters after it is read
    as a decimal number and rounded; any other non-digit text falls back to
    concatenating every digit run it contains.

    Returns:
        The price as an int, or None when nothing numeric can be extracted.
    """
    cleaned = text.strip().replace(',', '')
    has_k = cleaned[-1:] in ('K', 'k')
    if has_k:
        cleaned = cleaned[:-1]
    multiplier = 1000 if has_k else 1

    # Decimal form: one dot only, with a short fractional part.
    _, dot, frac = cleaned.partition('.')
    if dot and '.' not in frac and len(frac) <= 3:
        try:
            return int(round(float(cleaned) * multiplier))
        except Exception:
            return None

    # Tolerant path: salvage whatever digits are present.
    if not cleaned.isdigit():
        cleaned = ''.join(re.findall(r'\d+', cleaned))
        if not cleaned:
            return None
    try:
        return int(cleaned) * multiplier
    except Exception:
        return None

def parse_int(text: str) -> Optional[int]:
    """Return the integer formed by joining every digit run in ``text``.

    Surrounding whitespace and commas are removed first. Returns None when
    the text contains no digits.
    """
    digits = ''.join(re.findall(r'\d+', text.strip().replace(',', '')))
    if not digits:
        return None
    try:
        return int(digits)
    except Exception:
        return None


In [None]:
# —— 将 token 按行聚类，并抽取每行的左(价格)与右(数量) ——
def group_rows(tokens: List[Token], y_tol_px: Optional[int] = None) -> List[List[Token]]:
    """Cluster tokens into text rows by vertical centre.

    Tokens are visited in order of increasing ``cy``; each one joins the first
    existing row whose mean ``cy`` lies within the tolerance, otherwise it
    starts a new row. Every row is finally sorted left to right by ``cx``.

    Args:
        tokens: Tokens to group.
        y_tol_px: Vertical tolerance in pixels. When None, it is derived from
            the median token height (60% of it, but never below 6).

    Returns:
        A list of rows, each a list of tokens; empty when ``tokens`` is empty.
    """
    if not tokens:
        return []

    median_height = np.median([t.height for t in tokens]) or 10
    if y_tol_px is not None:
        tolerance = int(y_tol_px)
    else:
        tolerance = int(max(6, round(median_height * 0.6)))

    lines: List[List[Token]] = []
    for token in sorted(tokens, key=lambda t: t.cy):
        for line in lines:
            line_cy = np.mean([member.cy for member in line])
            if abs(token.cy - line_cy) <= tolerance:
                line.append(token)
                break
        else:
            # No existing row is close enough: open a new one.
            lines.append([token])

    for line in lines:
        line.sort(key=lambda t: t.cx)
    return lines

def _pair_from_row(row: List[Token]) -> Optional[Tuple[int, int]]:
    """Return (price, qty) for one row, or None if the row cannot supply both.

    The price is taken from the leftmost token that parses as a price, and the
    quantity from the rightmost token strictly to its right that parses as an
    integer. Requiring two distinct tokens prevents a single-token row from
    being reported as a pair built from the same text twice.
    """
    price: Optional[int] = None
    price_idx = -1
    for idx, tk in enumerate(row):
        price = parse_price(tk.text)
        if price is not None:
            price_idx = idx
            break
    if price is None:
        return None
    for tk in reversed(row[price_idx + 1:]):
        qty = parse_int(tk.text)
        if qty is not None:
            return price, qty
    return None


def extract_price_qty(pil_img: Image.Image, conf_min: float = 0.0, y_tol_px: Optional[int] = None) -> List[Tuple[int, int]]:
    """Extract (price, qty) pairs, one per text row, from a screenshot.

    Each of the first 16 preprocessing variants is OCR'd independently; the
    variant that yields the most valid pairs wins and its pairs are returned.

    Args:
        pil_img: Source image, passed to ``preprocess_variants_for_digits``.
        conf_min: Tokens with a confidence below this value are discarded.
        y_tol_px: Row-grouping tolerance forwarded to ``group_rows``.

    Returns:
        The list of (price, qty) tuples from the best variant, in row order;
        empty when no variant produced a pair.
    """
    variants = preprocess_variants_for_digits(pil_img)
    if not variants:
        # Fall back to the plain grayscale image
        variants = [cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2GRAY)]
    best_pairs: List[Tuple[int, int]] = []
    for v in variants[:16]:
        # Convert back to PIL so every variant goes through the same OCR path
        if len(v.shape) == 2:
            pil_v = Image.fromarray(v)
        else:
            pil_v = Image.fromarray(cv2.cvtColor(v, cv2.COLOR_BGR2RGB))
        toks = [t for t in ocr_tokens(pil_v) if t.conf >= conf_min]
        if not toks:
            continue
        pairs: List[Tuple[int, int]] = []
        for row in group_rows(toks, y_tol_px=y_tol_px):
            pair = _pair_from_row(row)
            if pair is not None:
                pairs.append(pair)
        # Keep the variant with the most usable rows (first one wins ties)
        if len(pairs) > len(best_pairs):
            best_pairs = pairs
    return best_pairs


In [None]:
# —— 绘图：暗色、单色、低对比、末端标注数量 ——
def format_price_k(v: int) -> str:
    """Format a price with thousands separators, using a 'K' suffix for exact thousands.

    Values of at least 1000 that are a whole multiple of 1000 are shown in
    thousands followed by 'K'; every other value is shown in full.
    """
    thousands, remainder = divmod(v, 1000)
    if v < 1000 or remainder:
        return f'{v:,}'
    return f'{thousands:,}K'

def plot_price_qty_bars(pairs: List[Tuple[int,int]], sort_by: str = 'none', save_path: Optional[str] = None, label: str = 'qty'):
    """Draw a dark, single-colour horizontal bar chart of quantity per price.

    One bar is drawn per (price, qty) pair; bar length is the quantity, the
    y tick label is the formatted price, and a text label sits at the end of
    each bar. The chart has no title, legend or axis labels.

    Args:
        pairs: (price, qty) tuples. When empty, a message is printed and
            nothing is drawn.
        sort_by: 'price_asc', 'price_desc' or 'qty_desc'; any other value
            keeps the input order.
        save_path: When given, the figure is also written there (parent
            directories are created as needed).
        label: 'price' annotates bars with the price; anything else
            annotates them with the quantity.
    """
    if not pairs:
        print('无有效数据。')
        return
    data = list(pairs)  # work on a copy so the caller's list is not reordered
    if sort_by == 'price_asc':
        data.sort(key=lambda x: x[0])
    elif sort_by == 'price_desc':
        data.sort(key=lambda x: -x[0])
    elif sort_by == 'qty_desc':
        data.sort(key=lambda x: -x[1])

    prices = [p for p, q in data]
    qtys = [q for p, q in data]
    labels = [format_price_k(p) for p in prices]

    # Dark theme palette
    bg = '#0b0b0c'
    ax_bg = '#0b0b0c'
    grid = '#202224'
    bar_c = '#6e6e73'  # low-contrast grey
    txt_c = '#b5b6b8'

    theme = {
        'figure.facecolor': bg,
        'axes.facecolor': ax_bg,
        'axes.edgecolor': bg,
        'xtick.color': txt_c,
        'ytick.color': txt_c,
        'text.color': txt_c,
        'axes.labelcolor': txt_c,
    }

    # Apply the theme only while this figure is built, saved and shown.
    # rc_context restores the previous rcParams on exit, so the dark colours
    # do not leak into other figures created later in the same session.
    with plt.rc_context(theme):
        h = max(2.5, 0.4 * len(data) + 1.0)  # grow the figure with the row count
        fig, ax = plt.subplots(figsize=(6.4, h))
        y = np.arange(len(data))
        bars = ax.barh(y, qtys, color=bar_c, height=0.48, edgecolor=None)

        ax.set_yticks(y)
        ax.set_yticklabels(labels)
        ax.invert_yaxis()  # first row at the top, matching the screenshot layout

        # Grid: thin dotted horizontal lines separating the rows
        ax.xaxis.grid(False)
        ax.yaxis.grid(True, which='major', linestyle=(0, (1, 3)), color=grid, linewidth=0.6)

        # Strip spines, axis labels and title
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlabel('')
        ax.set_ylabel('')
        ax.set_title('')

        # Label at the end of each bar (quantity by default, optionally price)
        max_q = max(qtys) if qtys else 0
        for i, rect in enumerate(bars):
            x = rect.get_width()
            y_ = rect.get_y() + rect.get_height() / 2.0
            if label == 'price':
                txt = format_price_k(prices[i])
            else:
                txt = f'{qtys[i]:,}'
            ax.text(x + max_q * 0.01 + 0.5, y_, txt, va='center', ha='left', color=txt_c, fontsize=9)

        # Leave room on the right for the end-of-bar labels
        ax.set_xlim(0, max_q * 1.12 + 5)

        plt.tight_layout(pad=0.6)
        if save_path:
            os.makedirs(os.path.dirname(save_path) or '.', exist_ok=True)
            fig.savefig(save_path, dpi=160, bbox_inches='tight')
        plt.show()


In [None]:
# —— Main flow: given an image path, print the extracted (price, qty) pairs and plot them ——
IMG_PATH = 'image.png'  # change to the path of your screenshot
CONF_MIN = 0.0  # tokens with an OCR confidence below this are discarded
Y_TOL_PX = None  # automatic row tolerance; may also be set to an integer such as 6~20
SORT_BY = 'none'  # one of: 'none' | 'price_asc' | 'price_desc' | 'qty_desc'
SAVE_TO = None     # to save the chart, use e.g. 'images/price_qty_chart.png'

# Load the screenshot as RGB, which is what the preprocessing step converts from
pil = Image.open(IMG_PATH).convert('RGB')
pairs = extract_price_qty(pil, conf_min=CONF_MIN, y_tol_px=Y_TOL_PX)
# Print the number of extracted rows, then one "price qty" line per row
print('提取到行数:', len(pairs))
for p, q in pairs:
    print(format_price_k(p), q)

plot_price_qty_bars(pairs, sort_by=SORT_BY, save_path=SAVE_TO)


In [None]:
# —— Optional: interactive controls ——
try:
    import ipywidgets as W
    from IPython.display import display

    # Input widgets: image path, minimum confidence, row tolerance, sort order, save path
    img_inp = W.Text(value='image.png', description='图片路径', layout=W.Layout(width='400px'))
    conf_s = W.FloatSlider(value=0.0, min=0.0, max=100.0, step=0.5, description='置信度≥', readout_format='.1f')
    ytol_s = W.IntSlider(value=0, min=0, max=40, step=1, description='行容差(px)')
    sort_dd = W.Dropdown(options=[('不排序','none'),('价格↑','price_asc'),('价格↓','price_desc'),('数量↓','qty_desc')], value='none', description='排序')
    save_txt = W.Text(value='', description='保存路径', layout=W.Layout(width='360px'))
    run_btn = W.Button(description='运行', button_style='')
    out = W.Output()  # printed text and the chart are captured here

    def _run(_=None):
        """Button callback: extract pairs from the chosen image, print them and plot."""
        out.clear_output()
        with out:
            path = img_inp.value.strip()
            if not os.path.exists(path):
                print('找不到图片:', path)
                return
            pil = Image.open(path).convert('RGB')
            # A slider value of 0 means "automatic tolerance" (None)
            ytol = None if ytol_s.value <= 0 else int(ytol_s.value)
            pairs = extract_price_qty(pil, conf_min=float(conf_s.value), y_tol_px=ytol)
            print('提取到行数:', len(pairs))
            for p, q in pairs:
                print(format_price_k(p), q)
            # An empty save path means "do not save"
            sp = save_txt.value.strip() or None
            plot_price_qty_bars(pairs, sort_by=sort_dd.value, save_path=sp)

    run_btn.on_click(_run)
    ui = W.VBox([W.HBox([img_inp, run_btn]), W.HBox([conf_s, ytol_s, sort_dd]), save_txt, out])
    display(ui)
except Exception as e:
    # NOTE(review): this catches every exception raised while building the UI,
    # not only a missing ipywidgets install; the printed exception shows the real cause.
    print('未安装 ipywidgets，跳过交互控件。', e)
