# 사진 크롭 전처리(모델)

In [78]:
!pip install easyocr
!pip install tqdm



OCR에는 easyocr을, 진행률 표시에는 tqdm을 사용하므로 먼저 설치한다.

In [80]:
#Data management
import easyocr
import json, math, random, os
from pathlib import Path
from PIL import Image
import cv2

# ML
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd

# Display
from tqdm.notebook import tqdm 

# --------------------
# Config
# --------------------

MIN_CONF   = 0.35      # minimum OCR confidence; detections below this are ignored
MIN_CHARS  = 3         # drop very short detected text (fewer characters than this)

# Shared EasyOCR reader for English text; gpu=True requests GPU inference.
reader = easyocr.Reader(['en'], gpu=True)
# Seed Python's `random` module so any random choices are repeatable.
random.seed(42)

In [66]:
# Data directory: the `data` folder one level above the current working directory.
data_dir = (Path.cwd() / ".." / "data").resolve()
data_dir

PosixPath('/Users/mainframe/Workspace/ML/WV-Team/data')

In [67]:
# Path of the whisky CSV that is loaded with pd.read_csv further down.
whiskies_filtered_csv = data_dir / 'whiskies_filtered.csv'
whiskies_filtered_csv

PosixPath('/Users/mainframe/Workspace/ML/WV-Team/data/whiskies_filtered.csv')

In [68]:
# Directory holding the input photos (looked up as `<id>_2.png` / `<id>_1.webp`).
photos_dir = data_dir / 'photos'
photos_dir

PosixPath('/Users/mainframe/Workspace/ML/WV-Team/data/photos')

In [69]:
# Output directory for the cropped label images; created here if it does not exist.
crops_dir = data_dir / 'crops'
crops_dir.mkdir(parents=True, exist_ok=True)
crops_dir

PosixPath('/Users/mainframe/Workspace/ML/WV-Team/data/crops')

In [70]:
# Path of the manifest CSV that the processing cells write their records to.
generated_metadata_csv = data_dir / 'metadata.csv'
generated_metadata_csv

PosixPath('/Users/mainframe/Workspace/ML/WV-Team/data/metadata.csv')

In [71]:
# Load the whisky table and keep only the columns this notebook uses.
df = pd.read_csv(whiskies_filtered_csv)

wanted_cols = ['id', 'name', 'name_eng', 'category', 'brand', 'nation', 'region', 'distillery']

# `df.get(list_of_columns)` returns None as soon as one column is missing, which
# would surface as an opaque AttributeError on `.columns`. Validate explicitly and
# report exactly which columns are absent instead.
missing_cols = [col for col in wanted_cols if col not in df.columns]
if missing_cols:
    raise KeyError(f"{whiskies_filtered_csv} is missing required columns: {missing_cols}")

f_df = df[wanted_cols]

f_df.head()

Unnamed: 0,id,name,name_eng,category,brand,nation,region,distillery
0,60600124,1800 레포사도,1800 Reposado,기타,1800,멕시코,,Fábrica La Rojeña
1,60600125,1800 레포사도 38%,1800 Reposado 38%,기타,1800,멕시코,,Fábrica La Rojeña
2,60600126,1800 실버,1800 Silver,기타,1800,멕시코,,Fábrica La Rojeña
3,10101959,보모어 50년,Bowmore 50yo,싱글몰트,Bowmore,스코틀랜드,아일라 섬,Bowmore
4,20401642,마르스 더 Y.A. #03,Mars The Y.A. #03,블렌디드,Mars,일본,,


필요한 헬퍼 함수 정의 (OCR 기반 박스 추출, OpenCV 폴백 박스 추출, 이미지 경로 탐색)

In [76]:
# Padding around the OCR text box, as a fraction of the shorter image side.
# NOTE(review): no visible cell defines MARGIN_FR even though it is used as a
# default argument below, so a fresh kernel fails with NameError. 0.04 mirrors
# the padding fraction used by fallback_label_box — confirm the intended value.
MARGIN_FR = 0.04


def ocr_union_box(img_pil, min_conf=MIN_CONF, min_chars=MIN_CHARS, margin=MARGIN_FR):
    """Return the padded union box of all accepted OCR detections in an image.

    Args:
        img_pil: PIL image to scan; it is converted to a NumPy array for the reader.
        min_conf: Detections with a confidence below this value are ignored.
        min_chars: Detections whose stripped text is shorter than this are ignored.
        margin: Padding added on every side, as a fraction of min(width, height).

    Returns:
        ``((x, y, w, h), best_text, best_conf)`` where the box is clamped to the
        image bounds and ``best_text`` / ``best_conf`` come from the most confident
        accepted detection, or ``None`` when no detection passes the filters.
    """
    W, H = img_pil.size
    detections = reader.readtext(np.array(img_pil), detail=1)

    xs, ys = [], []
    best_text, best_conf = "", 0.0

    for det in detections:
        # Each detection is unpacked as (polygon, text, confidence). This is a
        # structural check on the tuple and must not depend on `min_chars`
        # (comparing against it would drop every detection once min_chars > 3).
        if len(det) < 3:
            continue
        poly, text, conf = det[0], det[1], float(det[2])
        text = text.strip()
        if conf < min_conf or len(text) < min_chars:
            continue

        pts = np.array(poly, dtype=np.float32)
        xs.extend(pts[:, 0].tolist())
        ys.extend(pts[:, 1].tolist())

        if conf > best_conf and text:
            best_conf, best_text = conf, text

    if not xs or not ys:
        return None

    # Union of all accepted polygons, clamped to the image.
    x1, y1 = int(max(0, min(xs))), int(max(0, min(ys)))
    x2, y2 = int(min(W, max(xs))), int(min(H, max(ys)))

    # Grow the box by the margin without leaving the image.
    pad = int(margin * min(W, H))
    x1, y1 = max(0, x1 - pad), max(0, y1 - pad)
    x2, y2 = min(W, x2 + pad), min(H, y2 + pad)

    return (x1, y1, x2 - x1, y2 - y1), best_text, best_conf

def fallback_label_box(img_pil):
    """Guess a label box from edge density when OCR finds nothing.

    Canny edges are computed on the grayscale image, the top 30% of rows is
    ignored, and the remaining edges are morphologically closed. Each external
    contour's bounding rectangle is kept only if it covers at least 2% of the
    image and has a width/height ratio in [0.5, 4.5]; the survivors are scored
    so that larger and lower rectangles win.

    Returns:
        ``(x, y, w, h)`` of the best rectangle, padded by 4% of the shorter image
        side and clamped to the image, or ``None`` when nothing qualifies.
    """
    W, H = img_pil.size
    gray = cv2.cvtColor(np.array(img_pil), cv2.COLOR_RGB2GRAY)
    edges = cv2.Canny(gray, 50, 150)

    # Discard the edge response in the top 30% of the image.
    edges[:int(0.30 * H), :] = 0

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 3))
    closed = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel, iterations=2)

    contours, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None

    candidates = []
    for contour in contours:
        bx, by, bw, bh = cv2.boundingRect(contour)
        box_area = bw * bh
        aspect = bw / max(bh, 1)
        too_small = box_area < 0.02*W*H
        bad_shape = aspect < 0.5 or aspect > 4.5
        if too_small or bad_shape:
            continue
        # Larger and lower rectangles score higher.
        score = 0.6*(box_area/(W*H)) + 0.4*(by/H)
        candidates.append((score, (bx, by, bw, bh)))

    if not candidates:
        return None

    # max() keeps the first of equally scored candidates, like a strict '>' scan.
    _, (bx, by, bw, bh) = max(candidates, key=lambda item: item[0])

    pad = int(0.04 * min(W, H))
    left, top = max(0, bx - pad), max(0, by - pad)
    right, bottom = min(W, bx + bw + pad), min(H, by + bh + pad)
    return (left, top, right - left, bottom - top)

def find_image_paths(whisky_id: int):
    """Return the photo files that exist for a whisky id.

    Looks in ``photos_dir`` for ``<id>_2.png`` and ``<id>_1.webp`` and returns
    the ones found, in that order (an empty list when neither exists).
    """
    candidates = (
        photos_dir / f"{whisky_id}_2.png",
        photos_dir / f"{whisky_id}_1.webp",
    )
    return [path for path in candidates if path.exists()]

각 위스키 사진에서 라벨 영역을 크롭해 저장하고, 처리 결과를 metadata.csv로 기록

In [82]:
# Crop the label region out of every whisky photo and build the manifest.
# `records` collects one row per processed image (or one empty row per id that has
# no photo); `ok` / `missed` are counted per image, so their sum can exceed the
# number of ids.
records = []
ok, missed = 0, 0

bar = tqdm(f_df["id"].tolist(), desc="Processing whiskies", unit="img")

for wid in bar:
    wid = int(wid)
    imgs = find_image_paths(wid)
    if not imgs:
        # No photo on disk for this id: keep an empty row so the id still shows up
        # in the manifest.
        missed += 1
        records.append(dict(id=wid, input_path=None, crop_path=None, x=None,y=None,w=None,h=None,
                            ocr_text=None, ocr_conf=None, method=None))
        bar.set_postfix({"✅": ok, "❌": missed})
        continue

    for in_path in imgs:
        try:
            img = Image.open(in_path).convert("RGB")
        except Exception as e:
            # Unreadable image: count it as missed and tag the row with "io_error".
            missed += 1
            bar.set_postfix({"✅": ok, "❌": missed})
            records.append(dict(id=wid, input_path=str(in_path), crop_path=None, x=None,y=None,w=None,h=None,
                                ocr_text=None, ocr_conf=None, method="io_error"))
            continue

        # 1) OCR union (preferred)
        result = ocr_union_box(img)
        method = "ocr_union"
        if result is None:
            # 2) Fallback
            box = fallback_label_box(img)
            method = "fallback"
            text, conf = None, None
        else:
            box, text, conf = result

        if box is None:
            # Neither OCR nor the edge heuristic produced a box; the row is stored
            # with method="none". Note the progress-bar postfix is not refreshed here.
            missed += 1
            records.append(dict(id=wid, input_path=str(in_path), crop_path=None, x=None,y=None,w=None,h=None,
                                ocr_text=text, ocr_conf=conf, method="none"))
            continue

        # Save the crop as `<input stem>_label.jpg` and record the box that was used.
        x,y,w,h = box
        crop = img.crop((x,y,x+w,y+h))
        out_path = crops_dir / f"{in_path.stem}_label.jpg"
        crop.save(out_path, quality=92)
        ok += 1
        bar.set_postfix({"✅": ok, "❌": missed})
        records.append(dict(id=wid, input_path=str(in_path), crop_path=str(out_path),
                            x=x,y=y,w=w,h=h, ocr_text=text, ocr_conf=conf, method=method))

# Summary plus the manifest with one CSV row per record.
print(f"✅ crops saved: {ok} | ❌ missed: {missed}")
pd.DataFrame.from_records(records).to_csv(generated_metadata_csv, index=False)
print(f"Manifest written to {generated_metadata_csv}")

Processing whiskies:   0%|          | 0/2145 [00:00<?, ?img/s]

✅ crops saved: 4022 | ❌ missed: 246
Manifest written to /Users/mainframe/Workspace/ML/WV-Team/data/metadata.csv


In [107]:
def fill_defaults_for_missed(records, crops_dir: Path):
    """Replace failed crops with the uncropped source image, updating records in place.

    For every record without a ``crop_path``, the source photo is re-saved
    unchanged as ``<stem>_label.jpg`` inside ``crops_dir``, and the record's box
    is set to the whole image with ``method="default"``. Records whose source
    photo is missing or unreadable are left untouched and counted as skipped.

    Args:
        records: List of manifest dicts; entries are mutated in place.
        crops_dir: Directory the default images are written to.

    Returns:
        None. A one-line summary of filled / skipped counts is printed.
    """
    filled, skipped = 0, 0
    for r in records:
        if r.get("crop_path"):  # already has a crop, nothing to do
            continue

        # Use the photo this record was created for. Always falling back to
        # `<id>_2.png` would ignore `_1.webp` inputs and could overwrite the good
        # crop of a sibling `_2.png` record with an uncropped image.
        recorded = r.get("input_path")
        if isinstance(recorded, (str, Path)) and str(recorded):
            in_path = Path(recorded)
        else:
            in_path = photos_dir / f'{r.get("id")}_2.png'
        out_path = crops_dir / f'{in_path.stem}_label.jpg'

        if not in_path.exists():
            # No source image at all: nothing to fall back to.
            skipped += 1
            continue

        try:
            img = Image.open(in_path).convert("RGB")
        except Exception:
            # Best effort: an unreadable image is skipped rather than aborting the run.
            skipped += 1
            continue

        # Save the whole original image unchanged (no cropping).
        img.save(out_path, quality=92)

        # The recorded box covers the full image.
        w, h = img.size
        r.update({
            "crop_path": str(out_path),
            "x": 0, "y": 0, "w": w, "h": h,
            "method": "default"
        })
        filled += 1
    print(f"[Default Fill] ✅ filled: {filled} | ↩️ skipped: {skipped}")


In [108]:
# Back-fill every record that still has no crop with its uncropped source photo.
fill_defaults_for_missed(records, crops_dir)

[Default Fill] ✅ filled: 0 | ↩️ skipped: 0


In [109]:
# Rewrite the manifest so it reflects any records changed by the default fill.
pd.DataFrame.from_records(records).to_csv(generated_metadata_csv, index=False)
print(f"Manifest written to {generated_metadata_csv}")

Manifest written to /Users/mainframe/Workspace/ML/WV-Team/data/metadata.csv
