# Batch apply `remove_blackline.py` (supports `(in_path)->image` functions)

**What this does**
- Recursively processes all `.png` images under `main/`, `test/`, and `train/`.
- Saves the results to `processed/<subfolder>/...`, **keeping the original file names** and relative sub-paths.

In [2]:
from pathlib import Path
import importlib.util, inspect
from typing import Callable, Optional, Tuple
from PIL import Image
import numpy as np
import traceback, zipfile
from datetime import datetime

# OpenCV is optional: remember whether it could be imported so later code can
# choose between cv2 and a fallback.
try:
    import cv2
except Exception:
    HAS_CV2 = False
else:
    HAS_CV2 = True

# ------------------ CONFIG ------------------
# Root folder that contains the input subfolders listed in SUBFOLDERS.
INPUT_ROOT = Path('../mini_project') # replace by your path
# Subfolders of INPUT_ROOT that are scanned recursively for images.
SUBFOLDERS = ['main','test','train']
# Results are written to OUTPUT_ROOT/<subfolder>/<same relative path>.
OUTPUT_ROOT = Path('../mini_project/processed')
# File suffixes treated as images; compared against the lower-cased suffix.
IMAGE_EXTS = {'.png'}
# When True, a 'processed.zip' archive is created after processing.
MAKE_ZIP = True

# --------- ADAPTER CONFIG ---------
# Function mode, one of 'img' / 'path' / 'path_to_img':
#   'img'         -> func(PIL RGB image) returns an image
#   'path'        -> func(in_path, out_path) writes the output file itself
#   'path_to_img' -> func(str(in_path)) returns an image
FUNC_MODE = 'path_to_img'   # your function is (in_path)->image
# Top-level function name (in this example: remove_black_lines_mode)
FUNC_NAME = 'remove_black_lines_mode'
# Set these when the callable is a method inside a class:
CLASS_NAME = None
METHOD_NAME = None
# ----------------------------------

# Make sure every configured subfolder has a matching output directory.
for s in SUBFOLDERS:
    OUTPUT_ROOT.joinpath(s).mkdir(parents=True, exist_ok=True)

def load_module(module_file: Path):
    """Import a Python source file by path and return the executed module.

    The file is executed under the fixed module name ``remove_blackline``; the
    resulting module is returned to the caller and is not added to
    ``sys.modules`` by this function.

    Args:
        module_file: Path of the Python source file to load.

    Returns:
        The module object produced by executing ``module_file``.

    Raises:
        FileNotFoundError: If ``module_file`` does not exist.
        ImportError: If no import spec or loader can be built for the file
            (for example, its suffix is not a recognised Python suffix).
    """
    if not module_file.exists():
        raise FileNotFoundError(f"Could not find {module_file.resolve()} — put this notebook next to remove_blackline.py")
    spec = importlib.util.spec_from_file_location('remove_blackline', str(module_file))
    # spec_from_file_location() returns None when it cannot pick a loader, so
    # validate before the spec is used. An explicit raise is used instead of
    # `assert`, which is stripped when Python runs with -O.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot build an import spec for {module_file.resolve()}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module

def _print_callable(prefix, name, fn):
    """Print ``prefix name signature``; omit the signature if it cannot be obtained."""
    try:
        print(prefix, name, inspect.signature(fn))
    except Exception:
        print(prefix, name)


mod = load_module(Path('remove_blackline.py'))

# Show every function, and every class with its methods, found in the module
# namespace so the adapter names in the config can be checked against them.
print('\n[Callables found in remove_blackline.py]')
for name, obj in vars(mod).items():
    if inspect.isfunction(obj):
        _print_callable(' - func', name, obj)
    elif inspect.isclass(obj):
        print(' - class', name)
        for mname, mobj in vars(obj).items():
            if inspect.isfunction(mobj):
                _print_callable('    - method', mname, mobj)

def resolve_attr(root, name):
    """Fetch attribute ``name`` from ``root``.

    Returns ``None`` when ``name`` is empty or ``None``. Raises
    ``AttributeError`` when ``root`` has no attribute called ``name``.
    """
    if name:
        # A private sentinel distinguishes "missing" from an attribute whose
        # value happens to be None.
        missing = object()
        found = getattr(root, name, missing)
        if found is missing:
            raise AttributeError(f"Cannot find attribute '{name}' in {root}")
        return found
    return None

# Resolve the callable to apply: a method on an instance of CLASS_NAME when a
# class is configured, otherwise an attribute looked up on the module itself.
target = mod
if CLASS_NAME:
    cls = resolve_attr(mod, CLASS_NAME)
    target = cls()  # the class is instantiated without arguments
func = resolve_attr(target, METHOD_NAME) if METHOD_NAME else resolve_attr(target, FUNC_NAME)

# resolve_attr() returns None for an empty name; stop here with a clear message
# instead of failing later on `None.__name__` or on every single image.
if not callable(func):
    raise TypeError(
        f"Adapter target is not callable (got {func!r}); "
        "check FUNC_NAME / CLASS_NAME / METHOD_NAME"
    )

# Some callables (e.g. functools.partial objects) have no __name__, so fall
# back to their repr for the status line.
print(f"Adapter -> {FUNC_MODE}: {getattr(func, '__name__', repr(func))}")

def save_image(obj, out_path: Path):
    """Write an image returned by the processing function to ``out_path``.

    Args:
        obj: Either a ``PIL.Image.Image`` or a ``numpy.ndarray``. Arrays with
            3 or 4 channels are treated as OpenCV channel order (BGR / BGRA).
        out_path: Destination file path.

    Raises:
        TypeError: If ``obj`` is neither a PIL image nor a numpy array.
        OSError: If OpenCV reports that the file could not be written.
    """
    if isinstance(obj, Image.Image):
        obj.save(out_path)
        return
    if not isinstance(obj, np.ndarray):
        raise TypeError('Unsupported return type from your function; return a PIL.Image or numpy array.')

    if HAS_CV2:
        # The array is assumed to be in OpenCV order already, so it is written
        # as-is. cv2.imwrite signals failure by returning False, so the result
        # must be checked or a failed write would be reported as success.
        if not cv2.imwrite(str(out_path), obj):
            raise OSError(f"cv2.imwrite failed to write {out_path}")
        return

    # Pillow fallback: Pillow expects RGB(A), so reorder the channels to match
    # what the cv2 path would have written.
    if obj.ndim == 3 and obj.shape[2] == 3:
        obj = obj[..., ::-1]           # BGR -> RGB
    elif obj.ndim == 3 and obj.shape[2] == 4:
        obj = obj[..., [2, 1, 0, 3]]   # BGRA -> RGBA, alpha stays last
    Image.fromarray(obj).save(out_path)

def process_one(in_path: Path, out_path: Path) -> bool:
    """Run the configured function on one image and store the result.

    How ``func`` is called depends on ``FUNC_MODE``:

    * ``'path_to_img'``: ``func(str(in_path))`` returns an image that is
      written with ``save_image``.
    * ``'img'``: ``func`` receives the input as an RGB PIL image; the result
      is saved directly (non-PIL results go through ``Image.fromarray``).
    * ``'path'``: ``func(in_path, out_path)`` writes the file itself.

    Returns:
        ``True`` on success (for ``'path'`` mode: whether ``out_path`` exists
        afterwards). Any exception is printed with a short traceback and
        reported as ``False``.
    """
    try:
        if FUNC_MODE == 'path_to_img':
            # The function expects the path as a plain string.
            save_image(func(str(in_path)), out_path)
            return True

        if FUNC_MODE == 'img':
            with Image.open(in_path) as source:
                result = func(source.convert('RGB'))
                if not isinstance(result, Image.Image):
                    result = Image.fromarray(result)
                result.save(out_path)
            return True

        if FUNC_MODE == 'path':
            # The function is responsible for writing out_path.
            func(in_path, out_path)
            return out_path.exists()

        raise ValueError("FUNC_MODE must be 'img', 'path', or 'path_to_img'")
    except Exception as exc:
        print(f"[ERROR] {in_path} -> {exc}")
        traceback.print_exc(limit=1)
        return False

# Counters and bookkeeping for the whole run.
total, ok = 0, 0
errors = []
start = datetime.now()

for sub in SUBFOLDERS:
    in_dir, out_dir = INPUT_ROOT / sub, OUTPUT_ROOT / sub
    out_dir.mkdir(parents=True, exist_ok=True)
    if not in_dir.exists():
        print(f"[WARN] Skip '{sub}' — folder not found at {in_dir.resolve()}")
        continue
    # Sorted traversal gives a stable processing order.
    for p in sorted(in_dir.rglob('*')):
        if not p.is_file() or p.suffix.lower() not in IMAGE_EXTS:
            continue
        total += 1
        # Mirror the input's relative location underneath the output folder.
        rel = p.relative_to(in_dir)
        out_path = out_dir / rel
        out_path.parent.mkdir(parents=True, exist_ok=True)
        if process_one(p, out_path):
            ok += 1
        else:
            errors.append(str(p))

elapsed = (datetime.now() - start).total_seconds()
print(f"\nDone. {ok}/{total} images processed in {elapsed:.1f}s.")
if errors:
    print('Failed files (first 10 shown):')
    for e in errors[:10]:
        print(' -', e)

if MAKE_ZIP:
    # Archive the folder the results were actually written to. Walking a
    # 'processed' directory relative to the current working directory only
    # finds the files when the notebook runs from OUTPUT_ROOT's parent;
    # otherwise the archive would silently be empty.
    with zipfile.ZipFile('processed.zip', 'w', compression=zipfile.ZIP_DEFLATED) as zf:
        for p in sorted(OUTPUT_ROOT.rglob('*')):
            if p.is_file():
                # Member names keep the output folder's own name as prefix,
                # e.g. 'processed/<subfolder>/<file>'.
                zf.write(p, Path(OUTPUT_ROOT.name) / p.relative_to(OUTPUT_ROOT))
    print('Created archive: processed.zip')



[Callables found in remove_blackline.py]
 - func remove_black_lines_mode (img_path, threshold=50)
Adapter -> path_to_img: remove_black_lines_mode

Done. 20010/20010 images processed in 841.7s.
Created archive: processed.zip
