### 1) Setup

In [None]:
# === META-SETUP (auto) =======================================================
# Działa lokalnie i na VM. Wybór profilu: os.getenv("ML_CONFIG") lub autodetekcja.
import os, socket, yaml
from pathlib import Path

# Autodetect the profile: a hostname containing 'non-prl' selects the cloud
# config, anything else selects the local one.
# NOTE(review): the recorded output of this notebook shows a bucket named
# 'nonprl-ml' (no hyphen) — confirm the VM hostname really contains 'non-prl'.
HOST = socket.gethostname()
DEFAULT_CFG = "configs/runtime.cloud.yaml" if "non-prl" in HOST else "configs/runtime.local.yaml"
# `or` instead of a getenv default, so an empty ML_CONFIG also falls back to autodetection.
CFG_PATH = Path(os.getenv("ML_CONFIG") or DEFAULT_CFG)

# The context manager closes the config file; `or {}` keeps an empty YAML
# document (which safe_load returns as None) from breaking the lookups below.
with open(CFG_PATH, encoding="utf-8") as _cfg_file:
    cfg = yaml.safe_load(_cfg_file) or {}

# Fail early, naming the config file, when a required key is absent.
_missing = [key for key in ("inputs_dir", "outputs_dir") if key not in cfg]
if _missing:
    raise KeyError(f"{CFG_PATH}: missing required config keys: {_missing}")

PRJ = Path(cfg.get("data_root", ".")).resolve()
INP = PRJ / cfg["inputs_dir"]
OUT = PRJ / cfg["outputs_dir"]
OUT.mkdir(parents=True, exist_ok=True)  # the outputs directory is created on demand
USE_VISION = bool(cfg.get("use_vision", False))
VISION_KEY = cfg.get("vision_key", "")

# Export the Vision credentials path only when Vision is enabled and a key is configured.
if USE_VISION and VISION_KEY:
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = str(Path(VISION_KEY))

print(f"CFG: {CFG_PATH}")
print(f"PRJ: {PRJ}")
print(f"INP: {INP}")
print(f"OUT: {OUT}")
print(f"USE_VISION: {USE_VISION}, KEY set: {bool(os.environ.get('GOOGLE_APPLICATION_CREDENTIALS'))}")
# ============================================================================ 

Project root: /home/wysokozaawansowany_gmail_com/work/MetaLogic
Inputs:       /home/wysokozaawansowany_gmail_com/work/MetaLogic/inputs
Outputs:      /home/wysokozaawansowany_gmail_com/work/MetaLogic/outputs
Bucket:       nonprl-ml
USE_VISION:   False


'/home/wysokozaawansowany_gmail_com/secrets/vision-key.json'

### 2) Wybór pliku z dropdownem + miniatura

In [None]:
from pathlib import Path
import ipywidgets as widgets
from IPython.display import display, Image as IPImage
from PIL import Image

# Image files offered in the dropdown, matched by extension (case-insensitive).
IMAGE_SUFFIXES = {".jpg", ".jpeg", ".png"}
files = sorted(p.name for p in INP.glob("*") if p.suffix.lower() in IMAGE_SUFFIXES)
assert files, f"Brak obrazów w {INP}"
dd = widgets.Dropdown(options=files, description="Plik:", layout=widgets.Layout(width="50%"))
thumb = widgets.Output()
display(dd, thumb)


def show_thumb(p):
    """Render a thumbnail (at most 220x220) of the image at `p` inside the `thumb` output widget."""
    with thumb:
        thumb.clear_output(wait=True)
        # Open inside a context manager so the source file is released right after conversion.
        with Image.open(p) as src:
            im = src.convert("RGB")
        im.thumbnail((220, 220))
        # IPython renders PIL images directly; no need to call the repr hook by hand.
        display(im)


def _on_file_change(change):
    """Store the newly selected file in the global IMG_PATH and refresh the thumbnail."""
    global IMG_PATH
    IMG_PATH = INP / change["new"]
    show_thumb(IMG_PATH)


# Start with the first file selected; later cells read IMG_PATH.
IMG_PATH = INP / files[0]
show_thumb(IMG_PATH)
dd.observe(_on_file_change, names="value")

Dropdown(description='Wybierz plik:', layout=Layout(width='50%'), options=('0004.jpg', '0006.jpg', '0009.jpg',…

Output()

In [None]:
from google.cloud import vision
from pathlib import Path
import shutil, os

# Vision results default to empty lists so the print below (and later cells)
# still work when Vision is disabled.
label_tags, object_tags, texts, logo_tags, boxes_vis = [], [], [], [], []
if USE_VISION:
    import warnings

    # Minimum score for keeping a label / object / logo annotation.
    LABEL, OBJ, LOGO = 0.60, 0.60, 0.50
    VISION_TIMEOUT = 15  # `timeout` passed to every Vision request

    # Send the image bytes directly: no intermediate copy under a fixed /tmp
    # name, which does not exist on every platform and is shared between runs.
    gimg = vision.Image(content=Path(IMG_PATH).read_bytes())
    client = vision.ImageAnnotatorClient()

    def _vision_call(method, what):
        """Run one Vision request on `gimg` and warn if the response reports an error.

        Without this check a failed request is indistinguishable from
        "nothing detected" because the annotation list is simply empty.
        """
        response = method(image=gimg, timeout=VISION_TIMEOUT)
        if response.error.message:
            warnings.warn(f"Vision {what} failed: {response.error.message}")
        return response

    labels = _vision_call(client.label_detection, "label detection").label_annotations
    label_tags = [l.description.lower() for l in labels if (l.score or 0) >= LABEL]

    objs = _vision_call(client.object_localization, "object localization").localized_object_annotations
    # Filter once; tags and boxes are derived from the same kept objects.
    kept_objs = [o for o in objs if (o.score or 0) >= OBJ]
    object_tags = [o.name.lower() for o in kept_objs]
    boxes_vis = [
        (o.name.lower(), [(v.x, v.y) for v in o.bounding_poly.normalized_vertices])
        for o in kept_objs
    ]

    ta = _vision_call(client.text_detection, "text detection").text_annotations
    # The first annotation is skipped; only the remaining entries are kept.
    texts = [t.description.strip().lower() for t in ta[1:]] if ta else []

    logos = _vision_call(client.logo_detection, "logo detection").logo_annotations
    logo_tags = [l.description.lower() for l in logos if (l.score or 0) >= LOGO]
print(f"vision: {len(label_tags)} labels; {len(object_tags)} objects; {len(texts)} texts; {len(logo_tags)} logos")

IMG_PATH: /home/wysokozaawansowany_gmail_com/gcs/nonprl-ml/inputs/0241.jpg
USE_VISION: True
GAC exists: True → /home/wysokozaawansowany_gmail_com/secrets/vision-key.json


TypeError: 'ColorInfo' object is not subscriptable

In [None]:
from paddleocr import PaddleOCR
from PIL import Image  # image size is needed to normalise the OCR boxes

ocr = PaddleOCR(use_angle_cls=True, lang='pl', use_gpu=False)
ocr_groups, ocr_boxes = [], []
res = ocr.ocr(str(IMG_PATH), cls=True)

# PaddleOCR reports box corners in pixels. The area thresholds below and the
# "norm_box" key expect fractions of the image, so divide by the image size.
# NOTE(review): PIL's size ignores EXIF orientation — confirm it matches the
# orientation PaddleOCR used for rotated photos.
with Image.open(IMG_PATH) as _src:
    img_w, img_h = _src.size

# The per-image entry can be None when no text was detected; treat it as empty.
ocr_lines = res[0] if res and res[0] else []
for box, (text, _conf) in ocr_lines:
    xs, ys = zip(*box)
    # Axis-aligned bounding box of the detected quadrilateral, in 0..1 coordinates.
    x0, x1 = min(xs) / img_w, max(xs) / img_w
    y0, y1 = min(ys) / img_h, max(ys) / img_h
    area = (x1 - x0) * (y1 - y0)  # fraction of the whole image
    letters = sum(ch.isalpha() for ch in text)
    up = sum(ch.isupper() for ch in text if ch.isalpha())
    # A "sign" is either a large, mostly upper-case text or a very large, long text.
    is_sign = (area > 0.05 and up / max(1, letters) > 0.4) or (area > 0.08 and len(text) > 20)
    b4 = [(x0, y0), (x1, y0), (x1, y1), (x0, y1)]
    ocr_groups.append({"text": text, "norm_box": b4, "area": area, "is_sign": is_sign})
    ocr_boxes.append(b4)
# Signs first, then by decreasing area.
ocr_groups.sort(key=lambda g: (not g["is_sign"], -g["area"]))
print(f"OCR groups: {len(ocr_groups)} | signs: {sum(g['is_sign'] for g in ocr_groups)}")

### Raport i miniatura

In [None]:
from PIL import Image, ImageDraw
from IPython.display import display

def pretty(name, seq):
    """Print a header with the count of distinct items in `seq`, then each distinct item (sorted) as a bullet."""
    unique_items = sorted(set(seq))
    print(f"\n{name} ({len(unique_items)}):")
    for item in unique_items:
        print(" •", item)

print("=== SUMMARY ===")
pretty("OBJECTS", object_tags)
pretty("LABELS", label_tags)
pretty("LOGOS", logo_tags)

# List at most the first 20 OCR groups, each text cut to 140 characters.
print("\nOCR GROUPS:")
for i, g in enumerate(ocr_groups[:20]):
    marker = "[SIGN]" if g["is_sign"] else "      "
    # Built outside the f-string: a backslash inside an f-string expression
    # is a SyntaxError before Python 3.12.
    snippet = g["text"][:140].replace("\n", " ")
    print(f"{i+1:02d}. {marker} {snippet}")

# Preview image: downscaled so neither side exceeds 1200 px (never upscaled).
img = Image.open(IMG_PATH).convert("RGB")
W, H = img.size
scale = min(1200 / W, 1200 / H, 1.0)
nw, nh = max(1, int(W * scale)), max(1, int(H * scale))  # keep at least 1 px per side
res = img.resize((nw, nh))
dr = ImageDraw.Draw(res)


def _outline(norm_pts, width, fill):
    """Draw a closed polygon on the preview; vertices are scaled by the preview size."""
    pts = [(int(x * nw), int(y * nh)) for x, y in norm_pts]
    if pts:  # an empty vertex list has nothing to close
        dr.line(pts + [pts[0]], width=width, fill=fill)


# Vision object boxes (normalized vertices).
for name, verts in (boxes_vis or []):
    _outline(verts, 2, (0, 128, 0))
# OCR boxes; groups flagged as signs get a thicker outline.
for g in (ocr_groups or []):
    _outline(g["norm_box"], 3 if g["is_sign"] else 1, (0, 120, 0))

out = OUT / f"preview_{Path(IMG_PATH).stem}.jpg"
res.save(out)
print("preview:", out)
display(res)

### 4) Scena z labels (Vision)

In [37]:
# Up to three scene categories with at least one keyword among the Vision
# labels, as (scene, hit_count) pairs ordered by decreasing hit count.
scene_top = []
if label_tags:
    # Scene category -> label keywords that count as a hit for it.
    scene_map = {
        "public event": [
            "crowd", "festival", "parade", "audience", "event", "concert", "performance",
        ],
        "old city street": [
            "street", "alley", "historic site", "town", "architecture", "building", "cobblestone",
        ],
        "factory/workshop": [
            "factory", "manufacturing", "workshop", "industrial", "assembly line",
        ],
        "shop/storefront": [
            "storefront", "shop", "retail", "market", "supermarket",
        ],
        "park/outdoors": [
            "park", "trees", "grass", "recreation", "outdoor",
        ],
        "transport hub": [
            "station", "platform", "bus", "tram", "railway", "terminal",
        ],
    }
    ls = set(label_tags)
    # Keywords are unique within each list, so the intersection size equals
    # the number of keywords present among the labels.
    scores = sorted(
        [(category, len(ls.intersection(keywords))) for category, keywords in scene_map.items()],
        key=lambda entry: -entry[1],
    )
    scene_top = [entry for entry in scores if entry[1] > 0][:3]
scene_top

[]

### 6) Eksport CSV (Dublin Core)

In [None]:
import pandas as pd

# Collect the unique tags from every source.
tags = sorted(set(label_tags + object_tags + texts + logo_tags))

# CSV rows for the dLibra import: a header row followed by (field, pl, en, universal) rows.
rows = [
    ("", "pl", "en", "universal"),
    ("Title", "Ulica z szyldami", "Street photo with shop signs", ""),
    ("Description", "Widok ulicy z ludźmi i szyldami.", "Street view with people and text signs.", "")
]

# Every tag becomes a Subject row.
rows.extend(("Subject", "", t, "") for t in tags)

# Add the detected scenes, if the scene cell has been run and found any.
# Scenes that are already present as a tag are skipped to avoid duplicate Subject rows.
_subjects = set(tags)
for scene, _ in (globals().get("scene_top") or []):
    if scene not in _subjects:
        rows.append(("Subject", "", scene, ""))
        _subjects.add(scene)

# Write the CSV into the outputs directory defined by the setup cell.
# (The previous `OUT_DIR` is not defined anywhere in this notebook.)
out_csv = OUT / "import_dlibra.csv"
pd.DataFrame(rows).to_csv(out_csv, index=False, header=False, encoding="utf-8")

print("csv:", out_csv)