<a href="https://colab.research.google.com/github/TerriblePepito/TerriblePepitostest/blob/main/changefontcolor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
import sys
!{sys.executable} -m pip install svgelements

from pathlib import Path
from lxml import etree
import re
from svgelements import SVG, Color

# Google Drive folders: SVG files are read from INPUT_DIR and the recolored
# copies are written to OUTPUT_DIR under the same file name.
INPUT_DIR  = Path("/content/drive/MyDrive/testinput")
OUTPUT_DIR = Path("/content/drive/MyDrive/testoutput")
# Create the output folder (and any missing parents); no error if it exists.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# -------- Settings --------
# Fill color applied to the text that gets recolored.
TARGET_TEXT_COLOR = "#000000"   # black

# Tolérance "jaune" : on accepte des jaunes proches (pas seulement #FFFF00)
# Tu peux élargir/resserrer en jouant sur ces seuils.
def is_yellow(color: "Color | None") -> bool:
    """Return True when *color* looks yellow (high red and green, low blue).

    The thresholds are deliberately tolerant so that yellows close to
    #FFFF00 are accepted too; widen or tighten them to tune the detection.

    Args:
        color: Object exposing ``red``, ``green`` and ``blue`` channels in
            the 0..255 range, or None.

    Returns:
        False for None and for colors whose channels are undefined,
        otherwise the result of the threshold test.
    """
    if color is None:
        return False
    r, g, b = color.red, color.green, color.blue
    # A color without a concrete value (e.g. how svgelements reports
    # fill="none") exposes None channels; comparing those with ints would
    # raise TypeError, so treat it as "not yellow".
    if r is None or g is None or b is None:
        return False
    # Heuristic: yellow = red and green high, blue low.
    return r >= 180 and g >= 160 and b <= 140

def set_fill_black(elem):
    """Force the fill of an SVG element to TARGET_TEXT_COLOR.

    The color is written as a ``fill`` declaration in the inline ``style``
    attribute and also as the ``fill`` presentation attribute. Writing the
    inline style even when the element had none matters: a presentation
    attribute alone ranks below stylesheet rules in the CSS cascade, so a
    class-based ``fill`` rule would otherwise keep winning.

    Args:
        elem: Element exposing ``get(name)`` and ``set(name, value)``
            (an lxml element in this notebook).
    """
    declarations = {}
    for chunk in (elem.get("style") or "").split(";"):
        name, sep, value = chunk.strip().partition(":")
        if sep:  # skip empty chunks and fragments without a "name:value" shape
            declarations[name.strip()] = value.strip()

    # Replaces an existing fill in place or appends a new declaration.
    declarations["fill"] = TARGET_TEXT_COLOR
    elem.set("style", ";".join(f"{k}:{v}" for k, v in declarations.items()))
    # Keep the attribute in sync for consumers that ignore inline styles.
    elem.set("fill", TARGET_TEXT_COLOR)

def process_svg(in_path: Path, out_path: Path):
    """Turn text that overlaps a yellow shape black and save the result.

    The file is parsed twice: with svgelements to obtain the bounding boxes
    of yellow-filled elements and of text elements, and with lxml to edit the
    matching <text>/<tspan> nodes. Geometry and XML nodes are paired by their
    position in iteration order, which is a best-effort assumption; when the
    two lists differ in length only the common prefix is processed.

    Args:
        in_path: SVG file to read.
        out_path: Destination of the rewritten SVG.

    Returns:
        A tuple ``(changed, yellow_count, text_node_count)``; ``(0, 0, 0)``
        when either parser fails on the input. A failure while writing the
        output is printed but does not change the returned counts.
    """
    print(f"Processing: {in_path.name}")
    print(f"Output to: {out_path.name}")

    def corners(bb):
        # svgelements' bbox() already yields (xmin, ymin, xmax, ymax). The
        # values must not be read as (x, y, width, height): that inflates
        # every box and produces false overlaps. Corners are only re-ordered
        # defensively.
        if bb is None:
            return None
        x0, y0, x1, y1 = bb
        return (min(x0, x1), min(y0, y1), max(x0, x1), max(y0, y1))

    def localname(tag):
        # lxml comment / processing-instruction nodes carry a non-string tag.
        if not isinstance(tag, str):
            return ""
        return tag.split("}")[-1] if "}" in tag else tag

    def intersects(bb, yb):
        # bb, yb: (xmin, ymin, xmax, ymax); touching edges count as overlap.
        if bb is None or yb is None:
            return False
        ax1, ay1, ax2, ay2 = bb
        bx1, by1, bx2, by2 = yb
        return not (ax2 < bx1 or ax1 > bx2 or ay2 < by1 or ay1 > by2)

    # Geometric parse with svgelements (bounding boxes of the elements).
    try:
        svg = SVG.parse(str(in_path))
        # Materialize once: the elements are walked twice below.
        all_svg_elements = list(svg.elements())
        print(f"svgelements parsed {len(all_svg_elements)} elements.")
    except Exception as e:
        print(f"Error parsing SVG with svgelements: {e}")
        return 0, 0, 0

    # 1) Collect the bounding boxes of the yellow objects (the arrows).
    yellow_boxes = []
    for e in all_svg_elements:
        try:
            fill = getattr(e, "fill", None)
            if fill is None:
                continue
            c = fill if isinstance(fill, Color) else Color(fill)
            if not is_yellow(c):
                continue
            bb = corners(e.bbox())
        except Exception:
            # Best effort: an element whose color or geometry cannot be
            # evaluated is simply not treated as a yellow area.
            continue
        if bb is not None:
            yellow_boxes.append(bb)
    print(f"Detected {len(yellow_boxes)} yellow objects with svgelements.")

    # 2) Parse the original XML so the <text>/<tspan> nodes can be edited.
    parser = etree.XMLParser(recover=True)
    try:
        tree = etree.parse(str(in_path), parser)
        root = tree.getroot()
        print(f"lxml parsed XML root element: {root.tag}")
    except Exception as e:
        print(f"Error parsing XML with lxml: {e}")
        return 0, 0, 0

    # Bounding boxes of the text elements, one entry per element (None when
    # unavailable) so that positions stay aligned with the XML node list.
    text_elems_geom = []
    for e in all_svg_elements:
        if e.__class__.__name__ not in ("Text", "TSpan"):
            continue
        try:
            text_elems_geom.append(corners(e.bbox()))
        except Exception:
            text_elems_geom.append(None)
    print(f"Detected {len(text_elems_geom)} text elements with svgelements for geometry.")

    # XML <text>/<tspan> nodes in document order.
    xml_text_nodes = [
        elem for elem in root.iter() if localname(elem.tag) in ("text", "tspan")
    ]
    print(f"Detected {len(xml_text_nodes)} text/tspan nodes in XML tree.")

    # zip() stops at the shorter list, i.e. the common prefix on a mismatch.
    changed = 0
    for node, bb in zip(xml_text_nodes, text_elems_geom):
        if bb is None:
            continue
        # Text overlapping a yellow area -> make it black.
        if any(intersects(bb, yb) for yb in yellow_boxes):
            set_fill_black(node)
            changed += 1

    try:
        tree.write(str(out_path), encoding="utf-8", xml_declaration=True, pretty_print=True)
        print(f"Successfully wrote output file: {out_path.name}")
    except Exception as e:
        # Reported only; the counts below still describe what was processed.
        print(f"Error writing output file: {e}")

    return changed, len(yellow_boxes), len(xml_text_nodes)

# -------- Batch --------
# Run process_svg on every *.svg found directly in INPUT_DIR (sorted by
# path), writing each result to OUTPUT_DIR under the same file name.
svg_files = sorted(INPUT_DIR.glob("*.svg"))
total_changed = 0

if svg_files:
    for f in svg_files:
        out = OUTPUT_DIR / f.name
        changed, nb_yellow, nb_text = process_svg(f, out)
        total_changed = total_changed + changed
        print(f"{f.name} -> flèches jaunes détectées: {nb_yellow} | textes trouvés: {nb_text} | textes passés en noir: {changed}")
else:
    # Nothing to do: report the folder that was searched.
    print(f"Aucun .svg trouvé dans: {INPUT_DIR}")

print(f"\nTerminé. Total textes modifiés: {total_changed}")

Aucun .svg trouvé dans: /content/drive/MyDrive/testinput

Terminé. Total textes modifiés: 0


In [18]:
!pip -q install svgelements lxml

from svgelements import SVG, Color
from lxml import etree
from pathlib import Path
import re

# INPUT_SVG and OUTPUT_SVG are not defined in this cell.
# You might want to define them, for example:
# INPUT_SVG = Path("/content/drive/MyDrive/testinput/your_svg_file.svg")
# OUTPUT_SVG = Path("/content/drive/MyDrive/testoutput/processed_svg_file.svg")

# Google Drive folders for input SVGs and processed output.
# NOTE(review): the active (non-commented) code of this cell does not use
# these two paths; they only matter once the single-file pipeline is enabled.
INPUT_DIR  = Path("/content/drive/MyDrive/testinput")
OUTPUT_DIR = Path("/content/drive/MyDrive/testoutput")
# Inline style declaration appended by force_text_black().
BLACK_STYLE = "fill:#000000 !important"

def localname(tag):
    """Return the tag name without its ``{namespace}`` prefix.

    Args:
        tag: An element tag such as ``"{http://www.w3.org/2000/svg}text"``
            or ``"text"``.

    Returns:
        The part after the last ``}``, the tag itself when it has no
        namespace, or ``""`` when *tag* is not a string. The last case
        covers nodes such as lxml comments and processing instructions,
        whose ``tag`` is a callable; they would otherwise raise TypeError.
    """
    if not isinstance(tag, str):
        return ""
    return tag.rsplit("}", 1)[-1]

def is_yellowish(color: "Color | None") -> bool:
    """Heuristic: yellow = red and green high, blue low.

    Args:
        color: Object exposing ``red``, ``green`` and ``blue`` channels in
            the 0..255 range, or None.

    Returns:
        False for None and for colors whose channels are undefined,
        otherwise whether the channels pass the thresholds.
    """
    if color is None:
        return False
    r, g, b = color.red, color.green, color.blue
    # Colors without a concrete value (e.g. how svgelements reports
    # fill="none") expose None channels; comparing them with ints would
    # raise TypeError.
    if r is None or g is None or b is None:
        return False
    return r >= 170 and g >= 140 and b <= 160

def bboxes_intersect(a, b):
    """Tell whether two axis-aligned boxes overlap.

    Both arguments are ``(xmin, ymin, xmax, ymax)`` sequences. Boxes that
    merely touch along an edge or at a corner count as intersecting.
    """
    left_a, top_a, right_a, bottom_a = a
    left_b, top_b, right_b, bottom_b = b

    # Separated along the x axis?
    if right_a < left_b or left_a > right_b:
        return False
    # Separated along the y axis?
    if bottom_a < top_b or top_a > bottom_b:
        return False
    return True

def force_text_black(xml_elem):
    """Rewrite the element's inline style so that it ends with BLACK_STYLE.

    Any existing ``fill: ...`` declaration is stripped from the ``style``
    attribute first, then BLACK_STYLE (which carries ``!important``) is
    appended so it takes precedence over CSS rules. Intended for <text> and
    <tspan> elements.
    """
    current = xml_elem.get("style") or ""
    # Drop every existing fill declaration (simple but effective).
    remaining = re.sub(r"fill\s*:\s*[^;]+;?", "", current, flags=re.IGNORECASE).strip()
    # Only insert a separator when declarations remain and none is present.
    separator = ";" if remaining and not remaining.endswith(";") else ""
    xml_elem.set("style", remaining + separator + BLACK_STYLE)

# ---- 1) Parse geometry with svgelements (handles transforms correctly) ----
# This assumes INPUT_SVG is a single file path. If you intend to process a directory,
# you would need a loop similar to the one in the first cell.
# For demonstration, let's assume INPUT_SVG is defined before this cell or uncomment the example above.
# If INPUT_SVG is not defined, this will cause a NameError.
# svg = SVG.parse(str(INPUT_SVG))

# The following code is commented out as INPUT_SVG is not defined.
# If you define INPUT_SVG and OUTPUT_SVG, you can uncomment and run this.
# yellow_boxes = []
# text_boxes = []   # list of (bbox, index_in_xml_text_list) later
# svg_text_bboxes = []

# for e in svg.elements():
#     name = e.__class__.__name__
#     try:
#         if name in ("Path", "Rect", "Circle", "Ellipse", "Polygon", "Polyline"):
#             fill = getattr(e, "fill", None)
#             if fill is None:
#                 continue
#             c = Color(fill) if not isinstance(fill, Color) else fill
#             if is_yellowish(c):
#                 bb = e.bbox()
#                 if bb is not None:
#                     yellow_boxes.append((bb[0], bb[2], bb[1], bb[3]) if len(bb)==4 else bb)
#         elif name in ("Text", "TSpan"):
#             bb = e.bbox()
#             if bb is not None:
#                 # svgelements Text bbox is (xmin, xmax, ymin, ymax)
#                 xmin, xmax, ymin, ymax = bb
#                 svg_text_bboxes.append((xmin, ymin, xmax, ymax))
#             else:
#                 svg_text_bboxes.append(None)
#     except Exception:
#         continue

# # ---- 2) Parse XML to modify actual nodes ----
# parser = etree.XMLParser(recover=True, remove_blank_text=False)
# tree = etree.parse(str(INPUT_SVG), parser)
# root = tree.getroot()

# xml_text_nodes = [n for n in root.iter() if localname(n.tag) in ("text", "tspan")]

# # Align lists conservatively (same order assumption)
# n = min(len(xml_text_nodes), len(svg_text_bboxes))

# changed = 0
# for i in range(n):
#     bb = svg_text_bboxes[i]
#     if bb is None:
#         continue
#     if any(bboxes_intersect(bb, yb[0]) for yb in yellow_boxes):
#         force_text_black(xml_text_nodes[i])
#         changed += 1

# tree.write(str(OUTPUT_SVG), encoding="utf-8", xml_declaration=True, pretty_print=False)
# print(f"Done. Yellow shapes detected: {len(yellow_boxes)} | Text elements recolored: {changed}")
# print(f"Saved: {OUTPUT_SVG}")

In [None]:
# Mount Google Drive at /content/drive, the root of the INPUT_DIR / OUTPUT_DIR
# paths used by the other cells.
from google.colab import drive
drive.mount("/content/drive")

# Install the system package and the Python packages (OpenCV, NumPy) used by
# the PNG cells.
# NOTE(review): librsvg2-bin is installed here but nothing in the visible
# cells invokes it — confirm it is still needed.
!apt-get -y install librsvg2-bin
!pip -q install opencv-python numpy

In [26]:
# Version that works on PNG input, but the recolored digits come out blurry
import cv2
import numpy as np
from pathlib import Path

# Google Drive folders: PNG files are read from INPUT_DIR and the recolored
# copies are written to OUTPUT_DIR under the same file name.
INPUT_DIR  = Path("/content/drive/MyDrive/testinput")
OUTPUT_DIR = Path("/content/drive/MyDrive/testoutput")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# --- Settings (HSV) ---
# Lower/upper HSV bounds passed to cv2.inRange.
# Yellow (the arrows)
YELLOW_LO = np.array([15, 80, 80], dtype=np.uint8)
YELLOW_HI = np.array([45, 255, 255], dtype=np.uint8)

# Light text (white / almost white)
WHITE_LO  = np.array([0, 0, 200], dtype=np.uint8)     # low saturation + high brightness
WHITE_HI  = np.array([180, 60, 255], dtype=np.uint8)

def process_png(in_path: Path, out_path: Path) -> dict:
    """Paint light text lying on yellow areas of a PNG black.

    Args:
        in_path: Image file to read.
        out_path: Destination of the recolored image.

    Returns:
        A dict with "file" (input file name) and "status". "status" is
        "ERROR: cannot read" when the image cannot be loaded; otherwise it is
        "OK" or "ERROR: cannot write" and "changed_pixels" (number of pixels
        painted black) is included as well.
    """
    img = cv2.imread(str(in_path), cv2.IMREAD_COLOR)
    if img is None:
        return {"file": in_path.name, "status": "ERROR: cannot read"}

    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    # 1) Mask of the yellow areas, closed to get rid of holes/noise.
    close_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
    yellow_mask = cv2.inRange(hsv, YELLOW_LO, YELLOW_HI)
    yellow_mask = cv2.morphologyEx(yellow_mask, cv2.MORPH_CLOSE, close_kernel, iterations=1)

    # 2) Mask of the white (text) pixels over the whole image.
    white_mask = cv2.inRange(hsv, WHITE_LO, WHITE_HI)

    # 3) Keep only the white that lies inside the yellow mask.
    text_on_yellow = cv2.bitwise_and(white_mask, yellow_mask)

    # Grow the mask slightly to also cover the text's anti-aliased edge.
    dilate_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    text_on_yellow = cv2.dilate(text_on_yellow, dilate_kernel, iterations=1)

    # 4) Recolor the text in black.
    out = img.copy()
    out[text_on_yellow > 0] = (0, 0, 0)

    # cv2.imwrite signals failure through its return value; do not report
    # "OK" when nothing was written.
    written = cv2.imwrite(str(out_path), out)

    return {
        "file": in_path.name,
        "status": "OK" if written else "ERROR: cannot write",
        "changed_pixels": int(np.count_nonzero(text_on_yellow))
    }

# --- Batch ---
# Run process_png on every *.png found directly in INPUT_DIR (sorted by
# path), keep each result dict in `results` and print a one-line summary.
png_files = sorted(INPUT_DIR.glob("*.png"))
results = []

if png_files:
    for f in png_files:
        out_file = OUTPUT_DIR / f.name
        r = process_png(f, out_file)
        results.append(r)
        print(f"{r['file']} -> {r['status']} | changed_pixels={r.get('changed_pixels', 0)}")
else:
    # Nothing to do: report the folder that was searched.
    print(f"Aucun PNG trouvé dans {INPUT_DIR}")

print("\nTerminé.")
print(f"Entrée : {INPUT_DIR}")
print(f"Sortie : {OUTPUT_DIR}")

Page 1.png -> OK | changed_pixels=965

Terminé.
Entrée : /content/drive/MyDrive/testinput
Sortie : /content/drive/MyDrive/testoutput


In [24]:
import cv2
import numpy as np
from pathlib import Path

# Google Drive folders: PNG files are read from INPUT_DIR and the recolored
# copies are written to OUTPUT_DIR under the same file name.
INPUT_DIR  = Path("/content/drive/MyDrive/testinput")
OUTPUT_DIR = Path("/content/drive/MyDrive/testoutput")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# --- Color thresholds ---
# Lower/upper HSV bounds passed to cv2.inRange.
# Yellow arrows
YELLOW_LO = np.array([15, 80, 80], dtype=np.uint8)
YELLOW_HI = np.array([45, 255, 255], dtype=np.uint8)

# Light text (white / very light)
WHITE_LO  = np.array([0, 0, 200], dtype=np.uint8)
WHITE_HI  = np.array([180, 60, 255], dtype=np.uint8)

def process_png(in_path: Path, out_path: Path) -> dict:
    """Paint light text lying on yellow arrows of a PNG pure black.

    Compared with a plain mask intersection, the text mask is cleaned with a
    small opening and a median blur before painting, aiming at crisper
    digits.

    Args:
        in_path: Image file to read.
        out_path: Destination of the recolored image.

    Returns:
        A dict with "file" (input file name) and "status". "status" is
        "ERROR: cannot read" when the image cannot be loaded; otherwise it is
        "OK" or "ERROR: cannot write" and "changed_pixels" (number of pixels
        painted black) is included as well.
    """
    img = cv2.imread(str(in_path), cv2.IMREAD_COLOR)
    if img is None:
        return {"file": in_path.name, "status": "ERROR: cannot read"}

    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    # 1) Mask of the yellow arrows, closed with a 7x7 elliptical kernel.
    close_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
    yellow_mask = cv2.inRange(hsv, YELLOW_LO, YELLOW_HI)
    yellow_mask = cv2.morphologyEx(yellow_mask, cv2.MORPH_CLOSE, close_kernel, iterations=1)

    # 2) Mask of the light text.
    white_mask = cv2.inRange(hsv, WHITE_LO, WHITE_HI)

    # 3) Light text only where it lies inside the yellow mask.
    text_mask = cv2.bitwise_and(white_mask, yellow_mask)

    # 4) Fine clean-up for crisp digits: small opening, then median blur.
    open_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2, 2))
    text_mask = cv2.morphologyEx(text_mask, cv2.MORPH_OPEN, open_kernel, iterations=1)
    text_mask = cv2.medianBlur(text_mask, 3)

    # 5) Paint in pure black.
    out = img.copy()
    out[text_mask > 0] = (0, 0, 0)

    # cv2.imwrite signals failure through its return value; do not report
    # "OK" when nothing was written.
    written = cv2.imwrite(str(out_path), out)

    return {
        "file": in_path.name,
        "status": "OK" if written else "ERROR: cannot write",
        "changed_pixels": int((text_mask > 0).sum())
    }

# --- Batch ---
# Run process_png on every *.png found directly in INPUT_DIR (sorted by
# path) and print a one-line summary per file.
png_files = sorted(INPUT_DIR.glob("*.png"))
if not png_files:
    print(f"Aucun PNG trouvé dans {INPUT_DIR}")

for f in png_files:
    out_file = OUTPUT_DIR / f.name
    r = process_png(f, out_file)
    # The "cannot read" result carries no "changed_pixels" key; indexing it
    # directly would raise KeyError and abort the whole batch.
    print(f"{r['file']} -> {r['status']} | pixels modifiés = {r.get('changed_pixels', 0)}")

print("\nTerminé.")
print("Entrée :", INPUT_DIR)
print("Sortie  :", OUTPUT_DIR)

Page 1.png -> OK | pixels modifiés = 603

Terminé.
Entrée : /content/drive/MyDrive/testinput
Sortie  : /content/drive/MyDrive/testoutput


In [25]:
import cv2
import numpy as np
from pathlib import Path

# Google Drive folders: PNG files are read from INPUT_DIR and the recolored
# copies are written to OUTPUT_DIR under the same file name.
INPUT_DIR  = Path("/content/drive/MyDrive/testinput")
OUTPUT_DIR = Path("/content/drive/MyDrive/testoutput")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# --- Yellow detection (HSV) ---
# Lower/upper HSV bounds passed to cv2.inRange.
YELLOW_LO = np.array([15, 70, 70], dtype=np.uint8)
YELLOW_HI = np.array([45, 255, 255], dtype=np.uint8)

# --- Text detection inside the yellow ---
# Look for pixels that are weakly saturated (low S) AND rather bright (high V).
S_MAX_TEXT = 90     # raise to 110 if needed
V_MIN_TEXT = 140    # lower to 120 if needed

DEBUG_SAVE_MASKS = False  # set to True to also save the masks in the output folder

def process_png(in_path: Path, out_path: Path) -> dict:
    """Paint weakly saturated, bright pixels inside yellow areas black.

    Text is detected from the S and V channels (S <= S_MAX_TEXT and
    V >= V_MIN_TEXT) rather than from a fixed white range, then restricted
    to the yellow mask.

    Args:
        in_path: Image file to read.
        out_path: Destination of the recolored image. When DEBUG_SAVE_MASKS
            is true, the two masks are saved next to it with
            "_mask_yellow.png" / "_mask_text.png" suffixes.

    Returns:
        A dict with "file" (input file name) and "status". "status" is
        "ERROR: cannot read" when the image cannot be loaded; otherwise it is
        "OK" or "ERROR: cannot write" and "changed_pixels" (number of pixels
        painted black) is included as well.
    """
    img = cv2.imread(str(in_path), cv2.IMREAD_COLOR)
    if img is None:
        return {"file": in_path.name, "status": "ERROR: cannot read"}

    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    # 1) Yellow mask, closed with a 9x9 elliptical kernel.
    close_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9))
    yellow_mask = cv2.inRange(hsv, YELLOW_LO, YELLOW_HI)
    yellow_mask = cv2.morphologyEx(yellow_mask, cv2.MORPH_CLOSE, close_kernel, iterations=1)

    # 2) Text mask (inside the yellow): low S and high V. Hue is not used.
    _, s, v = cv2.split(hsv)
    text_mask = ((s <= S_MAX_TEXT) & (v >= V_MIN_TEXT)).astype(np.uint8) * 255
    text_mask = cv2.bitwise_and(text_mask, yellow_mask)

    # 3) Fine clean-up (crisp, not bold): small opening, then median blur.
    open_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (2, 2))
    text_mask = cv2.morphologyEx(text_mask, cv2.MORPH_OPEN, open_kernel, iterations=1)
    text_mask = cv2.medianBlur(text_mask, 3)

    # 4) Recolor in black.
    out = img.copy()
    out[text_mask > 0] = (0, 0, 0)
    # cv2.imwrite signals failure through its return value; do not report
    # "OK" when nothing was written.
    written = cv2.imwrite(str(out_path), out)

    # Debug: save the masks on request (best effort, result not checked).
    if DEBUG_SAVE_MASKS:
        cv2.imwrite(str(out_path.with_name(out_path.stem + "_mask_yellow.png")), yellow_mask)
        cv2.imwrite(str(out_path.with_name(out_path.stem + "_mask_text.png")), text_mask)

    return {
        "file": in_path.name,
        "status": "OK" if written else "ERROR: cannot write",
        "changed_pixels": int(np.count_nonzero(text_mask))
    }

# --- Batch ---
# Run process_png on every *.png found directly in INPUT_DIR (sorted by
# path) and print a one-line summary per file.
png_files = sorted(INPUT_DIR.glob("*.png"))

if png_files:
    for f in png_files:
        out_file = OUTPUT_DIR / f.name
        r = process_png(f, out_file)
        print(f"{r['file']} -> {r['status']} | changed_pixels={r.get('changed_pixels', 0)}")
else:
    # Nothing to do: report the folder that was searched.
    print(f"Aucun PNG trouvé dans {INPUT_DIR}")

print("\nTerminé.")
print("Entrée :", INPUT_DIR)
print("Sortie :", OUTPUT_DIR)

Page 1.png -> OK | changed_pixels=2418

Terminé.
Entrée : /content/drive/MyDrive/testinput
Sortie : /content/drive/MyDrive/testoutput
