In [1]:
import os

# Folder that holds the training drawings. The path is machine-specific: switch
# between the two assignments below (or edit the active one) to match the local setup.
#dxf_folder = r"C:\Users\rafael.scatolin\OneDrive - Orguel\ai_designer_database\dxf files\training"
dxf_folder = r"C:\Users\Rafael\Desktop\training_dataset\dxf files\training2"

# Collect the full path of every DXF drawing in the folder.
#  - the extension is compared case-insensitively so "PLAN.DXF" is not silently skipped;
#  - os.listdir returns entries in arbitrary order, so the list is sorted to make the
#    processing order (and therefore the row order of the dataset) reproducible.
dxf_files = sorted(
    os.path.join(dxf_folder, file)
    for file in os.listdir(dxf_folder)
    if file.lower().endswith('.dxf')
)

print(f"Found {len(dxf_files)} DXF files. Processing...")
print("➡️ Open Autocad before running the loop")

Found 46 DXF files. Processing...
➡️ Open Autocad before running the loop


In [2]:
from typing import List, Dict
from pathlib import Path
import math
import time
import pickle
from tqdm import tqdm
import win32com.client
from shapely.affinity import scale, rotate, translate

# ----------  load the reference metrics  ----------
# The table is indexed as characterMetrics[font file name][character] and each entry
# provides a "bbox" geometry and a "width" (advance), both used below.
# NOTE(review): pickle.load can execute arbitrary code stored in the file; only load a
# metrics file that was produced locally by a trusted script.
with open(Path.home()/"orguel_ml"/"scripts"/"ocr"/"character_metrics.pkl", "rb") as f:
    characterMetrics = pickle.load(f)

acad = win32com.client.Dispatch("AutoCAD.Application")
acad.Visible = True # keep the AutoCAD window visible while the drawings are processed

time.sleep(1) # Wait a moment to allow it to initialize

# ----------  walk every single-line text and place each glyph  ----------
# One dict per visible character: source file, character, pen position at which the
# glyph was placed, text rotation, text height and the glyph's bbox geometry.
positions: List[Dict] = []
for file in tqdm(dxf_files, desc="Building character position dataframe"):
    doc = acad.Documents.Open(file)
    try:
        modelSpace = doc.ModelSpace
        fontByStyle: Dict[str, str] = {}  # style name -> lower-cased font file, per drawing

        for text in modelSpace:
            if text.ObjectName != "AcDbText": continue

            # Resolve the style against the drawing that owns the entity (not whatever
            # document happens to be active) and cache it: one COM round-trip per style
            # instead of one per text entity.
            styleName = text.StyleName
            if styleName not in fontByStyle:
                fontByStyle[styleName] = doc.TextStyles.Item(styleName).FontFile.lower()
            font = fontByStyle[styleName]

            height = text.Height
            # The ActiveX text object exposes its width factor as `ScaleFactor`; there is
            # no `WidthFactor` property, so reading that name always fell back to 1.0.
            widthFactor = getattr(text, "ScaleFactor", 1.0)
            rotation = text.Rotation
            insertion_x, insertion_y, _ = text.InsertionPoint

            # Unit vector of the text baseline (rotation is used as radians throughout).
            direction_x = math.cos(rotation)
            direction_y = math.sin(rotation)

            for character in text.TextString:
                if character == " ":
                    # A space produces no row, but it still has to move the pen,
                    # otherwise every glyph after it is placed too far to the left.
                    # NOTE(review): if the metrics table has no entry for " " the pen
                    # is left where it is (previous behaviour) — confirm the table
                    # carries a space width.
                    try:
                        spaceWidth = characterMetrics[font][" "]["width"]
                    except KeyError:
                        continue
                    advance = spaceWidth * height * widthFactor
                    insertion_x += advance * direction_x
                    insertion_y += advance * direction_y
                    continue

                # 1. look-up reference metrics (normalised to H=1)
                #    An unknown font or glyph raises KeyError; the drawing is still
                #    closed by the `finally` below.
                metrics = characterMetrics[font][character]

                # 2. build the glyph's bbox polygon: scale about the origin,
                #    rotate about the origin, then move to the current pen position
                bbox = metrics["bbox"]
                bbox = scale(bbox, xfact=height*widthFactor, yfact=height, origin=(0, 0))
                bbox = rotate(bbox, rotation, origin=(0, 0), use_radians=True)
                bbox = translate(bbox, xoff=insertion_x, yoff=insertion_y)

                # 3. save the row
                positions.append(
                    {
                        "file": file,
                        "character": character,
                        "insertion": (insertion_x, insertion_y),
                        "rotation": rotation,
                        "height": height,
                        "bbox": bbox
                    }
                )

                # 4. shift cursor for the next glyph (along the rotated baseline)
                advance = metrics["width"] * height * widthFactor
                insertion_x += advance * direction_x
                insertion_y += advance * direction_y
    finally:
        # Always release the drawing, even when a lookup above fails;
        # False = close without saving changes.
        doc.Close(False)
    time.sleep(0.1)

Building character position dataframe: 100%|██████████| 46/46 [03:24<00:00,  4.45s/it]


In [3]:
import pandas

# Turn the collected per-character records into a table (one row per record)
# and persist it next to the notebook for later steps.
dataframe = pandas.DataFrame(data=positions)
dataframe.to_pickle(path="character_positions.pkl")

# Report how many rows ended up in the dataset.
print(f"Labeled {len(dataframe)} characters using AutoCAD COM.")

Labeled 20948 characters using AutoCAD COM.
