In [1]:
import os

# Folder holding the DXF drawings to label. The commented-out line below is an
# alternative dataset location kept for reference.
#dxf_folder = r"C:\Users\rafael.scatolin\OneDrive - Orguel\ai_designer_database\dxf files\training"
dxf_folder = r"C:\Users\Rafael\Desktop\training_dataset\dxf files\training\3-training"

# Match the extension case-insensitively (a plain endswith('.dxf') skips
# "DRAWING.DXF") and sort, because os.listdir returns entries in arbitrary
# order and the row order of the resulting dataset should be reproducible.
dxf_files = sorted(
    os.path.join(dxf_folder, file)
    for file in os.listdir(dxf_folder)
    if file.lower().endswith('.dxf')
)

print(f"Found {len(dxf_files)} DXF files. Processing...")

Found 1398 DXF files. Processing...


In [2]:
from typing import List, Dict
from pathlib import Path
import math
import time
import pickle
from tqdm import tqdm
import win32com.client
from shapely.affinity import scale, rotate, translate

# ----------  load the reference metrics  ----------
# characterMetrics is indexed as characterMetrics[font][character] and each entry
# provides a "bbox" geometry and a "width" advance (normalised to H=1).
# NOTE(security): pickle.load can execute arbitrary code; only load a metrics
# file that was produced locally by a trusted script.
with open(str(Path.home()/"orguel_ml"/"scripts"/"ocr"/"character_metrics.pkl"), "rb") as f:
    characterMetrics = pickle.load(f)

# Start AutoCAD
acad = win32com.client.Dispatch("AutoCAD.Application")
acad.Visible = True  # show the AutoCAD window while the batch runs
time.sleep(5)  # Give time for AutoCAD to fully open

# Timing Settings (seconds to wait after the corresponding COM call)
open_delay = 1.0
activate_delay = 0.5
close_delay = 0.2

# One dict per non-space character of every single-line text entity.
positions: List[Dict] = []
for file in tqdm(dxf_files, desc="Building character position dataframe"):
    doc = acad.Documents.Open(file); time.sleep(open_delay)
    try:
        doc.Activate(); time.sleep(activate_delay)
        modelSpace = doc.ModelSpace
        # Resolve styles against the document that was just opened rather than
        # acad.ActiveDocument, which is whatever drawing currently has focus.
        textStyles = doc.TextStyles
        # Style name -> lower-cased font file, cached so the COM lookup is done
        # once per style instead of once per text entity.
        fontByStyle: Dict[str, str] = {}

        for text in modelSpace:
            if text.ObjectName != "AcDbText": continue

            styleName = text.StyleName
            if styleName not in fontByStyle:
                fontByStyle[styleName] = textStyles.Item(styleName).FontFile.lower()
            font = fontByStyle[styleName]
            height = text.Height
            # The width factor of a Text entity is exposed over ActiveX as
            # ScaleFactor. The previous getattr(text, "WidthFactor", 1.0) named
            # a property Text does not have, so it always fell back to 1.0.
            widthFactor = text.ScaleFactor
            rotation = text.Rotation  # used as radians below
            insertion_x, insertion_y, _ = text.InsertionPoint

            for character in text.TextString:
                if character == " ": continue

                # 1. look-up reference metrics (normalised to H=1);
                #    an unknown font or glyph raises KeyError on purpose so that
                #    mislabelled rows are never written silently
                metrics = characterMetrics[font][character]

                # 2. build the glyph's bbox polygon: scale about the origin,
                #    rotate about the origin, then move to the current cursor
                bbox = metrics["bbox"]
                bbox = scale(bbox, xfact=height*widthFactor, yfact=height, origin=(0, 0))
                bbox = rotate(bbox, rotation, origin=(0, 0), use_radians=True)
                bbox = translate(bbox, xoff=insertion_x, yoff=insertion_y)

                # 3. save the row ("insertion" is the cursor position of this glyph)
                positions.append(
                    {
                        "file": file,
                        "character": character,
                        "insertion": (insertion_x, insertion_y),
                        "rotation": rotation,
                        "height": height,
                        "bbox": bbox
                    }
                )

                # 4. shift cursor for the next glyph along the text direction
                advance = metrics["width"] * height * widthFactor
                insertion_x += advance * math.cos(rotation)
                insertion_y += advance * math.sin(rotation)
    finally:
        # Always close the drawing (without saving), even when labelling fails,
        # so an error does not leave documents piling up in AutoCAD.
        doc.Close(False)
        time.sleep(close_delay)

Building character position dataframe: 100%|██████████| 1398/1398 [1:50:32<00:00,  4.74s/it] 


In [3]:
import pandas

# Turn the collected per-character rows into a table and persist it to disk
dataframe = pandas.DataFrame(data=positions)
pandas.to_pickle(dataframe, "character_positions.pkl")
print(f"Labeled {len(dataframe)} characters using AutoCAD COM.")

Labeled 454788 characters using AutoCAD COM.


In [4]:
# Show the labelled-character table as this cell's output (last-expression display)
dataframe

Unnamed: 0,file,character,insertion,rotation,height,bbox
0,C:\Users\Rafael\Desktop\training_dataset\dxf f...,P,"(903.3410905061154, 1889.1567540814858)",0.0,12.0,POLYGON ((914.5225366179844 1889.1567540814858...
1,C:\Users\Rafael\Desktop\training_dataset\dxf f...,2,"(914.5225366179844, 1889.1567540814858)",0.0,12.0,POLYGON ((923.8458654037962 1889.1567540814858...
2,C:\Users\Rafael\Desktop\training_dataset\dxf f...,P,"(1027.196562506115, 468.99005308148753)",0.0,12.0,POLYGON ((1038.378008617984 468.99005308148753...
3,C:\Users\Rafael\Desktop\training_dataset\dxf f...,3,"(1038.378008617984, 468.99005308148753)",0.0,12.0,POLYGON ((1047.7013374037958 468.7772290705734...
4,C:\Users\Rafael\Desktop\training_dataset\dxf f...,6,"(1047.7013374037958, 468.99005308148753)",0.0,12.0,POLYGON ((1057.0246661896076 468.7854146094548...
...,...,...,...,...,...,...
454783,C:\Users\Rafael\Desktop\training_dataset\dxf f...,ó,"(45.0, -30.0)",0.0,1.0,"POLYGON ((45.76190476190476 -30, 45.7619047619..."
454784,C:\Users\Rafael\Desktop\training_dataset\dxf f...,ô,"(50.0, -30.0)",0.0,1.0,"POLYGON ((50.76190476190476 -30, 50.7619047619..."
454785,C:\Users\Rafael\Desktop\training_dataset\dxf f...,÷,"(55.0, -30.0)",0.0,1.0,"POLYGON ((55.714285714285715 -30, 55.714285714..."
454786,C:\Users\Rafael\Desktop\training_dataset\dxf f...,ø,"(60.0, -30.0)",0.0,1.0,"POLYGON ((60.76190476190476 -30, 60.7619047619..."
