In [21]:
import os
import sys
import torch
from pathlib import Path
from platform import system
sys.path.append(str(Path.home() / "orguel_ml"))

# NOTE: this rebinds `system` from the imported platform function to the OS
# name string; the `from platform import system` at the top of the cell
# restores the function every time the cell is re-run.
system = system()

# Per-OS locations: (folder holding the training DXF files, output dataset file)
_PATHS_BY_OS = {
    'Windows': ("D:\\ml\\training_dataset\\training", "D:\\ml\\graph_dataset.pt"),
    'Linux': ("/media/rafael/HD/ml/training_dataset/training", "/media/rafael/HD/ml/graph_dataset.pt"),
}

# Fail loudly on an unconfigured OS instead of hitting a NameError further down
if system not in _PATHS_BY_OS:
    raise RuntimeError(f"Unsupported OS {system!r}: no training folder configured")
training_folder, save_path = _PATHS_BY_OS[system]

# Read the training folder. os.listdir returns entries in arbitrary order, so
# sort the paths to keep dataset indices stable between runs and machines.
dxf_files = sorted(os.path.join(training_folder, file) for file in os.listdir(training_folder) if file.endswith('.dxf'))

print(f"Total: {len(dxf_files)} DXF files / OS: {system}")

Total: 10 DXF files / OS: Linux


In [22]:
# Setup: mirror axes and rotation angles handed to the graph dataset builder.
# Rotations run from 0 to 345 in steps of 15 (presumably degrees).
mirror_axes = [None, 'x', 'y']
rotations = list(range(0, 360, 15))

print(f"steps: {len(rotations)}")
print(f"rotations: {rotations}")

steps: 24
rorations: [0, 15, 30, 45, 60, 75, 90, 105, 120, 135, 150, 165, 180, 195, 210, 225, 240, 255, 270, 285, 300, 315, 330, 345]


In [23]:
import orguel_ml

# Turn the collected DXF paths into a coordinate dataset and report its size
coordinate_dataset = orguel_ml.CoordinateDataset(dxf_files)

n_dataframes = len(coordinate_dataset)
print("Number of dataframes: {}".format(n_dataframes))

Extracting coordinates: 100%|██████████| 10/10 [00:00<00:00, 29.12it/s]

Number of dataframes: 10





In [24]:
# Build the graph dataset from the coordinate dataset, the mirror axes and the
# rotation list (raw_graphs=True), then report how many graphs came out
graph_dataset = orguel_ml.GraphDataset(
    coordinate_dataset,
    mirror_axes,
    rotations,
    raw_graphs=True,
)

n_graphs = len(graph_dataset)
print("Number of graphs: {}".format(n_graphs))

Building (raw graphs): 100%|██████████| 720/720 [00:02<00:00, 357.35it/s]

Number of graphs: 720





In [25]:
# Display names for the 10 edge-attribute columns, listed in column order.
# Update this list whenever the feature order changes.
EDGE_FEATS = [
    "parallel",            # col 0
    "offset",              # col 1
    "overlap_ratio",       # col 2
    "oblique",             # col 3
    "intersection_min",    # col 4
    "intersection_max",    # col 5
    "angle_diff_sin_min",  # col 6
    "angle_diff_cos_min",  # col 7
    "angle_diff_sin_max",  # col 8
    "angle_diff_cos_max",  # col 9
]

def _get_attr(g, *names):
    for n in names:
        if hasattr(g, n):
            return getattr(g, n)
    raise AttributeError(f"Graph has none of attrs {names}")

def _provenance_field(g, key):
    """Return provenance value ``key`` from the graph itself, else from ``g.meta``.

    An attribute named ``key`` on ``g`` wins. Otherwise the value comes from
    ``g.meta.get(key)`` when ``g.meta`` exists and has a callable ``get``.
    Returns ``None`` when neither source provides it.
    """
    if hasattr(g, key):
        return getattr(g, key)
    meta_get = getattr(getattr(g, "meta", None), "get", None)
    return meta_get(key) if callable(meta_get) else None


def inspect_one_graph(g, max_examples=8):
    """Print a report of the non-finite (NaN/Inf) edge attributes of one graph.

    Args:
        g: graph object exposing the edge-attribute matrix as ``edge_attributes``
            or ``edge_attr``, and the edge endpoints as ``edge_pairs`` or
            ``edge_index`` (read as ``[0, e]`` and ``[1, e]``).
        max_examples: maximum number of offending edges listed individually.

    Returns:
        bool: ``False`` when every edge attribute is finite (nothing is
        printed), ``True`` when at least one non-finite value was reported.

    Raises:
        AttributeError: if the graph has no edge-attribute tensor, or has
            non-finite values but no edge-endpoint tensor.
    """
    ea = _get_attr(g, "edge_attributes", "edge_attr")          # [E, D]

    bad = ~torch.isfinite(ea)                                   # [E, D]
    if not bad.any():
        return False  # all good

    # Endpoints are only needed for the per-edge listing below, so they are
    # looked up after the early return; node attributes are never needed.
    ei = _get_attr(g, "edge_pairs", "edge_index")               # [2, E]

    def column_name(j):
        # Fall back to a generic label for columns beyond the known names
        return EDGE_FEATS[j] if j < len(EDGE_FEATS) else f"feat{j}"

    # header with provenance if available
    src, aug, ang, axis = (
        _provenance_field(g, key) for key in ("source", "aug", "angle", "axis")
    )
    print(f"\n⚠ non‑finite edge_attr | source={src} aug={aug} angle={ang} axis={axis} | ea={tuple(ea.shape)}")

    # summarize by column
    nan_per_col = torch.isnan(ea).sum(0).tolist()
    inf_per_col = torch.isinf(ea).sum(0).tolist()
    for j, (n, f) in enumerate(zip(nan_per_col, inf_per_col)):
        if n or f:
            print(f"  col {j:>2} ({column_name(j)}): NaN={n} Inf={f}")

    # show a few concrete edges
    edges = bad.any(1).nonzero(as_tuple=False).view(-1)[:max_examples]
    for e_idx in edges.tolist():
        u, v = int(ei[0, e_idx]), int(ei[1, e_idx])
        which = bad[e_idx].nonzero(as_tuple=False).view(-1).tolist()
        cols = [column_name(j) for j in which]
        vals = {c: float(ea[e_idx, j]) for c, j in zip(cols, which)}
        print(f"  - edge {e_idx} ({u}->{v}) bad={cols} values={vals}")
    return True

def _add_column_counts(running, counts):
    """Add per-column ``counts`` to ``running``, widening with zeros if needed.

    Lets the totals absorb graphs whose edge-attribute width differs from the
    current accumulator width instead of failing on a shape mismatch.
    """
    counts = counts.to(torch.long).reshape(-1).cpu()
    width = max(running.numel(), counts.numel())
    merged = torch.zeros(width, dtype=torch.long)
    merged[:running.numel()] += running
    merged[:counts.numel()] += counts
    return merged


def scan_graph_dataset(graph_dataset, max_graph_reports=10):
    """Scan every graph for non-finite edge attributes and print a summary.

    Args:
        graph_dataset: sized iterable of graph objects, each exposing its
            edge-attribute tensor as ``edge_attributes`` or ``edge_attr``.
        max_graph_reports: number of offending graphs that get a detailed
            per-graph report; later ones are only counted.

    Returns:
        None. Findings are printed: per-graph reports first, then the number of
        affected graphs, total NaN/Inf counts and per-column counts.

    Raises:
        AttributeError: if a graph has no edge-attribute tensor.
    """
    bad_graphs = 0
    total_nan = total_inf = 0
    col_nan = torch.zeros(len(EDGE_FEATS), dtype=torch.long)
    col_inf = torch.zeros(len(EDGE_FEATS), dtype=torch.long)

    for idx, g in enumerate(graph_dataset):
        ea = _get_attr(g, "edge_attributes", "edge_attr")
        if torch.isfinite(ea).all():
            continue

        # Compute each mask once and reuse it for totals and column counts
        nan_mask = torch.isnan(ea)
        inf_mask = torch.isinf(ea)

        bad_graphs += 1
        total_nan += nan_mask.sum().item()
        total_inf += inf_mask.sum().item()
        col_nan = _add_column_counts(col_nan, nan_mask.sum(0))
        col_inf = _add_column_counts(col_inf, inf_mask.sum(0))
        if bad_graphs <= max_graph_reports:
            print(f"\n[graph {idx}]")
            inspect_one_graph(g, max_examples=8)

    print("\n==== Summary ====")
    print(f"graphs with non‑finite edge_attr: {bad_graphs}/{len(graph_dataset)}")
    print(f"total NaN: {total_nan}  total Inf: {total_inf}")
    for j, (n, f) in enumerate(zip(col_nan.tolist(), col_inf.tolist())):
        if n or f:
            # Columns beyond the known names get a generic label
            name = EDGE_FEATS[j] if j < len(EDGE_FEATS) else f"feat{j}"
            print(f"  col {j:>2} ({name}): NaN={n} Inf={f}")

# Scan every graph in graph_dataset.dataset and print the non-finite summary
scan_graph_dataset(graph_dataset.dataset)


==== Summary ====
graphs with non‑finite edge_attr: 0/720
total NaN: 0  total Inf: 0


In [26]:
from orguel_ml import DataframeField as F
from orguel_ml import EdgeAttribute as Att

# Pick one graph (index 3) for closer inspection. The bare call displays the
# boolean returned by inspect_one_graph (False = all edge attributes finite).
graph = graph_dataset.dataset[3]
inspect_one_graph(graph)

False

In [27]:
# Show the F.length row of the selected graph's internal dataframe
# (presumably one length per line entity — confirm against DataframeField)
graph._dataframe[F.length]

tensor([ 19.0000, 156.0001,  19.0000,  ..., 138.0268, 138.0270, 138.0270])

In [28]:
# Largest value in the F.length row
graph._dataframe[F.length].max()

tensor(590.9998)

In [12]:
# Select column 5 of the dataframe, twice.
# NOTE(review): lines_a and lines_b are identical selections — confirm whether
# lines_b was meant to use a different column index.
# NOTE(review): this cell's saved execution count (In [12]) is lower than that
# of the cells before it (In [21]–In [28]); re-run the notebook top to bottom
# to confirm it still works in order.
lines_a = graph._dataframe[:,5]
lines_b = graph._dataframe[:,5]

In [13]:
# Read the F.original_index entry of the selected column
lines_a[F.original_index]

tensor(5.)

In [14]:
# Count the entries of the F.length row that are below 1000.
# NOTE(review): 1000 is an unexplained threshold — consider naming it.
# NOTE(review): the saved outputs look stale; a count of 167 out of 1397 does
# not match a maximum of 590.9998 — re-run the cells in order to confirm.
(graph._dataframe[F.length] < 1000).sum()

tensor(167)

In [15]:
# Number of entries in the F.length row
graph._dataframe[F.length].size()

torch.Size([1397])