In [4]:
import networkx as nx

# STEP Reader
from OCC.Core.STEPControl import STEPControl_Reader
from OCC.Core.IFSelect import IFSelect_RetDone

# Your version’s TopExp imports<
from OCC.Core.TopExp import topexp
from OCC.Core.TopExp import TopExp_Explorer

# Constants
from OCC.Core.TopAbs import TopAbs_FACE, TopAbs_EDGE

# Shapes
from OCC.Core.TopoDS import TopoDS_Face, TopoDS_Edge
from OCC.Core.TopTools import TopTools_IndexedMapOfShape

# Surface properties
from OCC.Core.BRepGProp import brepgprop_SurfaceProperties
from OCC.Core.GProp import GProp_GProps

# Surface type
from OCC.Core.BRepAdaptor import BRepAdaptor_Surface
from OCC.Core.GeomAbs import (
    GeomAbs_Plane, GeomAbs_Cylinder, GeomAbs_Cone,
    GeomAbs_Sphere, GeomAbs_Torus, GeomAbs_BSplineSurface,
    GeomAbs_BezierSurface
)

# Normals
from OCC.Core.GeomLProp import GeomLProp_SLProps

print("Imports OK")


Imports OK


### Load STEP File

In [5]:
def load_step_shape(path: str):
    reader = STEPControl_Reader()
    status = reader.ReadFile(path)

    if status != IFSelect_RetDone:
        raise RuntimeError(f"Failed to read STEP file: {path}")

    reader.TransferRoots()
    return reader.OneShape()

step_path = r"/media/swapnil/3f73cc1a-8f9d-4c19-87af-99b3512ff5b2/MK_S/step/000355ca65fdcbb0e3d825e6_811a5312224ae67ce5b1e180_323fa0206419cdb236130779_default_jjeei.step" 
shape = load_step_shape(step_path)

print("STEP file loaded:", step_path)
shape


STEP file loaded: /media/swapnil/3f73cc1a-8f9d-4c19-87af-99b3512ff5b2/MK_S/step/000355ca65fdcbb0e3d825e6_811a5312224ae67ce5b1e180_323fa0206419cdb236130779_default_jjeei.step


<class 'TopoDS_Solid'>

### Extract Faces

In [7]:
# Create face map
face_map = TopTools_IndexedMapOfShape()

# Extract faces
topexp.MapShapes(shape, TopAbs_FACE, face_map)

# Your version uses Size()
num_faces = face_map.Size()
print("Number of faces:", num_faces)

# Show first few faces
for i in range(1, min(4, num_faces) + 1):
    face = face_map.FindKey(i)
    print(f"Face index {i} → type {type(face)}")


Number of faces: 6
Face index 1 → type <class 'OCC.Core.TopoDS.TopoDS_Face'>
Face index 2 → type <class 'OCC.Core.TopoDS.TopoDS_Face'>
Face index 3 → type <class 'OCC.Core.TopoDS.TopoDS_Face'>
Face index 4 → type <class 'OCC.Core.TopoDS.TopoDS_Face'>


In [8]:
c = 3
c

3

### Compute Area + Centroid

In [9]:
def face_area_centroid(face: TopoDS_Face):
    props = GProp_GProps()
    brepgprop_SurfaceProperties(face, props)

    area = props.Mass()
    c = props.CentreOfMass()
    centroid = (c.X(), c.Y(), c.Z())
    return area, centroid

# Test on few faces
for i in range(1, min(4, num_faces) + 1):
    face = face_map.FindKey(i)
    area, cen = face_area_centroid(face)
    print(f"Face {i} → Area: {area:.4f}, Centroid: {cen}")


Face 1 → Area: 645.1600, Centroid: (6.245004513516508e-16, -12.7, -165.1)
Face 2 → Area: 8387.0800, Centroid: (12.7, -12.7, 9.992007221626409e-16)
Face 3 → Area: 645.1600, Centroid: (-2.0816681711721685e-17, -12.7, 165.1)
Face 4 → Area: 8387.0800, Centroid: (-12.7, -12.7, 1.5543122344752192e-15)


  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)


### Surface Type (plane, cylinder, etc.)

In [10]:
def face_surface_type(face: TopoDS_Face):
    surf = BRepAdaptor_Surface(face)
    t = surf.GetType()

    if t == GeomAbs_Plane: return "plane"
    if t == GeomAbs_Cylinder: return "cylinder"
    if t == GeomAbs_Cone: return "cone"
    if t == GeomAbs_Sphere: return "sphere"
    if t == GeomAbs_Torus: return "torus"
    if t in (GeomAbs_BSplineSurface, GeomAbs_BezierSurface): return "freeform"
    return "other"

# Test
for i in range(1, min(4, num_faces) + 1):
    print(f"Face {i} surface type:", face_surface_type(face_map.FindKey(i)))


Face 1 surface type: plane
Face 2 surface type: plane
Face 3 surface type: plane
Face 4 surface type: plane


### Compute Face Normal

In [11]:
from OCC.Core.BRepMesh import BRepMesh_IncrementalMesh
from OCC.Core.BRep import BRep_Tool
from OCC.Core.TopLoc import TopLoc_Location
from OCC.Core.gp import gp_Pnt, gp_Vec

# Mesh the shape
mesh = BRepMesh_IncrementalMesh(shape, 0.1)
mesh.Perform()

def face_normal(face: TopoDS_Face):

    loc = TopLoc_Location()
    tri = BRep_Tool.Triangulation(face, loc)

    if tri is None:
        return (0.0, 0.0, 1.0)

    # ---- GET NODES SAFELY (works on all builds) ----
    # Try Nodes()
    if hasattr(tri, "Nodes"):
        try:
            nodes = tri.Nodes()
            get_node = lambda idx: nodes.Value(idx)
        except:
            pass

    # If Nodes() fails, try Node(idx)
    if not 'get_node' in locals():
        try:
            get_node = lambda idx: tri.Node(idx)
        except:
            pass

    # If Node(i) fails — manual map (old OCC versions)
    if not 'get_node' in locals():
        nb = tri.NbNodes()
        points = []
        for i in range(1, nb+1):
            p = tri.Node(i)
            points.append(p)
        get_node = lambda idx: points[idx-1]

    # ---- GET TRIANGLES SAFELY ----
    # Try Triangles()
    if hasattr(tri, "Triangles"):
        try:
            triangles = tri.Triangles()
            get_tri = lambda i: triangles.Value(i)
        except:
            pass

    # Try direct T(i)
    if not 'get_tri' in locals():
        try:
            get_tri = lambda i: tri.Triangle(i)
        except:
            pass

    # Last fallback: manual build
    if not 'get_tri' in locals():
        nb = tri.NbTriangles()
        tris = []
        for i in range(1, nb+1):
            tris.append(tri.Triangle(i))
        get_tri = lambda idx: tris[idx-1]

    # ---- COMPUTE NORMAL ----
    normal = gp_Vec(0, 0, 0)

    for i in range(1, tri.NbTriangles() + 1):
        t = get_tri(i)

        # Some versions give tuple, some require .Get()
        try:
            n1, n2, n3 = t.Get()
        except:
            n1, n2, n3 = t.Value(1), t.Value(2), t.Value(3)

        p1 = get_node(n1)
        p2 = get_node(n2)
        p3 = get_node(n3)

        v1 = gp_Vec(p1, p2)
        v2 = gp_Vec(p1, p3)
        nrm = v1.Crossed(v2)

        normal = normal.Added(nrm)

    if normal.Magnitude() > 1e-9:
        normal.Normalize()
        return (normal.X(), normal.Y(), normal.Z())

    return (0.0, 0.0, 1.0)


# TEST NOW
for i in range(1, min(4, num_faces) + 1):
    print("Face", i, "normal:", face_normal(face_map.FindKey(i)))


Face 1 normal: (0.0, 0.0, -1.0)
Face 2 normal: (-1.0, 0.0, 0.0)
Face 3 normal: (0.0, 0.0, -1.0)
Face 4 normal: (-1.0, 0.0, 0.0)




### Build Face Adjacency

In [12]:
# Build edge map
edge_map = TopTools_IndexedMapOfShape()
topexp.MapShapes(shape, TopAbs_EDGE, edge_map)

edge_to_faces = {}

# Explorer for all faces
exp_face = TopExp_Explorer(shape, TopAbs_FACE)
while exp_face.More():
    face = exp_face.Current()
    f_idx = face_map.FindIndex(face)

    # Explorer for edges on a face
    exp_edge = TopExp_Explorer(face, TopAbs_EDGE)
    while exp_edge.More():
        edge = exp_edge.Current()
        e_idx = edge_map.FindIndex(edge)

        edge_to_faces.setdefault(e_idx, set()).add(f_idx)
        exp_edge.Next()

    exp_face.Next()

# Debug sample
count = 0
for e, fs in edge_to_faces.items():
    if len(fs) > 1:
        print(f"Edge {e} shared by faces {list(fs)}")
        count += 1
        if count >= 5:
            break

print("Adjacency mapping complete.")


Edge 1 shared by faces [1, 6]
Edge 2 shared by faces [1, 2]
Edge 3 shared by faces [1, 5]
Edge 4 shared by faces [1, 4]
Edge 5 shared by faces [2, 6]
Adjacency mapping complete.


### Build Final Face Graph

In [13]:
# BUILD FINAL GRAPH
G = nx.Graph()

# Add nodes with features
for i in range(1, face_map.Size() + 1):
    face = face_map.FindKey(i)

    area, centroid = face_area_centroid(face)
    normal = face_normal(face)
    surf_type = face_surface_type(face)

    G.add_node(
        i,
        area=float(area),
        centroid_x=float(centroid[0]),
        centroid_y=float(centroid[1]),
        centroid_z=float(centroid[2]),
        normal_x=float(normal[0]),
        normal_y=float(normal[1]),
        normal_z=float(normal[2]),
        surface_type=surf_type
    )

# Add edges (adjacency)
for e_id, faces in edge_to_faces.items():
    faces = list(faces)
    if len(faces) > 1:  # only edges with >=2 faces indicate adjacency
        for a in range(len(faces)):
            for b in range(a + 1, len(faces)):
                G.add_edge(faces[a], faces[b])

print("Graph constructed.")
print("Nodes:", G.number_of_nodes())
print("Edges:", G.number_of_edges())

# Optional: preview node 1
print("Node 1 features:", G.nodes[1])


Graph constructed.
Nodes: 6
Edges: 12
Node 1 features: {'area': 645.1599999999999, 'centroid_x': 6.245004513516508e-16, 'centroid_y': -12.7, 'centroid_z': -165.1, 'normal_x': 0.0, 'normal_y': 0.0, 'normal_z': -1.0, 'surface_type': 'plane'}


  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)


## Convert the NetworkX CAD graph → PyTorch Geometric Data

### Encode Node Features

In [16]:
import torch
import torch_geometric
from torch_geometric.data import Data

# Convert surface types to integer IDs
surface_type_map = {
    "plane": 0,
    "cylinder": 1,
    "cone": 2,
    "sphere": 3,
    "torus": 4,
    "freeform": 5,
    "other": 6
}

def surface_type_to_id(s):
    return surface_type_map.get(s, 6)  # fallback "other"


  from .autonotebook import tqdm as notebook_tqdm
  from torch_geometric.distributed import (


### Build X (node feature tensor)

In [18]:
import numpy as np

node_features = []

for node_id in G.nodes():
    n = G.nodes[node_id]

    # numeric features
    area = n["area"]
    cx = n["centroid_x"]
    cy = n["centroid_y"]
    cz = n["centroid_z"]
    nx = n["normal_x"]
    ny = n["normal_y"]
    nz = n["normal_z"]

    # one-hot encode surface type
    st_id = surface_type_to_id(n["surface_type"])
    st_onehot = np.zeros(len(surface_type_map))
    st_onehot[st_id] = 1

    # combine all features
    feats = [area, cx, cy, cz, nx, ny, nz] + st_onehot.tolist()
    node_features.append(feats)

# Convert to tensor
x = torch.tensor(node_features, dtype=torch.float)

print("Node feature tensor X shape:", x.shape)
print(x)


Node feature tensor X shape: torch.Size([6, 14])
tensor([[ 6.4516e+02,  6.2450e-16, -1.2700e+01, -1.6510e+02,  0.0000e+00,
          0.0000e+00, -1.0000e+00,  1.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 8.3871e+03,  1.2700e+01, -1.2700e+01,  9.9920e-16, -1.0000e+00,
          0.0000e+00,  0.0000e+00,  1.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 6.4516e+02, -2.0817e-17, -1.2700e+01,  1.6510e+02,  0.0000e+00,
          0.0000e+00, -1.0000e+00,  1.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 8.3871e+03, -1.2700e+01, -1.2700e+01,  1.5543e-15, -1.0000e+00,
          0.0000e+00,  0.0000e+00,  1.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00,  0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 8.3871e+03, -4.2674e-16, -2.5400e+01, -2.9510e-15,  0.0000e+00,
         -1.0000e+00,  0.0000e+00,  1

### Build edge_index

In [20]:
edge_list = []

for (u, v) in G.edges():
    edge_list.append([u - 1, v - 1])  # convert from 1-based to 0-based
    edge_list.append([v - 1, u - 1])  # undirected graph

edge_index = torch.tensor(edge_list, dtype=torch.long).t().contiguous()

print("edge_index shape:", edge_index.shape)
print(edge_index)


edge_index shape: torch.Size([2, 24])
tensor([[0, 5, 0, 1, 0, 4, 0, 3, 1, 5, 1, 2, 1, 4, 2, 5, 2, 3, 2, 4, 3, 5, 3, 4],
        [5, 0, 1, 0, 4, 0, 3, 0, 5, 1, 2, 1, 4, 1, 5, 2, 3, 2, 4, 2, 5, 3, 4, 3]])


### Build PyG graph object

In [21]:
data = Data(
    x=x,
    edge_index=edge_index
)

print(data)


Data(x=[6, 14], edge_index=[2, 24])


### To convert all part into graphs

In [None]:
from tqdm import tqdm
import os
from part_graph import build_part_pyg_from_step
import torch

step_dir = r"/media/swapnil/3f73cc1a-8f9d-4c19-87af-99b3512ff5b2/MK_S/step"
out_dir  = r"/media/swapnil/3f73cc1a-8f9d-4c19-87af-99b3512ff5b2/MK_S/graphs/parts"

os.makedirs(out_dir, exist_ok=True)

step_files = [
    f for f in os.listdir(step_dir)
    if f.lower().endswith(".step")
]

print(f"Found {len(step_files)} STEP files in {step_dir}")

for f in tqdm(step_files, desc="Converting STEP → Graph"):
    step_path = os.path.join(step_dir, f)
    base = os.path.splitext(f)[0]
    out_path = os.path.join(out_dir, base + ".pt")

    try:
        data = build_part_pyg_from_step(step_path)
        torch.save(data, out_path)
    except Exception as e:
        tqdm.write(f"[FAIL] {f}: {e}")


Found 447442 STEP files in /media/swapnil/3f73cc1a-8f9d-4c19-87af-99b3512ff5b2/MK_S/step


  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop

[31;1m**** ERR StepFile : Undefined Parsing: Line 2: Incorrect syntax: unexpected end of file, expecting STEP    ****[0m
[FAIL] 39408f552c001b6f394fc618_b41ccc2c0bb81bfc92f8ffe5_0586a69245e9591c2825c74c_default_jjeei.step: Failed to read STEP file: /media/swapnil/3f73cc1a-8f9d-4c19-87af-99b3512ff5b2/MK_S/step/39408f552c001b6f394fc618_b41ccc2c0bb81bfc92f8ffe5_0586a69245e9591c2825c74c_default_jjeei.step


  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop_SurfaceProperties(face, props)
  brepgprop

In [None]:
from tqdm import tqdm
import os
from part_graph import build_part_pyg_from_step
import torch

# step_dir = r"C:\MK_S\AutoMate\step\step"
# out_dir  = r"C:\MK_S\AutoMate\graphs\parts"

step_dir = r"/media/swapnil/3f73cc1a-8f9d-4c19-87af-99b3512ff5b2/MK_S/step"
out_dir  = r"/media/swapnil/3f73cc1a-8f9d-4c19-87af-99b3512ff5b2/MK_S/graphs/parts"


os.makedirs(out_dir, exist_ok=True)

step_files = [
    f for f in os.listdir(step_dir)
    if f.lower().endswith(".step")
]

# Set of already converted base filenames (without extension)
converted_files = {
    os.path.splitext(f)[0]
    for f in os.listdir(out_dir)
    if f.lower().endswith(".pt")
}

print(f"Found {len(step_files)} STEP files")
print(f"Already converted: {len(converted_files)}")
print(f"Remaining: {len(step_files) - len(converted_files)}")

for f in tqdm(step_files, desc="Continuing STEP → Graph"):
    base = os.path.splitext(f)[0]
    
    # Skip if already converted
    if base in converted_files:
        continue

    step_path = os.path.join(step_dir, f)
    out_path = os.path.join(out_dir, base + ".pt")

    try:
        data = build_part_pyg_from_step(step_path)
        torch.save(data, out_path)
    except Exception as e:
        tqdm.write(f"[FAIL] {f}: {e}")


In [1]:
import os
from tqdm import tqdm

def get_folder_size_gb(folder_path):
    total_size = 0
    file_paths = []

    # First collect all file paths
    for root, _, files in os.walk(folder_path):
        for file in files:
            file_paths.append(os.path.join(root, file))

    # Iterate with tqdm
    for file_path in tqdm(file_paths, desc="Calculating size", unit="files"):
        try:
            total_size += os.path.getsize(file_path)
        except (FileNotFoundError, PermissionError):
            pass

    # Convert bytes to GB
    return total_size / (1024 ** 3)

if __name__ == "__main__":
    folder = r"Automate/step"
    size_gb = get_folder_size_gb(folder)
    print(f"\nTotal size: {size_gb:.2f} GB")


Calculating size: 100%|██████████| 447442/447442 [00:00<00:00, 777211.84files/s]


Total size: 69.00 GB





In [None]:
import os
import shutil
from tqdm import tqdm

FILES_PER_FOLDER = 10_000

def split_step_files(source_dir):
    # Collect STEP files
    step_files = sorted([
        f for f in os.listdir(source_dir)
        if f.lower().endswith((".step", ".stp"))
    ])

    if not step_files:
        print("No STEP files found.")
        return

    folder_index = 1
    file_counter = 0
    current_folder = None

    for file_name in tqdm(step_files, desc="Organizing STEP files", unit="files"):
        # Create new subfolder every 10,000 files
        if file_counter % FILES_PER_FOLDER == 0:
            folder_name = f"{folder_index:05d}"
            current_folder = os.path.join(source_dir, folder_name)
            os.makedirs(current_folder, exist_ok=True)
            folder_index += 1

        src = os.path.join(source_dir, file_name)
        dst = os.path.join(current_folder, file_name)

        shutil.move(src, dst)
        file_counter += 1

    print(f"\nDone! Organized {file_counter} files into {folder_index - 1} folders.")

if __name__ == "__main__":
    SOURCE_FOLDER = r"/path/to/your/step_files"
    split_step_files(SOURCE_FOLDER)
