# Title: csck700_ifc_parser

### The module parses an IFC model to extract the IfcElements, their properties and their relationships, and loads them into Neo4j as nodes and edges.

# 0. Table of contents:

# 1. Settings:

In [41]:
import os

# Path of the IFC model that the notebook parses.
IFC_PATH = r"..\data\raw\Building-Structural.ifc"

# Neo4j connection settings. Each one can be overridden by the environment
# variable of the same name, so credentials do not have to be edited here;
# the literals below are only the fallback values.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
# NOTE(review): a password literal is committed as the fallback. Prefer
# setting NEO4J_PASSWORD in the environment and rotating/removing this value.
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "TitineTiteFiro@1952")

In [43]:
# Relationship classes handled by edges_maker. Each entry maps an
# IfcRelationship class name to a triple:
#   (attribute holding the relating entity,
#    attribute holding the related entity or entities,
#    edge label).
# The label is None for IfcRelDefinesByProperties because edges_maker picks
# it per relationship. Entry order is the order in which edges are produced.
REL_MAP = {
    "IfcRelDefinesByProperties": (
        "RelatingPropertyDefinition",
        "RelatedObjects",
        None,
    ),
    "IfcRelDefinesByType": (
        "RelatingType",
        "RelatedObjects",
        "DEFINED_BY_TYPE",
    ),
    "IfcRelAssociatesMaterial": (
        "RelatingMaterial",
        "RelatedObjects",
        "ASSOCIATED_MATERIAL",
    ),
    "IfcRelContainedInSpatialStructure": (
        "RelatingStructure",
        "RelatedElements",
        "CONTAINED_IN",
    ),
    "IfcRelAggregates": (
        "RelatingObject",
        "RelatedObjects",
        "AGGREGATES",
    ),
    "IfcRelAssociatesClassification": (
        "RelatingClassification",
        "RelatedObjects",
        "ASSOCIATED_CLASSIFICATION",
    ),
}

# 2. Imports:

In [47]:
import ifcopenshell
import ifcopenshell.util.element as util
from neo4j import GraphDatabase
from collections import Counter

# 3. Helper functions:

**IFCRELATIONSHIPS - EDGES:**

In [54]:
def identify_rel(model):
    """
    Identify all distinct IfcRelationship classes present in the model.

    Args:
        model: object exposing by_type(class_name); every instance it returns
            for "IfcRelationship" must expose is_a().

    Returns:
        set of class-name strings, one per distinct relationship class.
        The same names are also printed, one per line.
    """
    rel_classes = {rel.is_a() for rel in model.by_type("IfcRelationship")}

    print("Identified relationship classes:")
    # Sorted so the printed listing is identical from run to run; plain set
    # iteration order for strings can change between interpreter sessions.
    for name in sorted(rel_classes):
        print(f"- {name}.")

    return rel_classes

In [56]:
def obj_id(o):
    """
    Returns the GUID of the given object, if present, else synthetic ClassName:step_id.

    Non-IfcRoot entities (e.g. IfcMaterial, IfcClassificationReference)
    do not have a GlobalId, so the fallback ensures uniqueness.

    The fallback is also used when a GlobalId attribute exists but is None or
    empty, so a node is never keyed by a missing value. It is only computed
    when needed, so objects that carry a GlobalId are not required to expose
    is_a() / id().
    """
    guid = getattr(o, "GlobalId", None)
    if guid:
        return guid
    return f"{o.is_a()}:{o.id()}"

In [58]:
def as_list(v):
    """
    Normalise a value into something that can be looped over.

    - None gives an empty list.
    - A list or tuple is handed back unchanged (a tuple stays a tuple).
    - Any other value is wrapped in a one-element list.
    """
    if isinstance(v, (list, tuple)):
        return v
    return [] if v is None else [v]

In [160]:
def edges_maker(model, rel_map=REL_MAP, pset_qto=True):
    """
    For a given model, creates a dictionary of edges (IfcRelationships
    exploded into from -> to pairs).

    Args:
        model: object exposing by_type(class_name), queried once per key of
            rel_map.
        rel_map: mapping of relationship class name to a triple
            (relating attribute, related attribute, edge label).
        pset_qto: when True, an IfcRelDefinesByProperties whose relating
            definition is an IfcElementQuantity is labelled
            DEFINED_BY_QUANTITIES; every other IfcRelDefinesByProperties is
            labelled DEFINED_BY_PROPERTIES.

    Returns:
        A tuple (expected, actual, edges):
        - expected: number of from -> to pairs emitted while exploding the
          relationships.
        - actual: number of distinct edges stored in the dictionary.
        - edges: a dictionary keyed by edge id, each value having the keys:
            - id, labels, rel_class, rel_id, from, to.
        expected and actual differ only when two emitted pairs share the same
        edge id (the later one overwrites the earlier one in edges).
    """

    edges = dict()
    # Counts every emitted pair, duplicates included. The edges dict is keyed
    # by (relationship, from, to), so comparing it to a set built on the same
    # key could never reveal an overwritten edge.
    emitted = 0

    for rel_class, (relating_attr, related_attr, label) in rel_map.items():
        for rel in model.by_type(rel_class):
            relating = getattr(rel, relating_attr, None)
            if relating is None:
                continue
            related = getattr(rel, related_attr, None)

            # distinction property set vs quantity set:
            if rel_class == "IfcRelDefinesByProperties":
                if pset_qto and relating.is_a("IfcElementQuantity"):
                    edge_label = "DEFINED_BY_QUANTITIES"
                else:
                    edge_label = "DEFINED_BY_PROPERTIES"
            else:
                edge_label = label

            frm = obj_id(relating)
            rel_gid = obj_id(rel)

            # one-to-many relationships:
            for r in as_list(related):
                if r is None:
                    continue
                to = obj_id(r)

                emitted += 1

                edge_id = f"{rel_gid}:{frm}->{to}"
                edges[edge_id] = {
                    "id": edge_id,
                    "labels": edge_label,
                    "rel_class": rel_class,
                    "rel_id": rel_gid,
                    "from": frm,
                    "to": to
                }

    expected = emitted
    actual = len(edges)

    return expected, actual, edges

**IFCENTITIES - NODES:**

In [63]:
def get_entity_from_id(model, oid):
    """
    Resolve a node identifier back to its entity in the model.

    Two identifier shapes are understood:
        - a 22-character string without ":" is looked up with
          model.by_guid(oid);
        - a string containing ":" is read as "<ClassName>:<step_id>" and the
          integer after the last ":" is looked up with model.by_id(step_id).

    Returns:
        The entity, or None when oid is not a string, matches neither shape,
        has a non-integer step id, or the lookup itself fails.
    """
    if not isinstance(oid, str):
        return None

    try:
        if ":" in oid:
            return model.by_id(int(oid.rsplit(":", 1)[-1]))
        if len(oid) == 22:
            return model.by_guid(oid)
    except Exception:
        # Best-effort lookup: an unparsable step id or an identifier the
        # model does not know is reported as "not found" for both lookup
        # kinds, so callers can simply skip it.
        return None

    return None

In [65]:
def get_entities_from_edges(model, edges, typ):
    """
    Resolve one endpoint of every edge back to its entity in the model.

    Args:
        model: passed through to get_entity_from_id.
        edges: dictionary of edges; only its values are read.
        typ: key of the endpoint to read on each edge ("from" or "to").

    Returns:
        list of resolved entities, in edge order, one per edge whose endpoint
        resolved to a truthy entity (unresolved endpoints are left out).
    """
    resolved = (get_entity_from_id(model, edge[typ]) for edge in edges.values())
    return [entity for entity in resolved if entity]

In [67]:
def merge_unique(obj1, obj2):
    """
    Concatenate two entity lists and drop duplicates, keyed by obj_id.

    Falsy items are ignored. When an id occurs more than once, the position
    of its first occurrence is kept and the last occurrence's object is the
    one retained.
    """
    by_identifier = {obj_id(item): item for item in obj1 + obj2 if item}
    return list(by_identifier.values())

In [69]:
def entities_classes_identify(all_objects):
    """
    Summarise which kinds of entity are present in all_objects.

    Every IfcElement (any subclass) is reported as "IfcElement", every
    IfcTypeObject (any subclass) as "IfcTypeObject", and anything else under
    its own class name.

    Type objects are recognised through is_a("IfcTypeObject") rather than by
    looking for "Type" in the class name, so type classes whose name does not
    contain "Type" (e.g. IfcDoorStyle) are grouped correctly.

    Returns:
        set of the category / class names found.
    """

    def category(o):
        if o.is_a("IfcElement"):
            return "IfcElement"
        if o.is_a("IfcTypeObject"):
            return "IfcTypeObject"
        return o.is_a()

    return {category(o) for o in all_objects}

In [71]:
def props_identify(all_objects):
    """
    Catalogue the property and quantity names carried by the given entities.

    Returns:
        dict with up to two keys, "IfcPropertySet" and "IfcElementQuantity"
        (a key is present as soon as one entity of that class is seen). Each
        maps a member name to {"property_type": <class name of the member>};
        a name seen several times keeps the last class encountered.
    """
    # (set class to match, attribute listing the members of such a set)
    sources = (
        ("IfcPropertySet", "HasProperties"),
        ("IfcElementQuantity", "Quantities"),
    )

    catalogue = {}

    for entity in all_objects:
        for set_class, members_attr in sources:
            if not entity.is_a(set_class):
                continue
            bucket = catalogue.setdefault(set_class, dict())
            for member in getattr(entity, members_attr):
                bucket[member.Name] = {"property_type": member.is_a()}
            # an entity is handled by the first matching class only
            break

    # only IfcMaterial class, no specific material properties to extract beyond the material name.

    return catalogue

In [73]:
def pset_props_collect(o):
    """
    Collect the values of a property set into a plain dictionary.

    Only IfcPropertySingleValue and IfcPropertyEnumeratedValue members of
    o.HasProperties are handled; other kinds are skipped.

    Returns:
        dict keyed by property name:
        - single value: {"kind": "SingleValue", "value": ...}
        - enumerated value: {"kind": "EnumeratedValue", "value": ...,
          "enumeration_name": ..., "domain": ...}, where "value" is the bare
          value when exactly one is selected and a list otherwise, and
          "enumeration_name" / "domain" are None without an enumeration
          reference.
    """

    def plain(value):
        # Values carrying a wrappedValue attribute are replaced by it; anything
        # else (None included) is returned as is.
        return getattr(value, "wrappedValue", value)

    collected = {}

    for prop in o.HasProperties:
        if prop.is_a("IfcPropertySingleValue"):
            collected[prop.Name] = {
                "kind": "SingleValue",
                "value": plain(getattr(prop, "NominalValue", None))
            }
            continue

        if not prop.is_a("IfcPropertyEnumeratedValue"):
            # if needed, to be developed to handle other kinds of properties.
            continue

        selected = [plain(v) for v in (getattr(prop, "EnumerationValues", []) or [])]
        reference = getattr(prop, "EnumerationReference", None)

        if reference:
            domain = [plain(v) for v in (getattr(reference, "EnumerationValues", []) or [])]
            enumeration_name = getattr(reference, "Name", None)
        else:
            domain = None
            enumeration_name = None

        collected[prop.Name] = {
            "kind": "EnumeratedValue",
            "value": selected[0] if len(selected) == 1 else selected,
            "enumeration_name": enumeration_name,
            "domain": domain
        }

    return collected

In [75]:
def qset_props_collect(o, include_none=False):
    """
    Collect the values of a quantity set into a plain dictionary.

    Handles length, area, volume, count, weight and time quantities; any
    other kind of quantity has no value extracted.

    Args:
        o: object whose Quantities attribute lists the quantities (a missing
            or None attribute is treated as empty).
        include_none: when True, quantities without an extracted value are
            kept with "value": None; when False they are left out.

    Returns:
        dict keyed by quantity name, each value being
        {"kind": <quantity class name>, "value": <extracted value>}.
    """
    # (quantity class, attribute holding its value), tried in this order.
    value_attr_by_class = (
        ("IfcQuantityLength", "LengthValue"),
        ("IfcQuantityArea", "AreaValue"),
        ("IfcQuantityVolume", "VolumeValue"),
        ("IfcQuantityCount", "CountValue"),
        ("IfcQuantityWeight", "WeightValue"),
        # Previously unhandled, so time quantities were silently dropped.
        ("IfcQuantityTime", "TimeValue"),
    )

    props = {}

    for q in getattr(o, "Quantities", []) or []:
        val = None
        for q_class, value_attr in value_attr_by_class:
            if q.is_a(q_class):
                val = getattr(q, value_attr)
                break

        if include_none or val is not None:
            props[q.Name] = {
                "kind": q.is_a(),
                "value": val
            }

    return props

In [137]:
def node_set(o):
    """
    Build the node record for one IFC entity.

    The entity is assigned a category label according to its class:
    PROJECT, ELEMENT, PSET, QSET, TYPE, MATERIAL, SPATIAL or
    CLASSIFICATION_REF. Every node carries:
        - "id": obj_id(o),
        - "labels": [category label, IFC class name],
        - "name": the entity's Name attribute (None when absent),
    plus category-specific extras (predefined_type, properties, or the
    classification code / uri / scheme_id).

    Returns:
        dict with a single entry {node id: node}, or an empty dict when the
        entity's class is not one of the handled categories.
    """

    node = dict()

    # helpers:
    def g_n(obj, name, default=None):
        return getattr(obj, name, default)

    def basic_props(label, addition=None):
        node_id = obj_id(o)
        base = {
            "id": node_id,
            "labels": [label, o.is_a()],
            "name": g_n(o, "Name")
        }
        if addition:
            base.update(addition)
        node[node_id] = base

    cls = o.is_a()

    if cls == "IfcProject":
        basic_props("PROJECT")

    elif o.is_a("IfcElement"):
        # ObjectType fallback: applies when PredefinedType is missing from the
        # class AND when it exists but is unset (None). A getattr default
        # alone only covers the first case.
        predefined_type = g_n(o, "PredefinedType")
        if predefined_type is None:
            predefined_type = g_n(o, "ObjectType")
        basic_props("ELEMENT", {
            "predefined_type": predefined_type
        })

    elif cls == "IfcPropertySet":
        basic_props("PSET", {
            "properties": pset_props_collect(o)
        })

    elif cls == "IfcElementQuantity":
        basic_props("QSET", {
            "properties": qset_props_collect(o)
        })

    elif o.is_a("IfcTypeObject"):
        # Class-hierarchy test instead of looking for "Type" in the class
        # name, so type classes named differently (e.g. IfcDoorStyle) are
        # labelled too.
        basic_props("TYPE")

    elif cls == "IfcMaterial":
        basic_props("MATERIAL")

    elif cls in ("IfcSite", "IfcBuilding", "IfcBuildingStorey", "IfcSpace"):
        basic_props("SPATIAL")

    elif cls == "IfcClassificationReference":
        scheme = g_n(o, "ReferencedSource")  # IfcClassification
        basic_props("CLASSIFICATION_REF", {
            "code": g_n(o, "Identification"),
            "uri": g_n(o, "Location"),
            # link to IfcClassification by id:
            "scheme_id": obj_id(scheme) if scheme else None,
        })

    return node

In [79]:
def nodes_maker(all_objects):
    """
    Build the dictionary of nodes for a collection of IFC entities.

    Each entity is converted with node_set and the resulting entries are
    merged into one dictionary keyed by node id; when two entities yield the
    same id, the later entry replaces the earlier one.
    """
    return {
        node_id: node
        for entity in all_objects
        for node_id, node in node_set(entity).items()
    }

**SANITY CHECKS:**

In [None]:
def elements_inspection(model, ifc_type="IfcProduct"):
    """
    Counts the IfcElements of the model (excluding proxies in this case),
    grouped by class, and prints the total followed by one line per class.

    Args:
        model: object exposing by_type(class_name).
        ifc_type: class queried on the model before filtering.

    Returns:
        - classes_counter: Counter mapping class name -> number of elements.
    """
    classes_counter = Counter()
    total = 0

    for product in model.by_type(ifc_type):
        # keep IfcElements only, and leave building element proxies out
        if not product.is_a("IfcElement") or product.is_a("IfcBuildingElementProxy"):
            continue
        classes_counter[product.is_a()] += 1
        total += 1

    print(f"Total number of elements: {total}")
    for cl in classes_counter:
        print(f"{cl}: {classes_counter[cl]}.")

    return classes_counter

In [None]:
def sanity(expected, actual, relating, related):
    """
    Run consistency checks on the edges and on the entities resolved from them.

    Checks, each reported on its own printed line:
        1. expected == actual (edge counts agree);
        2. len(relating) == actual (one relating entity per edge);
        3. len(related) == actual (one related entity per edge);
        4. the union of unique relating and related entities is at least as
           large as the bigger of the two sets.

    Args:
        expected: expected number of edges.
        actual: number of edges actually stored.
        relating: list of relating ("from") entities; items must be hashable.
        related: list of related ("to") entities; items must be hashable.

    Returns:
        True when every check passed, False otherwise.
    """
    passed = True

    if expected == actual:
        print(f"Edge count check passed: expected and actual are consistent ({actual}).")
    else:
        print(f"ERROR: edges count mismatch - expected {expected} but got {actual}.")
        passed = False

    if len(relating) == actual:
        print(f"Relating entities count check passed: {len(relating)} = number of edges ({actual}).")
    else:
        print(f"ERROR: relating entities count mismatch - edges {actual}, relating entities {len(relating)}.")
        passed = False

    if len(related) == actual:
        print(f"Related entities count check passed: {len(related)} = number of edges ({actual}).")
    else:
        print(f"ERROR: related entities count mismatch - edges {actual}, related entities {len(related)}.")
        passed = False

    relating_set = set(relating)
    related_set = set(related)
    union_nodes = relating_set | related_set

    # NOTE: a union is never smaller than either operand, so this check is
    # informational; the failure branch is kept consistent with the others.
    if len(union_nodes) >= max(len(relating_set), len(related_set)):
        print(f"Unique node union check passed: |union|={len(union_nodes)}, |relating|={len(relating_set)}, |related|={len(related_set)}")
    else:
        print(f"ERROR: union size invalid - |union|={len(union_nodes)}, |relating|={len(relating_set)}, |related|={len(related_set)}")
        passed = False

    return passed

# 4. Data load:

In [83]:
# Open the IFC file located at IFC_PATH; the result is passed to the helper
# functions as their `model` argument.
model = ifcopenshell.open(IFC_PATH)

In [85]:
# Create the Neo4j driver from the connection settings of section 1.
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

# 5. IFC Parsing:

The parsing process begins with the IfcRelationships, from which a dictionary of Edges is created to support Neo4j graph construction. Each edge captures the "from" and "to" attributes that define the connected IFC entities. Based on these connections, all distinct entities can be identified and represented in a dictionary of Nodes. Inverse relationships are not included, as Neo4j inherently supports bidirectional traversal of edges; storing them explicitly would only duplicate information and increase memory usage.  

To ensure consistency, node identifiers are normalized (e.g. through unique obj_id values), and key metadata such as IFC class labels and relevant properties are attached to nodes and edges. This guarantees that the resulting graph remains both semantically clear and efficient for querying.  

## 5.1. IfcRelationships - Edges Preparation:

### 5.1.1. Functions Building Guidance:

**edges_maker:**

In [90]:
# Print (and display) the relationship classes present in the model; used as
# guidance for which classes edges_maker has to handle.
identify_rel(model)

Identified relationship classes:
- IfcRelAggregates.
- IfcRelDefinesByType.
- IfcRelContainedInSpatialStructure.
- IfcRelAssociatesClassification.
- IfcRelAssociatesMaterial.
- IfcRelDefinesByProperties.


{'IfcRelAggregates',
 'IfcRelAssociatesClassification',
 'IfcRelAssociatesMaterial',
 'IfcRelContainedInSpatialStructure',
 'IfcRelDefinesByProperties',
 'IfcRelDefinesByType'}

### 5.1.2. Edges Dictionary:

In [162]:
# Build the edges dictionary with the default REL_MAP; expected / actual are
# the two edge counts later compared by sanity().
expected, actual, edges = edges_maker(model)

## 5.2. IfcEntities - Nodes Preparation:

### 5.2.1. Collect IfcEntities:

In [95]:
# Entities found at the "from" end of each edge.
relating_ents = get_entities_from_edges(model, edges, "from")

In [97]:
# Entities found at the "to" end of each edge.
related_ents = get_entities_from_edges(model, edges, "to")

In [184]:
# Both endpoint lists merged, with duplicates removed by obj_id.
all_ents = merge_unique(relating_ents, related_ents)

### 5.2.2. Proxies:

In [187]:
# Display the merged entity list for visual inspection.
all_ents

[#32=IfcPropertySet('18DB$FpSH4QhNntrrSRVGz',#1,'Pset_BuildingCommon',$,(#31)),
 #76=IfcPropertySet('0hVJYXG1r7ywQAq8Vpug40',#1,'Pset_WallCommon',$,(#72,#74,#75)),
 #86=IfcElementQuantity('0VVWsZ$_bFHgmAwNtcydte',#1,'Qto_WallBaseQuantities',$,$,(#82,#83,#84,#85)),
 #105=IfcPropertySet('0ek$CCUjnFefFg04xFxxG_',#1,'Pset_WallCommon',$,(#102,#103,#104)),
 #111=IfcElementQuantity('22KfX2yaj4UBecOFxQr$3y',#1,'Qto_WallBaseQuantities',$,$,(#107,#108,#109,#110)),
 #128=IfcPropertySet('2DaAsqJGPDexb$sFANyw0e',#1,'Pset_WallCommon',$,(#126,#127)),
 #134=IfcElementQuantity('16mzz5kV15mAyMAccFRkj4',#1,'Qto_WallBaseQuantities',$,$,(#130,#131,#132,#133)),
 #176=IfcPropertySet('3UegLqoYrB5QLJ1Fl0C0gk',#1,'Pset_WallCommon',$,(#173,#174,#175)),
 #182=IfcElementQuantity('3BkYHDW2b4LxsPRJLXlKa9',#1,'Qto_WallBaseQuantities',$,$,(#178,#179,#180,#181)),
 #199=IfcPropertySet('3IC75ptmHAcfynDO0H7l6Q',#1,'Pset_RoofCommon',$,(#197,#198)),
 #213=IfcPropertySet('2ups0JG6n1VvC4pdB0zluW',#1,'Pset_BeamCommon',$,(#210,

### 5.2.3. Functions Building Guidance:

**node_set:**

In [111]:
# Display the entity categories present; used as guidance for the branches
# node_set has to cover.
entities_classes_identify(all_ents)

{'IfcBuilding',
 'IfcBuildingStorey',
 'IfcClassificationReference',
 'IfcElement',
 'IfcElementQuantity',
 'IfcMaterial',
 'IfcProject',
 'IfcPropertySet',
 'IfcSite',
 'IfcTypeObject'}

**pset_props_collect & qset_props_collect:**

In [122]:
# Catalogue the property / quantity names and their classes, then display
# them; used as guidance for pset_props_collect and qset_props_collect.
props = props_identify(all_ents)
props

{'IfcPropertySet': {'ConstructionMethod': {'property_type': 'IfcPropertySingleValue'},
  'Status': {'property_type': 'IfcPropertyEnumeratedValue'},
  'IsExternal': {'property_type': 'IfcPropertySingleValue'},
  'LoadBearing': {'property_type': 'IfcPropertySingleValue'}},
 'IfcElementQuantity': {'NetVolume': {'property_type': 'IfcQuantityVolume'},
  'Width': {'property_type': 'IfcQuantityLength'},
  'Length': {'property_type': 'IfcQuantityLength'},
  'NetSideArea': {'property_type': 'IfcQuantityArea'},
  'CrossSectionArea': {'property_type': 'IfcQuantityArea'}}}

### 5.2.4. Nodes Dictionary:

In [176]:
# Build the nodes dictionary (keyed by node id) from the merged entities.
nodes = nodes_maker(all_ents)

## 5.3. Sanity Checks:

In [152]:
# Print the element count per class and keep the resulting Counter.
elements_distribution = elements_inspection(model)
# elements_distribution

Total number of elements: 15
IfcBeam: 6.
IfcChimney: 1.
IfcFooting: 1.
IfcRoof: 1.
IfcWall: 4.
IfcDiscreteAccessory: 2.


In [182]:
# Compare the edge counts with the number of resolved endpoint entities.
sanity(expected, actual, relating_ents, related_ents)

Edge count check passed: expected and actual are consistent (79).
Relating entities count check passed: 79 = number of edges (79).
Related entities count check passed: 79 = number of edges (79).
Unique node union check passed: |union|=64, |relating|=47, |related|=22


# 6. Neo4J graph: