# Title: csck700_ifc_parser

### The module parses an IFC model to extract the IfcElements, their properties and relationships, and loads them as nodes and edges into Neo4j.

# 0. Table of contents:

- [1. Settings:](#1.-Settings:)  
- [2. Imports:](#2.-Imports:)  
- [3. Helper functions:](#3.-Helper-functions:)  
    - [3.1. IfcRelationships - Edges:](#3.1.-IfcRelationships---Edges:)  
    - [3.2. IfcEntities - Nodes:](#3.2.-IfcEntities---Nodes:)  
    - [3.3. Sanity Checks:](#3.3.-Sanity-Checks:)  
- [4. Data load:](#4.-Data-load:)    
- [5. IFC Parsing:](#5.-IFC-Parsing:)  
    - [5.1. IfcRelationships - Edges Preparation:](#5.1.-IfcRelationships---Edges-Preparation:)  
    - [5.2. IfcEntities - Nodes Preparation:](#5.2.-IfcEntities---Nodes-Preparation:)  
    - [5.3. Sanity Checks:](#5.3.-Sanity-Checks:)  
- [6. Neo4J graph:](#6.-Neo4J-graph:)   

# 1. Settings:

In [1]:
# Path of the IFC model passed to ifcopenshell.open(); raw string so the
# backslashes are kept literally.
IFC_PATH = r"..\data\raw\Building-Structural.ifc"

# Neo4j connection settings: URI and credentials are passed to
# GraphDatabase.driver(), DATABASE to driver.session(database=...).
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
# NOTE(review): the password is hard-coded in the notebook; consider reading it
# from the environment before sharing or committing this file.
NEO4J_PASSWORD = "TitineTiteFiro@1952"
DATABASE = "capstone"

In [2]:
# Maps each IfcRelationship class handled by edges_maker() to a tuple:
#   (attribute holding the relating object,
#    attribute holding the related object(s),
#    edge label).
# The label is None for IfcRelDefinesByProperties because edges_maker() chooses
# DEFINED_BY_QUANTITIES or DEFINED_BY_PROPERTIES itself for that class.
REL_MAP ={
    "IfcRelDefinesByProperties": ("RelatingPropertyDefinition", "RelatedObjects", None),
    "IfcRelDefinesByType": ("RelatingType", "RelatedObjects", "DEFINED_BY_TYPE"),
    "IfcRelAssociatesMaterial": ("RelatingMaterial", "RelatedObjects", "ASSOCIATED_MATERIAL"),
    "IfcRelContainedInSpatialStructure": ("RelatingStructure", "RelatedElements", "CONTAINED_IN"),
    "IfcRelAggregates": ("RelatingObject", "RelatedObjects", "AGGREGATES"),
    "IfcRelAssociatesClassification": ("RelatingClassification", "RelatedObjects", "ASSOCIATED_CLASSIFICATION")
} 

# 2. Imports:

In [3]:
import ifcopenshell
import ifcopenshell.util.element as util
from neo4j import GraphDatabase
from collections import Counter, defaultdict

# 3. Helper functions:

## 3.1. IfcRelationships - Edges:

In [4]:
def identify_rel(model):
    """
    List the distinct IfcRelationship classes found in the model.

    Prints one line per class name and returns the names as a set.
    """

    rel_classes = {relationship.is_a() for relationship in model.by_type("IfcRelationship")}

    print("Identified relationship classes:")
    for class_name in rel_classes:
        print(f"- {class_name}.")

    return rel_classes

In [5]:
def obj_id(o):
    """
    Return the identifier used for an entity in the graph.

    The entity's GlobalId is used when it has one. Entities without a
    GlobalId attribute (the file mentions IfcMaterial and
    IfcClassificationReference) get the synthetic id "ClassName:step_id".
    """
    # Built up front, exactly like a getattr() default would be.
    synthetic = f"{o.is_a()}:{o.id()}"
    try:
        return o.GlobalId
    except AttributeError:
        return synthetic

In [6]:
def as_list(v):
    """
    Normalise a value to a sequence.

    Lists and tuples are returned unchanged, None becomes an empty list and
    any other value is wrapped in a one-element list.
    """
    if isinstance(v, (list, tuple)):
        return v
    return [] if v is None else [v]

In [7]:
def edges_maker(model, rel_map=REL_MAP, pset_qto=True):
    """
    Explode the IfcRelationships of a model into one edge per (from -> to) pair.

    Args:
        model: object exposing by_type(class_name), e.g. an opened IFC model.
        rel_map: mapping of relationship class ->
            (relating attribute, related attribute, edge label).
        pset_qto: when True, IfcRelDefinesByProperties edges whose relating
            object is an IfcElementQuantity are labelled DEFINED_BY_QUANTITIES;
            all other IfcRelDefinesByProperties edges are labelled
            DEFINED_BY_PROPERTIES.

    Returns:
        (expected, actual, edges) where
        - expected: number of (relationship, from, to) pairs emitted,
        - actual: number of distinct edges stored,
        - edges: dict keyed by edge id with keys
          id, labels, rel_class, rel_id, from, to.

        expected and actual differ only when two emitted pairs collapse onto
        the same edge id, which is what the edge-count sanity check looks for.
    """

    edges = dict()
    # Counted per emitted pair (not per unique key): counting unique keys would
    # always equal len(edges) and make the comparison meaningless.
    expected = 0

    for rel_class, (relating_attr, related_attr, label) in rel_map.items():
        for rel in model.by_type(rel_class):
            relating = getattr(rel, relating_attr, None)
            if relating is None:
                continue
            related = getattr(rel, related_attr, None)

            # distinction property set vs quantity set:
            if rel_class == "IfcRelDefinesByProperties":
                if pset_qto and relating.is_a("IfcElementQuantity"):
                    edge_label = "DEFINED_BY_QUANTITIES"
                else:
                    edge_label = "DEFINED_BY_PROPERTIES"
            else:
                edge_label = label

            frm = obj_id(relating)
            rel_gid = obj_id(rel)

            # one-to-many relationships:
            for r in as_list(related):
                if r is None:
                    continue
                to = obj_id(r)

                expected += 1

                edge_id = f"{rel_gid}:{frm}->{to}"
                edges[edge_id] = {
                    "id": edge_id,
                    "labels": edge_label,
                    "rel_class": rel_class,
                    "rel_id": rel_gid,
                    "from": frm,
                    "to": to
                }

    actual = len(edges)

    return expected, actual, edges

## 3.2. IfcEntities - Nodes:

In [8]:
def get_entity_from_id(model, oid):
    """
    Return the Ifc entity for a GUID or a synthetic "ClassName:step_id" id.

    Args:
        model: object exposing by_guid(str) and by_id(int).
        oid: a 22-character GUID without ":" or a "ClassName:step_id" string.

    Returns:
        The entity, or None when oid is not a string, has neither format, or
        the lookup fails.
    """

    # Non-string ids (None, int, ...) cannot be parsed; treat them as unknown
    # instead of failing on the ":" membership test.
    if not isinstance(oid, str):
        return None

    try:
        if ":" in oid:
            return model.by_id(int(oid.split(":")[-1]))
        if len(oid) == 22:
            return model.by_guid(oid)
    except Exception:
        # Best-effort lookup: a malformed step id or an id the model does not
        # know yields None for both id formats.
        return None

    return None

In [9]:
def get_entities_from_edges(model, edges, typ):
    """
    Resolve one endpoint of every edge back to its Ifc entity.

    typ is the edge key to read ("from" for the relating side, "to" for the
    related side). Endpoints that do not resolve to a truthy entity are
    left out.
    """

    resolved = (get_entity_from_id(model, edge[typ]) for edge in edges.values())
    return [entity for entity in resolved if entity]

In [10]:
def merge_unique(obj1, obj2):
    """
    Combine two lists of Ifc entities, keeping one entity per obj_id.

    Falsy items are ignored. When the same id occurs more than once, the last
    occurrence is kept, at the position of the first.
    """
    by_id = {obj_id(entity): entity for entity in obj1 + obj2 if entity}
    return list(by_id.values())

In [11]:
def entities_classes_identify(all_objects):
    """
    Return the set of Ifc entity classes found in all_objects.

    Every IfcElement subtype is reported as 'IfcElement', every class whose
    name contains "Type" as 'IfcTypeObject'; other classes are reported under
    their own name.
    """

    def collapsed_class(entity):
        if entity.is_a("IfcElement"):
            return "IfcElement"
        class_name = entity.is_a()
        return "IfcTypeObject" if "Type" in class_name else class_name

    return {collapsed_class(entity) for entity in all_objects}

In [12]:
def props_identify(all_objects):
    """
    Inventory the property and quantity names carried by the given entities.

    Returns a dict with up to two keys, "IfcPropertySet" and
    "IfcElementQuantity", each mapping a property/quantity name to
    {"property_type": <Ifc class of that property/quantity>}.
    Entities of any other class are ignored.
    """

    pset = "IfcPropertySet"
    qset = "IfcElementQuantity"

    props = {}

    for entity in all_objects:
        if entity.is_a(pset):
            set_class, members_attr = pset, "HasProperties"
        elif entity.is_a(qset):
            set_class, members_attr = qset, "Quantities"
        else:
            # e.g. IfcMaterial: nothing to extract beyond the material name.
            continue

        inventory = props.setdefault(set_class, dict())
        for member in getattr(entity, members_attr):
            inventory[member.Name] = {"property_type": member.is_a()}

    return props

In [13]:
def pset_props_collect(o):
    """
    Collect the properties of an IfcPropertySet as plain Python values.

    Only IfcPropertySingleValue and IfcPropertyEnumeratedValue are handled;
    any other property kind is skipped. Returns a dict keyed by property name.
    """

    def plain(wrapper):
        """Return the wrapped Python value of an Ifc value, or the value itself."""
        return None if wrapper is None else getattr(wrapper, "wrappedValue", wrapper)

    def plain_list(values):
        """Unwrap every item of a possibly-None sequence of Ifc values."""
        return [plain(item) for item in (values or [])]

    collected = {}

    for prop in o.HasProperties:
        if prop.is_a("IfcPropertySingleValue"):
            collected[prop.Name] = {
                "kind": "SingleValue",
                "value": plain(getattr(prop, "NominalValue", None))
            }
            continue

        if not prop.is_a("IfcPropertyEnumeratedValue"):
            # other kinds of properties are not handled (yet).
            continue

        chosen = plain_list(getattr(prop, "EnumerationValues", []))
        reference = getattr(prop, "EnumerationReference", None)
        if reference:
            enum_name = getattr(reference, "Name", None)
            allowed = plain_list(getattr(reference, "EnumerationValues", []))
        else:
            enum_name = None
            allowed = None

        collected[prop.Name] = {
            "kind": "EnumeratedValue",
            # a single selected value is stored bare, anything else as a list
            "value": chosen[0] if len(chosen) == 1 else chosen,
            "enumeration_name": enum_name,
            "domain": allowed
        }

    return collected

In [14]:
def qset_props_collect(o, include_none=False):
    """
    Collect the quantities of an IfcElementQuantity.

    Length, area, volume, count and weight quantities are read from their
    value attribute; any other quantity class has the value None. Quantities
    whose value is None are only kept when include_none is True.
    Returns a dict keyed by quantity name with "kind" and "value".
    """

    # (quantity class, attribute holding its value), checked in this order.
    value_attrs = (
        ("IfcQuantityLength", "LengthValue"),
        ("IfcQuantityArea", "AreaValue"),
        ("IfcQuantityVolume", "VolumeValue"),
        ("IfcQuantityCount", "CountValue"),
        ("IfcQuantityWeight", "WeightValue"),
    )

    collected = {}

    for quantity in getattr(o, "Quantities", []) or []:
        value = None
        for class_name, attr in value_attrs:
            if quantity.is_a(class_name):
                value = getattr(quantity, attr)
                break

        if value is None and not include_none:
            continue

        collected[quantity.Name] = {
            "kind": quantity.is_a(),
            "value": value
        }

    return collected

In [15]:
def node_set(o):
    """
    Build the node information for a single Ifc entity.

    Returns:
        A dict {node_id: node} with one entry, where node holds "id",
        "labels" ([category, Ifc class]), "name" and category-specific extras.
        The dict is empty when the entity matches none of the handled
        categories (PROJECT, ELEMENT, PSET, QSET, TYPE, MATERIAL, SPATIAL,
        CLASSIFICATION_REF).
    """

    node = dict()

    # helpers:
    def g_n(o, name, default=None):
        """
        Extract an attribute for an Ifc entity.
        """
        return getattr(o, name, default)

    def basic_props(label, addition=None):
        """
        Set the base node dictionary for an Ifc entity and add it to "node".
        """
        node_id = obj_id(o)
        base = {
            "id": node_id,
            "labels": [label, o.is_a()],
            "name": g_n(o, "Name")
        }
        if addition:
            base.update(addition)
        node[node_id] = base


    cls = o.is_a()

    if cls == "IfcProject":
        basic_props("PROJECT")

    elif o.is_a("IfcElement"):
        # ObjectType fallback: applies both when the entity has no
        # PredefinedType attribute and when the attribute is present but unset.
        predefined_type = g_n(o, "PredefinedType")
        if predefined_type is None:
            predefined_type = g_n(o, "ObjectType")
        basic_props("ELEMENT", {
            "predefined_type": predefined_type
        })

    elif cls == "IfcPropertySet":
        basic_props("PSET", {
            "properties": pset_props_collect(o)
        })

    elif cls == "IfcElementQuantity":
        basic_props("QSET", {
            "properties": qset_props_collect(o)
        })

    elif "Type" in cls:
        # name-based match: any remaining class whose name contains "Type"
        basic_props("TYPE")

    elif cls == "IfcMaterial":
        basic_props("MATERIAL")

    elif cls in ("IfcSite", "IfcBuilding", "IfcBuildingStorey", "IfcSpace"):
        basic_props("SPATIAL")

    elif cls == "IfcClassificationReference":
        scheme = g_n(o, "ReferencedSource") # IfcClassification
        basic_props("CLASSIFICATION_REF",{
            "code": g_n(o, "Identification"),
            "uri": g_n(o, "Location"),
            # link to IfcClassification by id:
            "scheme_id": obj_id(scheme) if scheme else None,
        })

    return node

In [16]:
def nodes_maker(all_objects):
    """
    Build the node dictionary (node id -> node) for a list of Ifc entities.

    Each entity is converted with node_set(); entities for which node_set()
    returns nothing contribute no node.
    """

    nodes = {}

    for entity in all_objects:
        for node_id, node in node_set(entity).items():
            nodes[node_id] = node

    return nodes

## 3.3. Sanity Checks:  

In [17]:
def elements_inspection(model, ifc_type="IfcProduct"):
    """
    Count the IfcElements of the model per class, leaving out proxies.

    Entities of ifc_type are kept when they are an IfcElement and not an
    IfcBuildingElementProxy. Prints the total and the per-class counts.
    Returns:
        - classes_counter: Counter of kept elements keyed by class name.
    """

    def is_counted(product):
        return product.is_a("IfcElement") and not product.is_a("IfcBuildingElementProxy")

    kept_classes = [product.is_a() for product in model.by_type(ifc_type) if is_counted(product)]
    classes_counter = Counter(kept_classes)

    print(f"Total number of elements: {len(kept_classes)}")
    for class_name, count in classes_counter.items():
        print(f"{class_name}: {count}.")

    return classes_counter

In [18]:
def sanity(expected, actual, relating, related, entities=None):
    """
    Sanity checks:
        - Proxies presence.
        - Edge count.
        - Relating entities count.
        - Related entities count.
        - Union nodes.

    Args:
        expected: expected number of edges.
        actual: actual number of edges.
        relating: list of relating ("from") entities, one per edge.
        related: list of related ("to") entities, one per edge.
        entities: entities inspected for proxies. When omitted, the
            module-level `all_ents` is used if it exists (the previous
            behaviour); otherwise the relating and related entities are used.

    Returns:
        bool: True if all checks pass, else False.
    """
    if entities is None:
        # Backward compatibility with the notebook, where all_ents is a global.
        entities = globals().get("all_ents")
    if entities is None:
        entities = list(relating) + list(related)

    passed = True

    # Step ids of proxy entities, without duplicates and in first-seen order.
    proxies = list(dict.fromkeys(e.id() for e in entities if "Proxy" in e.is_a()))

    if not proxies:
        print("Proxy check passed, there is no Proxy entity.")
    else:
        print(f"ERROR: Presence of Proxy entities: {proxies}")
        passed = False

    if expected == actual:
        print(f"Edge count check passed: expected and actual are consistent ({actual}).")
    else:
        print(f"ERROR: edges count mismatch - expected {expected} but got {actual}.")
        passed = False

    if len(relating) == actual:
        print(f"Relating entities count check passed: {len(relating)} = number of edges ({actual}).")
    else:
        print(f"ERROR: relating entities count mismatch - edges {actual}, relating entities {len(relating)}.")
        passed = False

    if len(related) == actual:
        print(f"Related entities count check passed: {len(related)} = number of edges ({actual}).")
    else:
        print(f"ERROR: related entities count mismatch - edges {actual}, related entities {len(related)}.")
        passed = False

    relating_set = set(relating)
    related_set = set(related)
    union_nodes = relating_set | related_set

    # A union is never smaller than either operand, so this branch is a
    # safeguard only; it now also marks the overall result as failed.
    if len(union_nodes) >= max(len(relating_set), len(related_set)):
        print(f"Unique node union check passed: |union|={len(union_nodes)}, |relating|={len(relating_set)}, |related|={len(related_set)}")
    else:
        print(f"ERROR: union size invalind - |union|={len(union_nodes)}, |relating|={len(relating_set)}, |related|={len(related_set)}")
        passed = False

    print(f"The overall sanity check has been succesfully passed: {passed}.")

    return passed

## 6.1. Graph schema definition:

In [19]:
def ent_labels(ent_type, input, verbose=False):
    """
    Count the labels of the prepared nodes or edges and print a summary.

    Args:
        ent_type: "nodes" or "edges".
        input: dict of nodes (each with "labels" == [category, type]) or of
            edges (each with "labels" == edge type).
        verbose: when True, also print the total and the per-label counts.

    Returns:
    - if ent_type == "nodes": (cat_counter, type_counter), two Counters.
    - if ent_type == "edges": type_counter, a Counter of edge types.

    Raises:
        ValueError: if ent_type is neither "nodes" nor "edges".
    """
    if ent_type == "edges":

        type_counter = Counter(item["labels"] for item in input.values())

        print(f"Number of edge types: {len(type_counter)}.")

        if verbose:
            print()
            print(f"TOTAL NUMBER OF EDGES: {len(input)}")
            print()
            print("EDGE TYPES:")
            for t in sorted(type_counter):
                print(f" - {t}: {type_counter[t]}.")

        return type_counter

    if ent_type == "nodes":

        cat_counter = Counter()
        type_counter = Counter()

        for item in input.values():
            cat, typ = item["labels"]
            cat_counter[cat] += 1
            type_counter[typ] += 1

        print(f"Number of node categories: {len(cat_counter)}.")
        print(f"Number of node types: {len(type_counter)}.")

        if verbose:
            print()
            print(f"TOTAL NUMBER OF NODES: {len(input)}")
            print()
            print("NODE CATEGORIES:")
            for c in sorted(cat_counter):
                print(f" - {c}: {cat_counter[c]}.")
            print()
            print("NODE TYPES:")
            for t in sorted(type_counter):
                print(f" - {t}: {type_counter[t]}.")

        return cat_counter, type_counter

    # An unknown kind used to return None silently.
    raise ValueError("ent_type must be 'nodes' or 'edges'.")

In [20]:
def _set_property_profile(records, category):
    """Collect, per set name, the property keys used by nodes of one category."""
    keys_by_set = defaultdict(set)
    counts_by_set = defaultdict(Counter)

    for record in records:
        if record["labels"][0] != category:
            continue
        set_name = record.get("name")
        for key in (record.get("properties", {}) or {}):
            keys_by_set[set_name].add(key)
            counts_by_set[set_name][key] += 1

    return keys_by_set, counts_by_set


def search_attr(ents, verbose=False, entity=None):
    """
    Analyse the node or edge attributes by category and collect the schema profiles.

    Args:
        ents: dict of prepared nodes or edges.
        verbose: when True print a formatted report, otherwise print the raw
            collections.
        entity: "node" or "edge" (required; the None default is rejected).

    Returns:
        - entity == "node": (cat_attr, cat_attr_counts, cat_totals,
          pset_prop, pset_prop_counts, qset_prop, qset_prop_counts).
        - entity == "edge": (cat_attr, cat_attr_counts, cat_totals).

    Raises:
        ValueError: if entity is neither "node" nor "edge".
    """
    # Any other value used to fail later (AttributeError on None.upper()) or
    # return None after printing.
    if entity not in ("node", "edge"):
        raise ValueError("entity must be 'node' or 'edge'.")

    is_node = entity == "node"
    vals = ents.values()

    # CATEGORY: top level attributes per category
    cat_attr = defaultdict(set)
    cat_attr_counts = defaultdict(Counter)
    cat_totals = Counter()

    for v in vals:
        # nodes carry [category, type], edges carry the edge type directly
        cat = v["labels"][0] if is_node else v["labels"]
        cat_totals[cat] += 1
        for a in v:
            if a == "labels":
                continue
            cat_attr[cat].add(a)
            cat_attr_counts[cat][a] += 1

    if is_node:
        # PSET: property names per Pset / QSET: quantity names per Qset
        pset_prop, pset_prop_counts = _set_property_profile(vals, "PSET")
        qset_prop, qset_prop_counts = _set_property_profile(vals, "QSET")

    print(f"\n==={entity.upper()} ATTRIBUTE ANALYSIS===\n")
    print("Top-level attributes per category:")

    if not verbose:

        print(cat_attr)
        print(cat_attr_counts)
        print(cat_totals)
        print()

        if is_node:

            print("Property names per Pset:")
            print(pset_prop)
            print()
            print("Quantities names per Qset:")
            print(qset_prop)
            print(qset_prop_counts)

    else:

        for cat in sorted(cat_attr):
            # unit follows the entity kind ("nodes" / "edges")
            print(f" - {cat} ({cat_totals[cat]} {entity}s): {sorted(cat_attr[cat])}")
            for a, c in cat_attr_counts[cat].most_common():
                print(f"    • {a}: {c}/{cat_totals[cat]}")
            print()

        if is_node:

            print("\n PSET property keys per set:")
            for pset in sorted(pset_prop):
                print(f" - {pset}: {sorted(pset_prop[pset])}")
                for k, c in pset_prop_counts[pset].most_common():
                    print(f"    • {k}: {c}")
                print()

            print("\n QSET quantity keys per set:")
            for qset in sorted(qset_prop):
                print(f" - {qset}: {sorted(qset_prop[qset])}")
                for k, c in qset_prop_counts[qset].most_common():
                    print(f"    • {k}: {c}")
                print()

    if is_node:

        return (
            cat_attr, cat_attr_counts, cat_totals,
            pset_prop, pset_prop_counts,
            qset_prop, qset_prop_counts
        )

    return (
        cat_attr, cat_attr_counts, cat_totals
    )

## 6.2. Nodes insertion / 6.3. Relationships/edges insertion:

In [21]:
def flatten_dict(dico, parent_key = "", sep="_"):
    """
    Flatten a nested dictionary into a single level.

    Nested keys are joined with sep (prefixed by parent_key when given);
    nested dictionaries that are empty contribute no key.
    """

    def walk(mapping, prefix):
        """Yield (flattened key, value) pairs depth-first, in insertion order."""
        for key, value in mapping.items():
            path = f"{prefix}{sep}{key}" if prefix else key
            if isinstance(value, dict):
                yield from walk(value, path)
            else:
                yield path, value

    return dict(walk(dico, parent_key))

In [22]:
def driver_merge(ents, entity=None):
    """
    Merge nodes or edges into Neo4J from prepared dictionaries.

    Args:
        ents: dict of prepared nodes (flattened, each with "id" and "labels")
            or of edges (each with "from", "to", "labels", "rel_id").
        entity: "node" or "edge".

    Raises:
        ValueError: if entity is neither "node" nor "edge".

    Uses the module-level `driver` and `DATABASE`. Labels and relationship
    types are interpolated into the query text (only values are passed as
    parameters), so they must come from trusted input.
    """

    # An unknown kind used to open a session and do nothing.
    if entity not in ("node", "edge"):
        raise ValueError("entity must be 'node' or 'edge'.")

    with driver.session(database=DATABASE) as session:

        if entity == "node":

            # iterate the dictionary that was passed in, not a notebook global
            for node in ents.values():
                labels = ":"+":".join(node["labels"])
                attr = {k: v for k, v in node.items() if k not in ("labels", "id")}

                cypher_node = f"""
                MERGE (n{labels}{{id: $id}})
                SET n += $attr
                """

                session.run(cypher_node, {"id": node["id"], "attr": attr})

        else:

            for edge in ents.values():
                attr = {k: v for k, v in edge.items() if k not in ("from", "to", "labels", "rel_id")}

                # the MATCH finds both endpoints by id; an edge whose endpoint
                # node does not exist matches nothing and creates nothing
                cypher_edge = f"""
                MATCH (a {{id: $from_id }}), (b {{id: $to_id}})
                MERGE (a)-[r:{edge["labels"]}{{rel_id: $rel_id}}]->(b)
                SET r += $attr
                """

                session.run(cypher_edge, {
                    "from_id": edge["from"],
                    "to_id": edge["to"],
                    "rel_id": edge["rel_id"],
                    "attr": attr
                        })

## 6.4. Graph integrity:

# 4. Data load:

In [23]:
# Open the IFC file at IFC_PATH; `model` is the input of the parsing helpers.
model = ifcopenshell.open(IFC_PATH)

In [24]:
# Neo4j driver used by driver_merge() and check_labels().
# NOTE(review): the only driver.close() in this file is commented out — confirm
# the driver gets closed once the notebook is done.
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

# 5. IFC Parsing:

The parsing process begins with the IfcRelationships, from which a dictionary of Edges is created to support Neo4j graph construction. Each edge captures the "from" and "to" attributes that define the connected IFC entities. Based on these connections, all distinct entities can be identified and represented in a dictionary of Nodes. Inverse relationships are not included, as Neo4j inherently supports bidirectional traversal of edges; storing them explicitly would only duplicate information and increase memory usage. Proxy entities are removed in this model, as they are not significant building elements but instead geolocation artifacts.   

To ensure consistency, node identifiers are normalized (e.g. through unique obj_id values), and key metadata such as IFC class labels and relevant properties are attached to nodes and edges. This guarantees that the resulting graph remains both semantically clear and efficient for querying.  

## 5.1. IfcRelationships - Edges Preparation:

### 5.1.1. Functions Building Guidance:

**edges_maker:**

In [25]:
# Print (and display) the distinct IfcRelationship classes present in the model.
identify_rel(model)

Identified relationship classes:
- IfcRelAggregates.
- IfcRelDefinesByProperties.
- IfcRelDefinesByType.
- IfcRelContainedInSpatialStructure.
- IfcRelAssociatesMaterial.
- IfcRelAssociatesClassification.


{'IfcRelAggregates',
 'IfcRelAssociatesClassification',
 'IfcRelAssociatesMaterial',
 'IfcRelContainedInSpatialStructure',
 'IfcRelDefinesByProperties',
 'IfcRelDefinesByType'}

### 5.1.2. Edges Dictionary:

In [26]:
# Build the edge dictionary; expected/actual are the edge counts later passed to sanity().
expected, actual, edges = edges_maker(model)

## 5.2. IfcEntities - Nodes Preparation:

### 5.2.1. Collect IfcEntities:

In [27]:
# Resolve the "from" (relating) endpoint of every edge back to its Ifc entity.
relating_ents = get_entities_from_edges(model, edges, "from")

In [28]:
# Resolve the "to" (related) endpoint of every edge back to its Ifc entity.
related_ents = get_entities_from_edges(model, edges, "to")

In [29]:
# Single list of entities, de-duplicated by obj_id; also read as a global by sanity().
all_ents = merge_unique(relating_ents, related_ents)

### 5.2.2. Proxies:

# ALSO DROP RELATIONSHIPS IMPACTED BY PROXIES = **ERROR CAUSE**

In [30]:
# all_ents = [o for o in all_ents if "Proxy" not in o.is_a()]

### 5.2.3. Functions Building Guidance:

**node_set:**

In [31]:
# Display the entity classes found (element subtypes and *Type classes are collapsed).
entities_classes_identify(all_ents)

{'IfcBuilding',
 'IfcBuildingStorey',
 'IfcClassificationReference',
 'IfcElement',
 'IfcElementQuantity',
 'IfcMaterial',
 'IfcProject',
 'IfcPropertySet',
 'IfcSite',
 'IfcTypeObject'}

**pset_props_collect & qset_props_collect:**

In [32]:
# Inventory the property/quantity names and their Ifc classes, then display it.
props = props_identify(all_ents)
props

{'IfcPropertySet': {'ConstructionMethod': {'property_type': 'IfcPropertySingleValue'},
  'Status': {'property_type': 'IfcPropertyEnumeratedValue'},
  'IsExternal': {'property_type': 'IfcPropertySingleValue'},
  'LoadBearing': {'property_type': 'IfcPropertySingleValue'}},
 'IfcElementQuantity': {'NetVolume': {'property_type': 'IfcQuantityVolume'},
  'Width': {'property_type': 'IfcQuantityLength'},
  'Length': {'property_type': 'IfcQuantityLength'},
  'NetSideArea': {'property_type': 'IfcQuantityArea'},
  'CrossSectionArea': {'property_type': 'IfcQuantityArea'}}}

### 5.2.4. Nodes Dictionary:

In [33]:
# Build the node dictionary (node id -> node) from the collected entities.
nodes = nodes_maker(all_ents)

## 5.3. Sanity Checks:

In [34]:
# Print the per-class count of IfcElements (proxies excluded) and keep the Counter.
elements_distribution = elements_inspection(model)
# elements_distribution

Total number of elements: 15
IfcBeam: 6.
IfcChimney: 1.
IfcFooting: 1.
IfcRoof: 1.
IfcWall: 4.
IfcDiscreteAccessory: 2.


In [35]:
# Run the sanity checks on the edge counts and endpoint lists (also reads the global all_ents).
sanity(expected, actual, relating_ents, related_ents)

ERROR: Presence of Proxy entities: [370, 389, 372, 391, 162]
Edge count check passed: expected and actual are consistent (79).
Relating entities count check passed: 79 = number of edges (79).
Related entities count check passed: 79 = number of edges (79).
Unique node union check passed: |union|=64, |relating|=47, |related|=22
The overall sanity check has been succesfully passed: False.


# 6. Neo4J graph:

## 6.1. Graph schema definition:

By collecting node labels, categories, types, edge types, and the attributes and properties of both nodes and edges during IFC parsing, a baseline schema profile of the model is established. Once the data is imported into Neo4j, the same metrics can be recomputed with Cypher and compared against this baseline. This provides a sanity check to confirm that the Neo4j graph faithfully reflects the IFC structure, preserves attributes and relationships, and has not lost or altered information during transformation. In this way, the metrics act as a fingerprint of the IFC dataset, supporting the integrity and consistency of the final graph.  

### 6.1.1. Nodes labels, categories and types:

In [36]:
# Baseline node counts per category and per Ifc type; compared with Neo4j by check_labels() further down.
n_cat_counter, n_type_counter = ent_labels("nodes", nodes, True)

Number of node categories: 8.
Number of node types: 22.

TOTAL NUMBER OF NODES: 64

NODE CATEGORIES:
 - CLASSIFICATION_REF: 1.
 - ELEMENT: 18.
 - MATERIAL: 7.
 - PROJECT: 1.
 - PSET: 12.
 - QSET: 10.
 - SPATIAL: 4.
 - TYPE: 11.

NODE TYPES:
 - IfcBeam: 6.
 - IfcBeamType: 1.
 - IfcBuilding: 1.
 - IfcBuildingElementProxy: 3.
 - IfcBuildingElementProxyType: 2.
 - IfcBuildingStorey: 1.
 - IfcChimney: 1.
 - IfcChimneyType: 1.
 - IfcClassificationReference: 1.
 - IfcDiscreteAccessory: 2.
 - IfcDiscreteAccessoryType: 1.
 - IfcElementQuantity: 10.
 - IfcFooting: 1.
 - IfcFootingType: 1.
 - IfcMaterial: 7.
 - IfcProject: 1.
 - IfcPropertySet: 12.
 - IfcRoof: 1.
 - IfcRoofType: 1.
 - IfcSite: 2.
 - IfcWall: 4.
 - IfcWallType: 4.


### 6.1.2. Relationships/edges types:

In [37]:
# Baseline edge counts per edge type; compared with Neo4j by check_labels() further down.
e_type_counter = ent_labels("edges", edges, True)

Number of edge types: 7.

TOTAL NUMBER OF NODES: 79

EDGE TYPES:
 - AGGREGATES: 12.
 - ASSOCIATED_CLASSIFICATION: 1.
 - ASSOCIATED_MATERIAL: 17.
 - CONTAINED_IN: 10.
 - DEFINED_BY_PROPERTIES: 12.
 - DEFINED_BY_QUANTITIES: 10.
 - DEFINED_BY_TYPE: 17.


### 6.1.3. Nodes & edges attributes:

**NODES ATTRIBUTES & PROPERTIES:**

In [38]:
# Attribute profile of the nodes: top-level attributes per category plus Pset/Qset property keys.
n_cat_attr, n_cat_attr_counts, n_cat_totals, n_pset_prop, n_pset_prop_counts, n_qset_prop, n_qset_prop_counts = search_attr(nodes, True, "node")


===NODE ATTRIBUTE ANALYSIS===

Top-level attributes per category:
 - CLASSIFICATION_REF (1 nodes): ['code', 'id', 'name', 'scheme_id', 'uri']
    • id: 1/1
    • name: 1/1
    • code: 1/1
    • uri: 1/1
    • scheme_id: 1/1

 - ELEMENT (18 nodes): ['id', 'name', 'predefined_type']
    • id: 18/18
    • name: 18/18
    • predefined_type: 18/18

 - MATERIAL (7 nodes): ['id', 'name']
    • id: 7/7
    • name: 7/7

 - PROJECT (1 nodes): ['id', 'name']
    • id: 1/1
    • name: 1/1

 - PSET (12 nodes): ['id', 'name', 'properties']
    • id: 12/12
    • name: 12/12
    • properties: 12/12

 - QSET (10 nodes): ['id', 'name', 'properties']
    • id: 10/10
    • name: 10/10
    • properties: 10/10

 - SPATIAL (4 nodes): ['id', 'name']
    • id: 4/4
    • name: 4/4

 - TYPE (11 nodes): ['id', 'name']
    • id: 11/11
    • name: 11/11


 PSET property keys per set:
 - Pset_BeamCommon: ['IsExternal', 'LoadBearing', 'Status']
    • Status: 6
    • IsExternal: 6
    • LoadBearing: 6

 - Pset_Buildi

**EDGES ATTRIBUTES:**

In [39]:
# Attribute profile of the edges: top-level attributes per edge type.
e_cat_attr, e_cat_attr_counts, e_cat_totals = search_attr(edges, True, "edge")


===EDGE ATTRIBUTE ANALYSIS===

Top-level attributes per category:
 - AGGREGATES (12 nodes): ['from', 'id', 'rel_class', 'rel_id', 'to']
    • id: 12/12
    • rel_class: 12/12
    • rel_id: 12/12
    • from: 12/12
    • to: 12/12

 - ASSOCIATED_CLASSIFICATION (1 nodes): ['from', 'id', 'rel_class', 'rel_id', 'to']
    • id: 1/1
    • rel_class: 1/1
    • rel_id: 1/1
    • from: 1/1
    • to: 1/1

 - ASSOCIATED_MATERIAL (17 nodes): ['from', 'id', 'rel_class', 'rel_id', 'to']
    • id: 17/17
    • rel_class: 17/17
    • rel_id: 17/17
    • from: 17/17
    • to: 17/17

 - CONTAINED_IN (10 nodes): ['from', 'id', 'rel_class', 'rel_id', 'to']
    • id: 10/10
    • rel_class: 10/10
    • rel_id: 10/10
    • from: 10/10
    • to: 10/10

 - DEFINED_BY_PROPERTIES (12 nodes): ['from', 'id', 'rel_class', 'rel_id', 'to']
    • id: 12/12
    • rel_class: 12/12
    • rel_id: 12/12
    • from: 12/12
    • to: 12/12

 - DEFINED_BY_QUANTITIES (10 nodes): ['from', 'id', 'rel_class', 'rel_id', 'to']
    • 

## 6.2. Nodes insertion:

To set up the Neo4j graph, both the **"nodes"** dictionary and the **"edges"** dictionary generated during the IFC parsing stage will serve as the input. These dictionaries represent the essential building blocks of the graph:  
- the **"nodes"** (with their labels, categories, types, attributes and properties/quantities)  
- the **"edges"** (with their relationship categories and endpoints)

They will be passed into Neo4j through the **Neo4j Python driver**, which manages the connection and Cypher queries used to create the graph.

### 6.2.1. Dictionary preparation:

In [40]:
# Flatten every node (nested "properties" become top-level keys) while keeping
# the "labels" list intact; result is keyed by node id.
nodes_ready = {}

for node in nodes.values():
    without_labels = {key: value for key, value in node.items() if key != "labels"}
    flat = flatten_dict(without_labels)
    # "labels" is re-attached after flattening so it stays a list
    flat["labels"] = node["labels"]
    nodes_ready[node["id"]] = flat

### 6.2.2. Driver run:

In [41]:
# MERGE the flattened nodes into the Neo4j database.
driver_merge(nodes_ready, "node")

## 6.3. Relationships/edges insertion:

### 6.3.1. Driver run:

In [42]:
# MERGE the edges between nodes matched by id (run after the node insertion above).
driver_merge(edges, "edge")

## 6.4. Graph integrity:

### 6.4.1. Nodes labels, categories and types:

In [43]:
# cat_passed, cat_actual = check_nodes_labels(n_cat_counter, "categories")

In [44]:
# typ_passed, typ_actual = check_nodes_labels(n_type_counter, "types")

### 6.4.2. Edges labels:

In [45]:
def check_labels(expected, entities=None, label_kind=None):
    """
    Compare expected label counts with the counts found in Neo4J.

    Args:
        expected: mapping label -> expected count.
        entities: "nodes" (labels are node labels) or "edges" (labels are
            relationship types).
        label_kind: free text used only in the printed summary line.

    Returns:
        (passed, actual): whether every count matches, and the counts read
        from Neo4J (labels without any match are absent from actual).

    Raises:
        ValueError: if entities is neither "nodes" nor "edges".
    """

    exp_k = list(expected.keys())

    if entities not in ("nodes", "edges"):
        raise ValueError("entities must be 'nodes' or 'edges'.")

    if entities == "nodes":
        cypher = """
        UNWIND $exp_k AS k
        MATCH (n)
        WHERE k IN labels(n)
        RETURN k AS item, count(*) AS c
        """
    else:
        cypher = """
        UNWIND $exp_k AS k
        MATCH ()-[r]->()
        WHERE type(r) = k
        RETURN k AS item, count(*) As c
        """

    with driver.session(database=DATABASE) as session:
        rows = session.run(cypher, {"exp_k": exp_k}).data()

    actual = {}
    for row in rows:
        actual[row["item"]] = row["c"]

    # report every label whose count differs, in sorted order
    passed = True
    for label in sorted(set(expected) | set(actual)):
        exp = expected.get(label, 0)
        act = actual.get(label, 0)
        if exp == act:
            continue
        print(f"{label}, expected = {exp}, actual = {act}, difference = {abs(exp - act)}.")
        passed = False

    print(f"The {entities} {label_kind} match: {passed}.")

    return passed, actual

In [46]:
# Compare the node category counts in Neo4j with the baseline from ent_labels().
check_labels(n_cat_counter, "nodes", "categories")

The nodes categories match: True.


(True,
 {'PSET': 12,
  'QSET': 10,
  'TYPE': 11,
  'MATERIAL': 7,
  'SPATIAL': 4,
  'PROJECT': 1,
  'ELEMENT': 18,
  'CLASSIFICATION_REF': 1})

In [47]:
# Compare the node type (Ifc class) counts in Neo4j with the baseline from ent_labels().
check_labels(n_type_counter, "nodes", "types")

The nodes types match: True.


(True,
 {'IfcPropertySet': 12,
  'IfcElementQuantity': 10,
  'IfcFootingType': 1,
  'IfcWallType': 4,
  'IfcChimneyType': 1,
  'IfcRoofType': 1,
  'IfcBeamType': 1,
  'IfcDiscreteAccessoryType': 1,
  'IfcBuildingElementProxyType': 2,
  'IfcMaterial': 7,
  'IfcBuildingStorey': 1,
  'IfcBuilding': 1,
  'IfcSite': 2,
  'IfcProject': 1,
  'IfcRoof': 1,
  'IfcClassificationReference': 1,
  'IfcWall': 4,
  'IfcBeam': 6,
  'IfcFooting': 1,
  'IfcChimney': 1,
  'IfcDiscreteAccessory': 2,
  'IfcBuildingElementProxy': 3})

In [48]:
# Compare the relationship type counts in Neo4j with the baseline from ent_labels().
check_labels(e_type_counter, "edges", "types")

The edges types match: True.


(True,
 {'DEFINED_BY_PROPERTIES': 12,
  'DEFINED_BY_QUANTITIES': 10,
  'DEFINED_BY_TYPE': 17,
  'ASSOCIATED_MATERIAL': 17,
  'CONTAINED_IN': 10,
  'AGGREGATES': 12,
  'ASSOCIATED_CLASSIFICATION': 1})

In [49]:
# Display the edge dictionary for manual inspection.
edges

{'141p4IPfrBnRJ5dWbakDZK:18DB$FpSH4QhNntrrSRVGz->0c$N1CTon2BB2Sp89385G8': {'id': '141p4IPfrBnRJ5dWbakDZK:18DB$FpSH4QhNntrrSRVGz->0c$N1CTon2BB2Sp89385G8',
  'labels': 'DEFINED_BY_PROPERTIES',
  'rel_class': 'IfcRelDefinesByProperties',
  'rel_id': '141p4IPfrBnRJ5dWbakDZK',
  'from': '18DB$FpSH4QhNntrrSRVGz',
  'to': '0c$N1CTon2BB2Sp89385G8'},
 '37JtmMF1HE8uDDbwzFG43E:0hVJYXG1r7ywQAq8Vpug40->0DyViLJJ175RvWQi1rE7a6': {'id': '37JtmMF1HE8uDDbwzFG43E:0hVJYXG1r7ywQAq8Vpug40->0DyViLJJ175RvWQi1rE7a6',
  'labels': 'DEFINED_BY_PROPERTIES',
  'rel_class': 'IfcRelDefinesByProperties',
  'rel_id': '37JtmMF1HE8uDDbwzFG43E',
  'from': '0hVJYXG1r7ywQAq8Vpug40',
  'to': '0DyViLJJ175RvWQi1rE7a6'},
 '1SWTv4zQ95LhMduNc2qS5N:0VVWsZ$_bFHgmAwNtcydte->0DyViLJJ175RvWQi1rE7a6': {'id': '1SWTv4zQ95LhMduNc2qS5N:0VVWsZ$_bFHgmAwNtcydte->0DyViLJJ175RvWQi1rE7a6',
  'labels': 'DEFINED_BY_QUANTITIES',
  'rel_class': 'IfcRelDefinesByProperties',
  'rel_id': '1SWTv4zQ95LhMduNc2qS5N',
  'from': '0VVWsZ$_bFHgmAwNtcydte',
  '

In [50]:
from collections import Counter

# Check that both endpoints of every edge exist among the prepared nodes.
node_ids = set(nodes_ready)

missing_from = []
missing_to = []

for e in edges.values():
    record = (e["labels"], e["rel_id"], e["from"], e["to"])
    for endpoint, bucket in (("from", missing_from), ("to", missing_to)):
        if e[endpoint] not in node_ids:
            bucket.append(record)

print(f"Missing FROM endpoints: {len(missing_from)}")
print(f"Missing TO endpoints:   {len(missing_to)}")

# (optional) see which relationship types are affected
print("Missing FROM by type:", Counter(rec[0] for rec in missing_from))
print("Missing TO by type:",   Counter(rec[0] for rec in missing_to))

Missing FROM endpoints: 0
Missing TO endpoints:   0
Missing FROM by type: Counter()
Missing TO by type: Counter()


### 6.4.3. Nodes attributes and properties:

## THE NESTED PROPERTIES DICTIONARY HAS BEEN FLATTENED!!!!  
## CAPTURE ATTRIBUTES AND PROPERTIES AFTER FLATTENING FOR COMPARISON.  

### 6.4.4. Edges attributes:

## 6.5. Exploration & visualization:

## 6.6. Graph summary:

In [51]:
# driver.close()