# IFC File Extraction Tutorial

This notebook demonstrates how to extract building elements from IFC (Industry Foundation Classes) files and convert them to a structured JSON format using the AEC Compliance Agent.

## What you'll learn:
1. How to load and analyze IFC files
2. How to extract building elements (spaces, doors, walls, levels)
3. How to convert IFC data to a standardized JSON format
4. How to visualize the extracted building data
5. How IFC extraction compares with DWG extraction

## Setup and Imports

In [None]:
import sys
import json
import logging
from pathlib import Path
from typing import List, Dict, Any

# Add src to path
sys.path.insert(0, str(Path.cwd().parent / 'src'))

# Import our modules
from extraction.ifc_extractor import IFCExtractor, extract_from_ifc
from extraction.unified_extractor import UnifiedExtractor, analyze_file
from schemas import Project, Room, Door, Wall
from utils.visualization import create_building_plot

# Import additional libraries
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# ifcopenshell is treated as optional here: report whether the import works
# rather than letting a missing package abort the setup cell.
try:
    import ifcopenshell
except ImportError:
    print("❌ IfcOpenShell is not installed. Install with: pip install ifcopenshell")
else:
    print("✅ IfcOpenShell is available")

# Configure logging at INFO level and create this notebook's logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

print("Setup complete!")

## 1. IFC File Analysis

Before extracting data, let's analyze an IFC file to understand its structure and contents.

In [None]:
# Location of the IFC model used by the rest of the notebook - edit this to
# point at your own IFC file.
ifc_file_path = Path('../data/blueprints/sample_building.ifc')

if ifc_file_path.exists():
    print(f"✅ Found IFC file: {ifc_file_path}")
    size_mb = ifc_file_path.stat().st_size / (1024 * 1024)
    print(f"File size: {size_mb:.1f} MB")
else:
    print(f"❌ IFC file not found at: {ifc_file_path}")
    print("Please update the path to point to an actual IFC file.")
    # Later cells test `ifc_file_path` for truthiness, so None marks "no file".
    ifc_file_path = None

In [None]:
# Summarise the IFC file's contents; skipped when no file was found
if not (ifc_file_path and ifc_file_path.exists()):
    print("⚠️ Skipping analysis - no IFC file available")
else:
    try:
        print("Analyzing IFC file structure...")
        analysis = analyze_file(ifc_file_path)

        print("\n📊 IFC File Analysis:")
        print(f"Schema: {analysis.get('ifc_schema', 'Unknown')}")
        print(f"Total entities: {analysis.get('total_entities', 0):,}")
        # The per-type counts share one output format, so drive them from a
        # (label, key) table; a missing key is reported as 0.
        for label, key in (
            ("Building stories", "building_stories"),
            ("Spaces", "spaces"),
            ("Doors", "doors"),
            ("Walls", "walls"),
            ("Windows", "windows"),
        ):
            print(f"{label}: {analysis.get(key, 0)}")

        # Ten most frequent entity types, highest count first
        entity_counts = analysis.get('entity_counts', {})
        if entity_counts:
            print("\n🏗️ Top Entity Types:")
            ranked = sorted(entity_counts.items(), key=lambda item: item[1], reverse=True)
            for entity_type, count in ranked[:10]:
                print(f"  {entity_type}: {count}")

    except Exception as exc:
        print(f"❌ Error analyzing file: {exc}")

## 2. Raw IFC Data Exploration

Let's explore the raw IFC data using ifcopenshell directly to understand the file structure.

In [None]:
# Load IFC file directly with ifcopenshell (if available)
PREVIEW_LIMIT = 5  # how many storeys / spaces to list before summarising the rest


def _print_preview(entities, describe, limit=PREVIEW_LIMIT):
    """Print a numbered preview of `entities`.

    Args:
        entities: Sequence of IFC entities (as returned by `by_type`).
        describe: Callable turning one entity into the text shown after its number.
        limit: Maximum number of entities listed individually; any remainder
            is reported as a single "... and N more" line.
    """
    for position, entity in enumerate(entities[:limit], start=1):
        print(f"  {position}. {describe(entity)}")
    if len(entities) > limit:
        print(f"  ... and {len(entities) - limit} more")


def _describe_storey(storey):
    """Return '<name> (Elevation: <value>)' for a building storey."""
    name = storey.Name or f"Story {storey.id()}"
    # Treat both a missing attribute and a None value as an unknown elevation.
    elevation = getattr(storey, 'Elevation', None)
    if elevation is None:
        elevation = 'Unknown'
    return f"{name} (Elevation: {elevation})"


def _describe_space(space):
    """Return the space's name, else its long name, else an id-based fallback."""
    return space.Name or space.LongName or f"Space {space.id()}"


# Flag kept for later use; set to True only after every section below has run.
ifc_loaded = False

if not (ifc_file_path and ifc_file_path.exists()):
    print("⚠️ Skipping raw IFC exploration - no file available")
elif "ifcopenshell" not in globals():
    # The setup cell only prints a message when the import fails, so check
    # explicitly instead of failing below with a confusing NameError.
    print("⚠️ Skipping raw IFC exploration - ifcopenshell is not installed")
else:
    try:
        print("Loading IFC file with ifcopenshell...")
        ifc_file = ifcopenshell.open(str(ifc_file_path))

        print(f"✅ Loaded IFC file: {ifc_file.schema}")

        # Get project information (first IfcProject only)
        projects = ifc_file.by_type('IfcProject')
        if projects:
            project = projects[0]
            print(f"\n🏢 Project: {project.Name or 'Unnamed'}")
            if project.Description:
                print(f"Description: {project.Description}")

        # Get building information (first IfcBuilding only)
        buildings = ifc_file.by_type('IfcBuilding')
        if buildings:
            building = buildings[0]
            print(f"\n🏗️ Building: {building.Name or 'Unnamed'}")
            if building.Description:
                print(f"Description: {building.Description}")

        # Get building stories
        stories = ifc_file.by_type('IfcBuildingStorey')
        print(f"\n📐 Building Stories ({len(stories)}):")
        _print_preview(stories, _describe_storey)

        # Get some sample spaces
        spaces = ifc_file.by_type('IfcSpace')
        print(f"\n🏠 Spaces/Rooms ({len(spaces)}):")
        _print_preview(spaces, _describe_space)

        ifc_loaded = True

    except Exception as e:
        print(f"❌ Error loading IFC file: {e}")

## 3. Extract Building Data with IFC Extractor

Now let's use our IFC extractor to convert the IFC data to our standardized format.

In [None]:
# Run the project's IFC extractor and summarise what it produced.
# `extraction_success` and `extracted_project` are read by the following cells.
if not (ifc_file_path and ifc_file_path.exists()):
    print("⚠️ Skipping extraction - no IFC file available")
    extraction_success = False
else:
    try:
        print("Extracting building data from IFC file...")

        # Load the file first; only extract when loading reports success
        extractor = IFCExtractor()
        success = extractor.load_file(ifc_file_path)

        if not success:
            print("❌ Failed to load IFC file")
            extraction_success = False
        else:
            project = extractor.extract_all()

            print("\n✅ Extraction completed successfully!")

            print("\n📋 Project Metadata:")
            print(f"  Name: {project.metadata.project_name}")
            print(f"  Building Type: {project.metadata.building_type}")
            print(f"  Total Area: {project.metadata.total_area:.1f} m²")
            print(f"  Number of Levels: {project.metadata.number_of_levels}")

            print("\n🏗️ Building Elements:")
            print(f"  Levels: {len(project.levels)}")
            print(f"  Rooms: {len(project.get_all_rooms())}")
            print(f"  Doors: {len(project.get_all_doors())}")
            print(f"  Walls: {len(project.get_all_walls())}")

            # Keep the result for the analysis, plotting and export cells
            extracted_project = project
            extraction_success = True

    except Exception as exc:
        print(f"❌ Error during extraction: {exc}")
        import traceback
        traceback.print_exc()
        extraction_success = False

## 4. Analyze Extracted Data

Let's examine the extracted data in detail.

In [None]:
# Print per-level, room, door and wall statistics for the extracted project
if not extraction_success:
    print("⚠️ No extracted data available for analysis")
else:
    print("📊 Detailed Analysis of Extracted Data\n")

    # --- Levels: name, elevation and element counts per level ---
    print(f"🏢 Levels ({len(extracted_project.levels)}):")
    for number, level in enumerate(extracted_project.levels, start=1):
        print(f"  {number}. {level.name} (Elevation: {level.elevation:.1f}m)")
        print(f"     - Rooms: {len(level.rooms)}")
        print(f"     - Doors: {len(level.doors)}")
        print(f"     - Walls: {len(level.walls)}")

    # --- Rooms: totals, breakdown by use, and the first three rooms ---
    all_rooms = extracted_project.get_all_rooms()
    if all_rooms:
        print("\n🏠 Rooms Analysis:")

        # Tally rooms per use and accumulate the total area in a single pass
        room_uses = {}
        total_area = 0
        for room in all_rooms:
            room_uses.setdefault(room.use, 0)
            room_uses[room.use] += 1
            total_area += room.area

        room_count = len(all_rooms)
        print(f"  Total rooms: {room_count}")
        print(f"  Total area: {total_area:.1f} m²")
        print(f"  Average room size: {total_area/room_count:.1f} m²")

        print("\n  Room types:")
        for use, count in sorted(room_uses.items()):
            print(f"    {use}: {count}")

        print("\n  Sample rooms:")
        for number, room in enumerate(all_rooms[:3], start=1):
            print(f"    {number}. {room.name} ({room.use})")
            print(f"       Area: {room.area:.1f} m², Level: {room.level}")
            if room.occupancy_load:
                print(f"       Occupancy: {room.occupancy_load} people")

    # --- Doors: totals, emergency exits, breakdown by type, first three doors ---
    all_doors = extracted_project.get_all_doors()
    if all_doors:
        print("\n🚪 Doors Analysis:")
        print(f"  Total doors: {len(all_doors)}")

        door_types = {}
        emergency_exits = 0
        for door in all_doors:
            door_types.setdefault(door.door_type, 0)
            door_types[door.door_type] += 1
            if door.is_emergency_exit:
                emergency_exits += 1

        print(f"  Emergency exits: {emergency_exits}")
        print("\n  Door types:")
        for dtype, count in sorted(door_types.items()):
            print(f"    {dtype}: {count}")

        print("\n  Sample doors:")
        for number, door in enumerate(all_doors[:3], start=1):
            print(f"    {number}. {door.name or door.id} ({door.door_type})")
            print(f"       Size: {door.width_mm}×{door.height_mm}mm")
            exit_flag = 'Yes' if door.is_emergency_exit else 'No'
            print(f"       Emergency exit: {exit_flag}")

    # --- Walls: totals, lengths and breakdown by material ---
    all_walls = extracted_project.get_all_walls()
    if all_walls:
        print("\n🧱 Walls Analysis:")
        print(f"  Total walls: {len(all_walls)}")

        materials = {}
        total_length = 0
        for wall in all_walls:
            # Walls without a material are left out of the material tally
            if wall.material:
                materials.setdefault(wall.material, 0)
                materials[wall.material] += 1
            # Straight-line distance between the wall's end points in the x/y plane
            dx = wall.end_point.x - wall.start_point.x
            dy = wall.end_point.y - wall.start_point.y
            total_length += (dx*dx + dy*dy)**0.5

        wall_count = len(all_walls)
        print(f"  Total length: {total_length:.1f} m")
        print(f"  Average wall length: {total_length/wall_count:.1f} m")

        if materials:
            print("\n  Wall materials:")
            for material, count in sorted(materials.items()):
                print(f"    {material}: {count}")

## 5. Create Data Visualizations

Let's create some visualizations of the extracted building data.

In [None]:
# Draw a 2x2 overview figure of the extracted project
if not extraction_success:
    print("⚠️ No data available for visualization")
else:
    try:
        plt.style.use('default')
        fig, axes = plt.subplots(2, 2, figsize=(15, 12))
        fig.suptitle('IFC Building Data Analysis', fontsize=16, fontweight='bold')
        # Name the four panels once instead of indexing the grid everywhere
        (ax_areas, ax_uses), (ax_doors, ax_levels) = axes

        all_rooms = extracted_project.get_all_rooms()
        if all_rooms:
            # Panel 1: histogram of room areas (at most 10 bins)
            room_areas = [room.area for room in all_rooms]
            ax_areas.hist(
                room_areas,
                bins=min(10, len(room_areas)),
                alpha=0.7,
                color='skyblue',
                edgecolor='black',
            )
            ax_areas.set_title('Room Area Distribution')
            ax_areas.set_xlabel('Area (m²)')
            ax_areas.set_ylabel('Number of Rooms')
            ax_areas.grid(True, alpha=0.3)

            # Panel 2: share of rooms per use
            room_uses = {}
            for room in all_rooms:
                room_uses[room.use] = room_uses.get(room.use, 0) + 1

            if room_uses:
                use_labels = tuple(room_uses.keys())
                use_counts = tuple(room_uses.values())
                colors = plt.cm.Set3(np.linspace(0, 1, len(use_labels)))
                ax_uses.pie(use_counts, labels=use_labels, autopct='%1.1f%%', colors=colors, startangle=90)
                ax_uses.set_title('Room Types Distribution')

        # Panel 3: number of doors per door type
        all_doors = extracted_project.get_all_doors()
        if all_doors:
            door_types = {}
            for door in all_doors:
                door_types[door.door_type] = door_types.get(door.door_type, 0) + 1

            if door_types:
                type_labels = tuple(door_types.keys())
                type_counts = tuple(door_types.values())
                ax_doors.bar(type_labels, type_counts, color='lightcoral', alpha=0.7, edgecolor='black')
                ax_doors.set_title('Door Types Distribution')
                ax_doors.set_xlabel('Door Type')
                ax_doors.set_ylabel('Count')
                ax_doors.tick_params(axis='x', rotation=45)
                ax_doors.grid(True, alpha=0.3)

        # Panel 4: rooms and doors per level as side-by-side bars
        if extracted_project.levels:
            level_names = []
            level_rooms = []
            level_doors = []
            for level in extracted_project.levels:
                level_names.append(level.name)
            for level in extracted_project.levels:
                level_rooms.append(len(level.rooms))
            for level in extracted_project.levels:
                level_doors.append(len(level.doors))

            positions = np.arange(len(level_names))
            bar_width = 0.35

            # Shift each series by half a bar width so the pairs sit around the tick
            ax_levels.bar(positions - bar_width/2, level_rooms, bar_width, label='Rooms', color='lightblue', alpha=0.7)
            ax_levels.bar(positions + bar_width/2, level_doors, bar_width, label='Doors', color='lightgreen', alpha=0.7)

            ax_levels.set_title('Elements per Level')
            ax_levels.set_xlabel('Level')
            ax_levels.set_ylabel('Count')
            ax_levels.set_xticks(positions)
            ax_levels.set_xticklabels(level_names, rotation=45, ha='right')
            ax_levels.legend()
            ax_levels.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

        print("✅ Visualizations created successfully!")

    except Exception as exc:
        print(f"❌ Error creating visualizations: {exc}")
        import traceback
        traceback.print_exc()

## 6. Export Data to JSON

Let's save the extracted data to a JSON file for further processing.

In [None]:
# Export the extracted project to a JSON file under ../data/extracted
if extraction_success:
    try:
        # Create the output directory together with any missing parents;
        # without parents=True this fails when ../data does not exist yet.
        output_dir = Path('../data/extracted')
        output_dir.mkdir(parents=True, exist_ok=True)

        # Name the output after the source IFC file when its path is known
        if ifc_file_path:
            output_file = output_dir / f"{ifc_file_path.stem}_ifc_extracted.json"
        else:
            output_file = output_dir / "ifc_extracted.json"

        # Convert to dictionary
        project_data = extracted_project.dict()

        # Serialize fully in memory before touching the disk, so a value that
        # cannot be encoded does not leave a truncated JSON file behind.
        json_text = json.dumps(project_data, indent=2, ensure_ascii=False)
        output_file.write_text(json_text, encoding='utf-8')

        print(f"✅ Data exported to: {output_file}")
        print(f"File size: {output_file.stat().st_size / 1024:.1f} KB")

        # Show a compact summary instead of dumping the whole document
        print("\n📄 JSON Structure (sample):")
        sample_data = {
            "metadata": project_data["metadata"],
            "levels_count": len(project_data["levels"]),
            "total_rooms": len(extracted_project.get_all_rooms()),
            "total_doors": len(extracted_project.get_all_doors()),
            "total_walls": len(extracted_project.get_all_walls())
        }
        print(json.dumps(sample_data, indent=2))

    except Exception as e:
        print(f"❌ Error exporting data: {e}")
        # Show the traceback as the extraction and visualization cells do
        import traceback
        traceback.print_exc()
else:
    print("⚠️ No data available for export")

## 7. Using the Unified Extractor

The unified extractor can handle both DWG and IFC files automatically.

In [None]:
# Run the unified extractor on the IFC file and compare it with the direct extraction
if ifc_file_path and ifc_file_path.exists():
    try:
        print("Testing Unified Extractor with IFC file...")

        # Create unified extractor
        unified_extractor = UnifiedExtractor()

        # Extract using unified interface
        unified_project = unified_extractor.extract_from_file(ifc_file_path)

        # Count each element type once; the counts are reused for the comparison
        unified_counts = {
            "Rooms": len(unified_project.get_all_rooms()),
            "Doors": len(unified_project.get_all_doors()),
            "Walls": len(unified_project.get_all_walls()),
        }

        print("\n✅ Unified extraction completed!")
        print(f"Project: {unified_project.metadata.project_name}")
        print(f"Levels: {len(unified_project.levels)}")
        for label, count in unified_counts.items():
            print(f"{label}: {count}")

        # Compare with direct IFC extraction (only when that cell succeeded)
        if extraction_success:
            direct_counts = {
                "Rooms": len(extracted_project.get_all_rooms()),
                "Doors": len(extracted_project.get_all_doors()),
                "Walls": len(extracted_project.get_all_walls()),
            }

            print("\n🔍 Comparison with direct IFC extraction:")
            for label, count in unified_counts.items():
                print(f"{label}: {direct_counts[label]} → {count}")

            # Check the counts rather than merely asserting that they should match
            if direct_counts == unified_counts:
                print("✅ Results are identical")
            else:
                print("⚠️ Results differ between direct and unified extraction")

    except Exception as e:
        print(f"❌ Error with unified extractor: {e}")
else:
    print("⚠️ Skipping unified extractor test - no IFC file available")

## 8. Comparison with DWG Extraction

Let's compare the capabilities of IFC extraction vs DWG extraction.

In [None]:
print("📊 IFC vs DWG Extraction Comparison\n")

# First row is the table header; every following row is one comparison aspect.
comparison_data = [
    ["Aspect", "IFC Extraction", "DWG Extraction"],
    ["File Format", "IFC (Industry Foundation Classes)", "DWG/DXF (AutoCAD)"],
    ["Data Richness", "Very High - Semantic building information", "Medium - Geometric information"],
    ["Spaces/Rooms", "Direct from IfcSpace entities", "Inferred from closed polylines"],
    ["Doors", "Direct from IfcDoor entities", "Inferred from block insertions"],
    ["Walls", "Direct from IfcWall entities", "Inferred from line entities"],
    ["Materials", "Rich material information", "Limited material info"],
    ["Properties", "Extensive property sets", "Limited properties"],
    ["Levels/Stories", "Explicit IfcBuildingStorey", "Must be inferred"],
    ["Relationships", "Explicit spatial relationships", "Must be calculated"],
    ["Fire Ratings", "Can extract from properties", "Limited availability"],
    ["Occupancy", "Can calculate from standards", "Must be estimated"],
    ["Accuracy", "High - purpose-built for BIM", "Medium - depends on drawing quality"]
]

# Create comparison table (pandas is already imported as `pd` in the setup cell)
df = pd.DataFrame(comparison_data[1:], columns=comparison_data[0])
print(df.to_string(index=False))

print("\n💡 Key Advantages of IFC Extraction:")
print("• Semantic information: IFC files contain rich building semantics")
print("• Standardized format: IFC is an open international standard")
print("• Better relationships: Explicit spatial and functional relationships")
print("• Property sets: Extensive building element properties")
print("• Multi-disciplinary: Architecture, structure, MEP in one file")

print("\n💡 When to use each:")
print("• Use IFC: For modern BIM workflows, detailed compliance checking")
print("• Use DWG: For legacy CAD files, when IFC is not available")

## 9. Best Practices and Tips

Here are some best practices for working with IFC files in building compliance workflows.

In [None]:
print("🎯 Best Practices for IFC Extraction\n")

# Each entry pairs a topic ("category") with its list of recommendations ("tips")
practices = [
    {
        "category": "File Preparation",
        "tips": [
            "Ensure IFC files are exported with proper space boundaries",
            "Include property sets relevant to fire safety and compliance",
            "Use consistent naming conventions for spaces and elements",
            "Export with appropriate level of detail for your analysis",
        ],
    },
    {
        "category": "Data Quality",
        "tips": [
            "Validate IFC files before extraction using IFC viewers",
            "Check for missing or incomplete space definitions",
            "Verify door and wall connectivity",
            "Ensure building stories are properly defined",
        ],
    },
    {
        "category": "Extraction Optimization",
        "tips": [
            "Process large files in chunks if memory is limited",
            "Filter entities by type for faster processing",
            "Cache frequently accessed property sets",
            "Use logging to track extraction progress",
        ],
    },
    {
        "category": "Compliance Checking",
        "tips": [
            "Map IFC space types to building use classifications",
            "Extract fire rating information from material properties",
            "Calculate egress distances using space relationships",
            "Validate door widths against accessibility requirements",
        ],
    },
]

for entry in practices:
    bullet_lines = [f"  • {tip}" for tip in entry['tips']]
    # The trailing empty string produces the blank line after each category
    print("\n".join([f"📋 {entry['category']}:", *bullet_lines, ""]))

print("⚠️ Common Issues and Solutions:")
issues = [
    "Missing spaces → Check IFC export settings for space boundaries",
    "Incorrect room areas → Verify space geometry and units",
    "Missing door connections → Check spatial containment relationships",
    "Performance issues → Use entity filtering and streaming for large files",
    "Property extraction fails → Check property set naming and data types",
]
print("\n".join(f"  • {issue}" for issue in issues))

print("\n🔧 Troubleshooting Commands:")
# Printed one per line; empty strings become the blank lines between commands
command_lines = [
    "# Analyze IFC file structure",
    "python -m src.extraction.unified_extractor --analyze building.ifc",
    "",
    "# Extract with verbose logging",
    "python scripts/extract_ifc_files.py -f building.ifc -v",
    "",
    "# Batch process directory",
    "python scripts/extract_ifc_files.py -d /path/to/ifc/files/ -o /output/dir/",
]
for command_line in command_lines:
    print(command_line)

## 10. Next Steps

Now that you've learned IFC extraction, here are the next steps in your building compliance workflow.

In [None]:
print("🚀 Next Steps in Your Building Compliance Workflow\n")

# Follow-up notebooks: what each one covers and the key actions it walks through
next_steps = [
    {
        "step": "1. Geometry Analysis",
        "description": "Use extracted data for spatial analysis and calculations",
        "notebook": "02_calculations_simple.ipynb",
        "actions": [
            "Calculate egress distances",
            "Analyze spatial relationships",
            "Compute room adjacencies",
        ],
    },
    {
        "step": "2. RAG System Integration",
        "description": "Query building regulations using extracted data",
        "notebook": "03_rag_simple.ipynb",
        "actions": [
            "Load building regulations",
            "Query specific requirements",
            "Get contextual guidance",
        ],
    },
    {
        "step": "3. AI Agent Workflow",
        "description": "Use AI agents for automated compliance checking",
        "notebook": "04_agent_simple.ipynb",
        "actions": [
            "Automated compliance analysis",
            "Generate compliance reports",
            "Interactive Q&A about building",
        ],
    },
]

for entry in next_steps:
    section_lines = [
        f"📌 {entry['step']}",
        f"   {entry['description']}",
        f"   Notebook: {entry['notebook']}",
        "   Key actions:",
    ]
    section_lines.extend(f"   • {action}" for action in entry['actions'])
    # The trailing empty string produces the blank line after each step
    section_lines.append("")
    print("\n".join(section_lines))

print("🎯 Recommended Workflow:")
workflow = [
    "Extract building data (IFC/DWG) → JSON format",
    "Analyze geometry and calculate compliance metrics",
    "Query regulations using RAG system",
    "Run automated compliance checks with AI agents",
    "Generate compliance reports and recommendations",
]

for position, stage in enumerate(workflow, start=1):
    print(f"{position}. {stage}")

print("\n📚 Additional Resources:")
for resource in (
    "• IFC Documentation: https://www.buildingsmart.org/standards/bsi-standards/industry-foundation-classes/",
    "• IfcOpenShell Docs: https://docs.ifcopenshell.org/",
    "• Building Compliance Guides: Check your local building authority",
):
    print(resource)

print("\n✅ Congratulations! You've completed the IFC Extraction Tutorial.")
# The extraction summary is only shown when the extraction cell succeeded
if extraction_success:
    room_total = len(extracted_project.get_all_rooms())
    door_total = len(extracted_project.get_all_doors())
    wall_total = len(extracted_project.get_all_walls())
    print(f"Successfully extracted {room_total} rooms, {door_total} doors, and {wall_total} walls.")
print("You're now ready to proceed with building compliance analysis!")