# **10 - EXPOSURE MODEL FILTER SCRIPT**

**IRDR0012 MSc Independent Research Project**

*   Candidate number: NWHL6
*   Institution: UCL IRDR
*   Supervisor: Dr. Roberto Gentile
*   Date: 01/09/2025
*   Version: v1.0

**Description:**

This script filters the GMPE exposure models to extract buildings matching
the selected building classes for comparison with analytical fragility functions.

**INPUT FILES:**

*   Exposure model_Master.csv

**OUTPUT FILES:**

*   filtered_exposure_master.csv
*   filtered_exposure_summary.csv


# 0. ENVIRONMENT SETUP

In [None]:
import pandas as pd
import numpy as np
import warnings
# Suppresses every Python warning from this point on.
# NOTE(review): presumably done to keep cell output short, but it also hides
# pandas deprecation/dtype warnings — consider narrowing the filter.
warnings.filterwarnings('ignore')

# Colab-only dependency: makes Google Drive available under /content/drive,
# which is the root of the INPUT/OUTPUT folder paths used by this notebook.
from google.colab import drive
drive.mount('/content/drive')

# Google Drive folders the notebook reads from (INPUT) and writes to (OUTPUT).
# Both end with "/" because later cells append the file name directly.
INPUT_FOLDER = '/content/drive/MyDrive/IRDR0012_Research Project/00 INPUT/'
OUTPUT_FOLDER = '/content/drive/MyDrive/IRDR0012_Research Project/01 OUTPUT/'

# Start-up banner: one argument per output line, emitted in a single call.
print(
    "=" * 70,
    "EXPOSURE MODEL FILTER SCRIPT",
    "=" * 70,
    "Environment setup complete",
    sep="\n",
)

Mounted at /content/drive
EXPOSURE MODEL FILTER SCRIPT
Environment setup complete


# 1. BUILDING CLASSIFICATION CONFIGURATION

BUILDING SELECTION CRITERIA

Based on fragility function availability and sample size analysis:
- MCF H:2 buildings -> Use with Villar-Vega MCF DNO H:2 (SA(0.3))
- MCF H:3 buildings (including HBET:3-6) -> Use with Villar-Vega MCF DNO H:3 (SA(1.0))
- EU H:1 buildings (material codes EU and E) -> Use with Villar-Vega MUR+ADO H:1 (PGA)

In [None]:
# Selection criteria per target building class, keyed by class label.
#   material / height / occupancy : lists of accepted codes; a building must
#       match one entry from each list to be assigned to the class.
#   fragility_id : identifier reported as Fragility_ID in the summary table.
#   description  : human-readable label used in printed reports.
TARGET_BUILDING_CLASSES = dict(
    MCF_H2=dict(
        material=['MCF'],
        height=['H:2'],
        occupancy=['RES', 'MIX'],
        fragility_id=562,
        description='MCF 2-storey buildings',
    ),
    MCF_H3=dict(
        material=['MCF'],
        height=['H:3', 'HBET:3-6'],
        occupancy=['RES', 'MIX'],
        fragility_id=564,
        description='MCF 3+ storey buildings',
    ),
    EU_H1=dict(
        material=['EU', 'E'],
        height=['H:1'],
        occupancy=['RES', 'MIX'],
        fragility_id=565,
        description='Earth 1-storey buildings',
    ),
)

# List the configured classes, one "<key>: <description>" line each.
print("\nTarget Building Classes:")
print("-" * 50)
print("\n".join(
    f"{class_key}: {info['description']}"
    for class_key, info in TARGET_BUILDING_CLASSES.items()
))


Target Building Classes:
--------------------------------------------------
MCF_H2: MCF 2-storey buildings
MCF_H3: MCF 3+ storey buildings
EU_H1: Earth 1-storey buildings


# 2. EXPOSURE DATA PROCESSING

In [None]:
def _value_counts_as_ints(series):
    """Return ``series.value_counts()`` as a plain dict with built-in int counts."""
    # dict(Series) keeps numpy scalars, which print as "np.int64(1011)";
    # converting explicitly keeps the debug output readable.
    return {value: int(count) for value, count in series.value_counts().items()}


def _classify_buildings(df, building_classes):
    """Label each row of ``df`` in place with the key of the class it matches."""
    df['building_class'] = 'UNMATCHED'

    for class_key, criteria in building_classes.items():
        # A building belongs to the class only if all three attributes match.
        mask = (
            df['parsed_material'].isin(criteria['material'])
            & df['parsed_height'].isin(criteria['height'])
            & df['parsed_occupancy'].isin(criteria['occupancy'])
        )

        matches = int(mask.sum())
        # If classes overlap, the class processed last wins for shared rows.
        df.loc[mask, 'building_class'] = class_key
        print(f"  {class_key}: Found {matches} matches")

        # Debug: show breakdown by criteria
        if matches > 0:
            material_breakdown = _value_counts_as_ints(df.loc[mask, 'parsed_material'])
            height_breakdown = _value_counts_as_ints(df.loc[mask, 'parsed_height'])
            print(f"    Materials: {material_breakdown}")
            print(f"    Heights: {height_breakdown}")


def filter_master_exposure(filename=None, building_classes=None):
    """
    Filter master exposure model to extract target building classes.

    Parameters:
    -----------
    filename : str, optional
        Path of the master exposure CSV. Defaults to
        'Exposure model_Master.csv' inside INPUT_FOLDER.
    building_classes : dict, optional
        Mapping of class key -> criteria dict holding 'material', 'height'
        and 'occupancy' lists. Defaults to TARGET_BUILDING_CLASSES.

    Returns:
    --------
    DataFrame : Filtered exposure data with added 'parsed_material',
        'parsed_height', 'parsed_occupancy' and 'building_class' columns.
        An empty DataFrame is returned when no building matches, and None
        when the input file does not exist.

    Raises:
    -------
    KeyError : If the CSV lacks the 'Material LLRS', 'Height' or
        'Occupancy' column.
    """
    # Resolve defaults at call time so the notebook-level constants are only
    # required when the caller does not pass explicit values.
    if filename is None:
        filename = f'{INPUT_FOLDER}Exposure model_Master.csv'
    if building_classes is None:
        building_classes = TARGET_BUILDING_CLASSES

    print("\nProcessing Master Exposure Model:")
    print("-" * 40)

    try:
        df = pd.read_csv(filename)
    except FileNotFoundError:
        print(f"✗ File not found: {filename}")
        return None

    print(f"✓ Loaded {len(df):,} records")
    print(f"Columns available: {list(df.columns)}")

    # Fail early with an explicit message instead of an opaque KeyError
    # raised halfway through the classification.
    required_cols = ['Material LLRS', 'Height', 'Occupancy']
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise KeyError(
            f"Exposure data is missing required column(s): {missing_cols}"
        )

    # Show sample data for debugging
    print("\nSample building data:")
    for col in ['Material LLRS', 'Height', 'Occupancy', 'DG']:
        if col in df.columns:
            unique_vals = df[col].dropna().unique()[:5]
            print(f"  {col}: {list(unique_vals)}")

    # Base material is everything before the first "+"; surrounding
    # whitespace is removed so values such as "MCF +MO" still match.
    df['parsed_material'] = df['Material LLRS'].str.split('+').str[0].str.strip()
    df['parsed_height'] = df['Height']
    df['parsed_occupancy'] = df['Occupancy']

    print("\nApplying building classification...")
    _classify_buildings(df, building_classes)

    # Filter to matched buildings only
    matched_df = df[df['building_class'] != 'UNMATCHED'].copy()

    print(f"✓ Total buildings matched to target classes: {len(matched_df):,}")

    if matched_df.empty:
        print("✗ No buildings matched the classification criteria")

        # Debug: show what values we actually have
        print("\nDEBUG - Available values in data:")
        print(f"  Material LLRS values: {df['Material LLRS'].unique()[:10]}")
        print(f"  Height values: {df['Height'].unique()}")
        print(f"  Occupancy values: {df['Occupancy'].unique()}")

        return pd.DataFrame()

    # Show classification summary
    print("\nBuilding class distribution:")
    for building_class, count in matched_df['building_class'].value_counts().items():
        print(f"  {building_class}: {count:,} buildings")

    # Show material and height distributions for verification
    print("\nMaterial distribution in matched buildings:")
    for material, count in matched_df['parsed_material'].value_counts().items():
        print(f"  {material}: {count}")

    print("\nHeight distribution in matched buildings:")
    for height, count in matched_df['parsed_height'].value_counts().items():
        print(f"  {height}: {count}")

    return matched_df

# 3. DATA EXPORT

In [None]:
def export_filtered_data(df, output_folder=None):
    """
    Export filtered exposure data to CSV.

    Writes 'filtered_exposure_master.csv' (without the index) into the
    output folder, creating the folder first if it does not exist.
    Nothing is written when there is no data.

    Parameters:
    -----------
    df : DataFrame or None
        Filtered exposure data. None or an empty frame is reported and
        skipped.
    output_folder : str, optional
        Destination folder. Defaults to OUTPUT_FOLDER.
    """
    import os  # local import keeps this cell independent of the setup cell's imports

    # The filter step returns None when the input file is missing, so treat
    # None the same way as an empty frame.
    if df is None or df.empty:
        print("✗ No data to export")
        return

    if output_folder is None:
        output_folder = OUTPUT_FOLDER

    # An empty string means "current directory", which needs no creation.
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    # Export filename
    output_filename = os.path.join(output_folder, 'filtered_exposure_master.csv')

    # Export data
    df.to_csv(output_filename, index=False)
    print(f"✓ Exported {len(df):,} buildings to: filtered_exposure_master.csv")
    print(f"  Columns exported: {list(df.columns)}")

# 4. MAIN PROCESSING

In [None]:
# Driver cell: filter the master exposure model, export the matched
# buildings, then build, print and export a per-class summary table.
print("\n" + "=" * 70)
print("FILTERING MASTER EXPOSURE MODEL")
print("=" * 70)

# Filter master exposure model (None = input file missing, empty = no matches)
filtered_df = filter_master_exposure()
filtering_succeeded = filtered_df is not None and not filtered_df.empty

if filtering_succeeded:
    # Export filtered data
    export_filtered_data(filtered_df)

    # Generate summary statistics
    print("\n" + "=" * 70)
    print("FILTERING SUMMARY REPORT")
    print("=" * 70)

    # One summary record per class, in order of first appearance in the data.
    summary_stats = [
        {
            'Building_Class': building_class,
            'N_Buildings': int((filtered_df['building_class'] == building_class).sum()),
            'Fragility_ID': TARGET_BUILDING_CLASSES[building_class]['fragility_id'],
            'Description': TARGET_BUILDING_CLASSES[building_class]['description'],
        }
        for building_class in filtered_df['building_class'].unique()
    ]
    summary_df = pd.DataFrame(summary_stats)

    print("\nFiltered Dataset Summary:")
    print("-" * 70)
    print(f"{'Class':<8} {'N_Buildings':<12} {'Fragility_ID':<12} {'Description':<25}")
    print("-" * 70)

    for entry in summary_stats:
        print(
            f"{entry['Building_Class']:<8} {entry['N_Buildings']:<12} "
            f"{entry['Fragility_ID']:<12} {entry['Description']:<25}"
        )

    # Export summary
    summary_filename = f'{OUTPUT_FOLDER}filtered_exposure_summary.csv'
    summary_df.to_csv(summary_filename, index=False)
    print("\n✓ Summary exported to: filtered_exposure_summary.csv")

    # Overall statistics
    total_buildings = summary_df['N_Buildings'].sum()
    unique_classes = len(summary_df)

    print("\nOverall Statistics:")
    print(f"  Total filtered buildings: {total_buildings:,}")
    print(f"  Building classes: {unique_classes}")

elif filtered_df is None:
    # A missing input file is a different problem from "no matches".
    print("Master exposure file could not be loaded; nothing was filtered")
else:
    print("No buildings matched the filtering criteria")

print("\n" + "=" * 70)
print("FILTERING COMPLETED" if filtering_succeeded else "FILTERING FINISHED WITHOUT OUTPUT")
print("=" * 70)

if filtering_succeeded:
    print("\nNext Steps:")
    print("1. Review filtered_exposure_master.csv in OUTPUT folder")
    print("2. Use this filtered dataset in Script 1 for fragility analysis")
    print("3. Verify building counts meet analysis requirements")
else:
    # Avoid pointing the user at files this run did not produce.
    print("\nNo output files were written in this run.")
    print("Any files already in the OUTPUT folder come from a previous run.")

print("\n" + "=" * 70)


FILTERING MASTER EXPOSURE MODEL

Processing Master Exposure Model:
----------------------------------------
✓ Loaded 16,976 records
Columns available: ['ID', 'Location', 'Region', 'Latitute', 'Longitude', 'DG', 'Material LLRS', 'LLRS', 'Height', 'Date of construction', 'Occupancy', 'Structural Irregularity', 'Roof']

Sample building data:
  Material LLRS: ['MCF+MO', 'EU+ETR', 'EU+ETO', 'MUR+ETO+ABO', 'MCF+CLBRH+MO']
  Height: ['H:2', 'H:1', 'H:3', 'H:4', 'H99']
  Occupancy: ['MIX', 'COM:5C', 'RES', 'HEA:1', 'EDU:2']
  DG: ['DG1', 'DG5', 'DG3', 'DG2', 'DG4']

Applying building classification...
  MCF_H2: Found 1011 matches
    Materials: {'MCF': np.int64(1011)}
    Heights: {'H:2': np.int64(1011)}
  MCF_H3: Found 151 matches
    Materials: {'MCF': np.int64(151)}
    Heights: {'H:3': np.int64(129), 'HBET:3-6': np.int64(22)}
  EU_H1: Found 1346 matches
    Materials: {'E': np.int64(1310), 'EU': np.int64(36)}
    Heights: {'H:1': np.int64(1346)}
✓ Total buildings matched to target classes