# Initialisation: Revision Record

In [1]:
from ReIDF import df_CLS, eg_CLS
# IDF file under review.
updatedIDF = 'idf-Oct/GEN_ReIDF/HVACDetailed-Oct-13_modified_2.idf'

# Table for the 'BuildingSurface:Detailed' class, built by the project helper
# df_CLS (presumably a pandas DataFrame — confirm against ReIDF).
df_1 = df_CLS(updatedIDF, 'BuildingSurface:Detailed')

# Distinct values of 'Field 2'; the raw values still carry their trailing comma.
surface_types = df_1['Field 2'].unique()
print(surface_types)
# To preview only the floor rows: df_1[df_1['Field 2'] == 'Floor,'].head()

['Floor,' 'Wall,' 'Roof,' 'Ceiling,']


In [2]:
import re
import numpy as np
import pandas as pd

# -----------------------------
# Inputs
# -----------------------------
# df_1 must already be defined by an earlier cell; if your raw table has a
# different name, change it here. The copy is taken because 'Field 2' is
# rewritten further down in this cell (assumes df_1 is a pandas DataFrame, so
# the rewrite does not touch df_1 itself — TODO confirm).
df = df_1.copy()

# -----------------------------
# Helpers
# -----------------------------
# Float or scientific-notation literal. Accepts an optional sign, then either
# digits with an optional fractional part ("3", "3.", "3.25") or a bare
# fraction (".25"), then an optional exponent. Every string it matches is
# accepted by float().
num = r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'
# Detects an exponent suffix (e±n) at the end of an already-extracted number.
sci_pat = re.compile(r'[eE][+-]?\d+$')

def is_na_like(v) -> bool:
    """
    Return True when `v` should be treated as a missing value.

    `v` is missing when it is None, or when its string form (trimmed,
    case-insensitive) is empty or one of the usual placeholders:
    "na", "n/a", "nan", "none", "<na>" or "nat".
    """
    if v is None:
        return True
    s = str(v).strip().lower()
    # "<na>" and "nat" are the lower-cased string forms of pandas' pd.NA and
    # pd.NaT; without them those missing markers would count as real values.
    return s in {"", "na", "n/a", "nan", "none", "<na>", "nat"}

def pick_last_coord_field(row, field_names):
    """
    Walk `field_names` from last to first and return the stripped text of the
    first populated field that ends with ';'. If no populated field ends with
    ';', return the stripped text of the last populated field instead.
    Returns None when every listed field is NA-like.
    """
    fallback = None
    for name in reversed(field_names):
        raw = row.get(name)
        if is_na_like(raw):
            continue
        text = str(raw).strip()
        if text.endswith(';'):
            return text
        # Remember the last populated field in case nothing ends with ';'.
        if fallback is None:
            fallback = text
    return fallback

def extract_z_with_flag(text):
    """
    Pull the number that sits directly before the closing ';' of `text`.

    Returns a 3-tuple (z, sci_flag, note):
      z        -- the parsed float, or np.nan when nothing could be extracted
      sci_flag -- True when the matched token ends in an exponent (e±n)
      note     -- warning text for flagged tokens, otherwise None
    """
    nothing_found = (np.nan, False, None)
    if not text:
        return nothing_found

    match = re.search(fr'({num})(?=;\s*$)', str(text))
    if match is None:
        return nothing_found

    raw_number = match.group(1)
    uses_exponent = sci_pat.search(raw_number) is not None
    z = float(raw_number)

    if not uses_exponent:
        note = None
    elif abs(z) < 1e-8:
        # Exponent form with a magnitude this small is most likely float noise.
        note = "Scientific notation and ~0; plausibly negligible rounding artifact — verify."
    else:
        note = "Scientific notation (e±n) — verify source/precision."

    return z, uses_exponent, note

# Canonical z→floor mapping
z_to_floor = {-2.99: -1, 0.00: 0, 4.50: 1, 8.00: 2, 11.50: 3, 15.00: 4, 18.50: 5, 22.00: 6}

def map_floor_from_z(z, mapping, tol=0.05):
    """
    Map a z value to a floor label using `mapping` (z key -> floor).

    z is rounded to 2 decimals first. An exact key hit returns its floor;
    otherwise the nearest key is used when it lies within `tol` (inclusive).
    Returns pd.NA when z is missing, when `mapping` is empty, or when no key
    is within `tol`.
    """
    # An empty mapping can never match; min() would raise on it.
    if pd.isna(z) or not mapping:
        return pd.NA
    zr = round(float(z), 2)
    if zr in mapping:
        return mapping[zr]
    nearest_key = min(mapping, key=lambda k: abs(zr - k))
    # Binary floats make a distance of exactly `tol` land a hair above or
    # below it (-2.94 vs -2.99 gives 0.0500000000000003), so a tiny slack
    # keeps the inclusive bound consistent on both sides.
    return mapping[nearest_key] if abs(zr - nearest_key) <= tol + 1e-9 else pd.NA

# -----------------------------
# 1) Filter to "Floor" rows
# -----------------------------
# Trim 'Field 2' and drop its commas so the surface type can be compared exactly.
surface_type = df['Field 2'].astype(str).str.strip()
df['Field 2'] = surface_type.str.replace(',', '', regex=False)
is_floor = df['Field 2'] == 'Floor'
filtered_df = df.loc[is_floor].copy()

# -----------------------------
# 2) Pull last coord from Field 10..19 and extract z (+ flags)
# -----------------------------
coord_fields = ['Field ' + str(n) for n in range(10, 20)]

# One text per row: the last populated coordinate field (preferring one ending in ';').
filtered_df['coord_text'] = filtered_df.apply(
    pick_last_coord_field, axis=1, field_names=coord_fields
)

# Each element of `out` is a (z, sci_flag, note) tuple; spread it over three columns.
out = filtered_df['coord_text'].apply(extract_z_with_flag)
z_columns = ['z_value', 'z_sci_flag', 'z_warning']
filtered_df[z_columns] = pd.DataFrame(out.tolist(), index=filtered_df.index)

# Keep the z exactly as parsed, plus a zone name without padding or trailing comma.
filtered_df['z_original'] = filtered_df['z_value']
zone_raw = filtered_df['Field 4'].astype(str)
filtered_df['zone'] = zone_raw.str.strip().str.rstrip(',')

# -----------------------------
# 3) Build an "errors/warnings" view by zone
# -----------------------------
# A row is an issue when its z was captured in scientific notation or could
# not be parsed at all (NaN).
issue_mask = filtered_df['z_sci_flag'] | filtered_df['z_value'].isna()

errors_df = (
    filtered_df.loc[issue_mask, ['zone', 'coord_text', 'z_original', 'z_sci_flag', 'z_warning']]
               .copy()
)

# Per-zone rollup: total issues, how many are scientific-notation captures,
# and how many are failed parses; zones with the most issues come first.
errors_by_zone = (
    errors_df.groupby('zone')
             .agg(
                 n_issues=('zone', 'size'),
                 n_sci=('z_sci_flag', 'sum'),
                 n_nan=('z_original', lambda s: int(s.isna().sum())),
             )
             .reset_index()
             .sort_values(['n_issues', 'zone'], ascending=[False, True])
)

# Show both views. Only the last expression of a cell is rendered on its own,
# so the rollup is displayed explicitly (display is provided by the
# IPython/Jupyter kernel). Nothing is written to disk here.
display(errors_by_zone)
errors_df

Unnamed: 0_level_0,zone,coord_text,z_original,z_sci_flag,z_warning
Unit Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"Zone1-2_Cafe_c039ba0e..Face0,",Zone1-2_Cafe_c039ba0e,"-60.5405782407266, 14.9612422918627, 8.8817841...",8.881784e-16,True,Scientific notation and ~0; plausibly negligib...
"Zone1-2_Cafe_c039ba0e..Face13,",Zone1-2_Cafe_c039ba0e,"-64.5190811327873, -3.16815853229159, 5.046888...",5.046889e-15,True,Scientific notation and ~0; plausibly negligib...
"Zone1-2_Cafe_c039ba0e..Face5,",Zone1-2_Cafe_c039ba0e,"-58.3026808248929, 6.7641569351663, 1.63628749...",1.636287e-15,True,Scientific notation and ~0; plausibly negligib...
"Zone5_RRooms_6_ff18170c..Face0,",Zone5_RRooms_6_ff18170c,"-67.5469825691303, 13.0484143221001, 3.5527136...",3.552714e-15,True,Scientific notation and ~0; plausibly negligib...
"Zone5_RRooms_6_ff18170c..Face2,",Zone5_RRooms_6_ff18170c,"-65.3090851532966, 4.85132896540369, 3.9490401...",3.94904e-15,True,Scientific notation and ~0; plausibly negligib...
"Zone7Shaft_6_667e71f6..Face0,",Zone7Shaft_6_667e71f6,"-68.84931976515, 12.6928614738995, 3.552713678...",3.552714e-15,True,Scientific notation and ~0; plausibly negligib...
"Zone7Shaft_6_667e71f6..Face3,",Zone7Shaft_6_667e71f6,"-66.6114223493164, 4.4957761172031, 9.53758630...",9.537586e-15,True,Scientific notation and ~0; plausibly negligib...


In [3]:
import numpy as np
import pandas as pd

# Requires `filtered_df` from the extraction cell above, carrying the columns
# ['zone','coord_text','z_value','z_original','z_sci_flag','z_warning'].

# -----------------------------
# Stabilize z for mapping
# -----------------------------
# Snap near-zero values (|z| < 1e-6) to exactly 0, then round z to 2 decimals.
# 'z_value' is updated in place; 'z_original' keeps the parsed value.
near_zero = filtered_df['z_value'].abs() < 1e-6
filtered_df.loc[near_zero, 'z_value'] = 0.0
filtered_df['z_value'] = filtered_df['z_value'].round(2)

# Canonical z→floor mapping (same table as in the extraction cell)
z_to_floor = {-2.99: -1, 0.00: 0, 4.50: 1, 8.00: 2, 11.50: 3, 15.00: 4, 18.50: 5, 22.00: 6}

def map_floor_from_z(z, mapping, tol=0.05):
    """
    Map a z value to a floor label using `mapping` (z key -> floor).

    z is rounded to 2 decimals first. An exact key hit returns its floor;
    otherwise the nearest key is used when it lies within `tol` (inclusive).
    Returns pd.NA when z is missing, when `mapping` is empty, or when no key
    is within `tol`.
    """
    # An empty mapping can never match; min() would raise on it.
    if pd.isna(z) or not mapping:
        return pd.NA
    zr = round(float(z), 2)
    if zr in mapping:
        return mapping[zr]
    nearest_key = min(mapping, key=lambda k: abs(zr - k))
    # Binary floats make a distance of exactly `tol` land a hair above or
    # below it (-2.94 vs -2.99 gives 0.0500000000000003), so a tiny slack
    # keeps the inclusive bound consistent on both sides.
    return mapping[nearest_key] if abs(zr - nearest_key) <= tol + 1e-9 else pd.NA

def _z_to_floor_label(z):
    """Look up the floor for a single z value in the z_to_floor table."""
    return map_floor_from_z(z, z_to_floor)

# Floor estimate for every row, stored as a nullable integer.
floor_labels = filtered_df['z_value'].apply(_z_to_floor_label)
filtered_df['floor_est'] = floor_labels.astype('Int64')

# -----------------------------
# Per-zone summary
# -----------------------------
# Sorted list of distinct floors seen in each zone; rows without a floor are left out.
rows_with_floor = filtered_df.dropna(subset=['floor_est'])
floors_per_zone = (
    rows_with_floor.groupby('zone')['floor_est']
                   .agg(lambda grp: sorted(pd.unique(grp.astype(int))))
                   .rename('floors_found')
)

# Number of distinct floors per zone, and whether a zone spans more than one.
zone_counts = floors_per_zone.apply(len).rename('n_floors')
multi_flag = (zone_counts > 1).rename('multi_floor')

# Representative z per zone (the median) and the floor that median maps to.
z_by_zone = filtered_df.groupby('zone', as_index=False)['z_value'].median()
z_by_zone = z_by_zone.rename(columns={'z_value': 'z_median'})
z_by_zone['floor_primary'] = z_by_zone['z_median'].apply(_z_to_floor_label).astype('Int64')

# Attach the per-zone series to the median table, one left join at a time.
summary = z_by_zone.set_index('zone')
for extra in (floors_per_zone, zone_counts, multi_flag):
    summary = summary.join(extra, how='left')
summary = summary.reset_index()

# Zones that had no known floor come back as NaN from the joins; normalise
# them to an empty list, a count of 0 and multi_floor=False.
summary['floors_found'] = summary['floors_found'].apply(
    lambda found: found if isinstance(found, list) else []
)
summary['n_floors'] = summary['n_floors'].fillna(0).astype(int)
summary['multi_floor'] = summary['multi_floor'].fillna(False).astype(bool)

# Order zones by primary floor, then by median z, and number them from 1.
summary = summary.sort_values(['floor_primary', 'z_median'], ascending=[True, True])
summary['level_rank'] = (np.arange(len(summary)) + 1).astype(int)

# Text form of floors_found (e.g. "[0, 1]") so it fits in one CSV column.
summary['floors_found_str'] = summary['floors_found'].apply(
    lambda found: '[' + ', '.join(str(f) for f in found) + ']'
)

# -----------------------------
# Save CSV (per-zone summary only)
# -----------------------------
import os

summary_cols = ['zone', 'z_median', 'floor_primary', 'floors_found_str', 'n_floors', 'multi_floor', 'level_rank']

# to_csv does not create missing directories, so make sure 'ref' exists first.
os.makedirs('ref', exist_ok=True)
summary.to_csv('ref/zones_by_floor_summary.csv', index=False, columns=summary_cols)

# Floor Areas

In [5]:
import pandas as pd
import numpy as np
import ast

# --- 1) Load data ---
# `summary` is rebound here from the CSV at the same path the earlier
# summary cell writes with to_csv; from this point on it is the on-disk
# version, not the in-memory frame built above.
# `areas` is a separate input file that is not produced anywhere in the
# visible cells.
summary = pd.read_csv("ref/zones_by_floor_summary.csv")   # expects columns incl. 'zone', 'floor_primary', 'floors_found_str'
areas   = pd.read_csv("ref/zone_areas.csv")               # expects columns: 'ZONE' (UPPER), 'Area [m2]'

# --- 2) Normalize/prepare keys for joining ---
def norm_zone_key(s: str) -> str:
    """
    Build a join key from a zone name.

    The name is trimmed, trailing commas are removed, any whitespace that sat
    in front of those commas is trimmed as well, and the result is
    upper-cased. Missing values (None / NaN) become the empty string.
    """
    if pd.isna(s):
        return ""
    key = str(s).strip().rstrip(",")
    # "Zone1 ," would otherwise keep its inner trailing space and fail to
    # match "ZONE1" on the other side of the join.
    return key.rstrip().upper()

# Same normalisation on both sides so the join keys line up.
summary['zone_key'] = summary['zone'].apply(norm_zone_key)
areas['ZONE_KEY']   = areas['ZONE'].apply(norm_zone_key)

# Parse floors_found_str back to list if present; handle missing gracefully
if 'floors_found_str' in summary.columns:
    def parse_list(x):
        """
        Turn the text form of a floor list (e.g. "[0, 1, 2]") back into a list.

        Always returns a list: missing, blank or unparseable text gives [],
        a list or tuple literal gives its items, and a single integer literal
        gives a one-item list. Any other literal (string, dict, float, ...)
        gives [].
        """
        if pd.isna(x) or str(x).strip() == "":
            return []
        try:
            parsed = ast.literal_eval(str(x))
        except Exception:
            # Best effort: text that is not a Python literal means "no floors known".
            return []
        # Later code calls len() on the result and iterates it as floor
        # numbers, so never hand back a bare scalar or another container type.
        if isinstance(parsed, (list, tuple)):
            return list(parsed)
        if isinstance(parsed, int) and not isinstance(parsed, bool):
            return [parsed]
        return []
    summary['floors_found'] = summary['floors_found_str'].apply(parse_list)
else:
    summary['floors_found'] = [[] for _ in range(len(summary))]

# --- 3) Join areas to zones (left join from summary) ---
area_lookup = areas[['ZONE_KEY', 'Area [m2]']]

# A key that appears more than once in the area table makes the left join
# emit one row per duplicate, so that zone's area would be summed repeatedly
# in the floor totals below. Report it instead of letting it pass silently.
dup_keys = area_lookup.loc[area_lookup['ZONE_KEY'].duplicated(keep=False), 'ZONE_KEY'].unique()
if len(dup_keys) > 0:
    print(f"Warning: {len(dup_keys)} zone keys occur more than once in zone_areas.csv; "
          "each duplicate row adds its area again (showing first 10):")
    print(", ".join(map(str, dup_keys[:10])))

zones = summary.merge(
    area_lookup,
    left_on='zone_key',
    right_on='ZONE_KEY',
    how='left'
)

# Zones from the summary that found no area row.
unmatched = zones[zones['Area [m2]'].isna()][['zone', 'zone_key']]
if not unmatched.empty:
    print(f"Warning: {len(unmatched)} zones had no area match in zone_areas.csv (showing first 10):")
    print(unmatched.head(10).to_string(index=False))

# --- 4) FLOOR AREAS using PRIMARY floor ---
# Each zone's whole area is attributed to its primary floor. Zones lacking
# either a primary floor or an area are left out.
has_floor_and_area = zones['floor_primary'].notna() & zones['Area [m2]'].notna()
primary = zones.loc[has_floor_and_area].copy()
primary['floor_primary'] = primary['floor_primary'].astype('Int64')

# Sum the areas per floor and list the floors in ascending order.
area_per_floor = primary.groupby('floor_primary', dropna=True, as_index=False)['Area [m2]'].sum()
floor_areas_primary = (
    area_per_floor.rename(columns={'Area [m2]': 'Floor Area [m2]'})
                  .sort_values('floor_primary')
)

# --- 5) Alternative: split a multi-floor zone's area equally across its floors ---
#   A zone with floors_found=[0,1] gives half of its area to each floor;
#   single-floor zones keep their full area on that floor.
split_rows = []
for _, zone_row in zones.iterrows():
    zone_area = zone_row.get('Area [m2]')
    if zone_area is None or pd.isna(zone_area):
        continue

    zone_floors = zone_row.get('floors_found', [])
    if isinstance(zone_floors, float) and np.isnan(zone_floors):
        zone_floors = []

    if not zone_floors:
        # No floor list: use the primary floor when there is one, else skip the zone.
        primary_floor = zone_row.get('floor_primary')
        if pd.isna(primary_floor):
            continue
        zone_floors = [int(primary_floor)]

    per_floor = zone_area / len(zone_floors)
    split_rows.extend(
        {'floor': int(floor_no), 'area_share': per_floor} for floor_no in zone_floors
    )

if not split_rows:
    # Nothing to aggregate: keep the same two columns, with no rows.
    floor_areas_split = pd.DataFrame(columns=['floor', 'Floor Area [m2]'])
else:
    split_df = pd.DataFrame(split_rows)
    share_per_floor = split_df.groupby('floor', as_index=False)['area_share'].sum()
    floor_areas_split = (
        share_per_floor.rename(columns={'floor': 'floor', 'area_share': 'Floor Area [m2]'})
                       .sort_values('floor')
    )

# --- 6) Report outputs ---
# Column list kept for a per-zone export; it is not used in this cell and no
# file is written here.
zones_out_cols = ['zone', 'floor_primary', 'floors_found_str', 'Area [m2]']

# Per-floor table (primary-floor attribution), then the grand total.
print(floor_areas_primary.to_string(index=False))

total_area = floor_areas_primary["Floor Area [m2]"].sum()
print(f"\nTotal Building Area: {total_area:.2f} m²")

 floor_primary  Floor Area [m2]
            -1           910.88
             0          1172.73
             1          1368.60
             2          1520.63
             3          1520.63
             4          1185.17
             5          1151.32
             6           709.70

Total Building Area: 9539.66 m²
