# gem2ems Engine — Usage Walkthrough

This notebook demonstrates the main features of the `gem2ems_engine`:

1. Single string translation
2. Inspecting parsed attributes
3. Reading the VC distribution and uncertainty outputs
4. Inspecting which modifiers fired and their effect
5. Comparing different building types
6. Batch processing a list of GEM strings and converting the results to a pandas DataFrame
7. Monte Carlo sampling

In [None]:
import sys
sys.path.insert(0, "../engine")

from gem2ems_engine import gem2ems, to_dataframe

# Instantiate the engine once; the cells below all call `eng.translate(...)` on this object.
eng = gem2ems()
print("Engine ready.")

---
## 1. Single string translation

In [None]:
# Translate one GEM taxonomy string and keep the result in `r`;
# the following sections inspect this same object.
gem_str = "CR/LFINF(MUR+CBH)+CDL+DUL/H:3/IND"
r = eng.translate(gem_str)

# Headline fields of the translation result, one per output line.
print(
    f"Input:          {r.gem_str}",
    f"EMS type:       {r.summary['best_ems_type']}  (p = {r.summary['best_ems_weight']:.2f})",
    f"VC class base:  {r.vc_class_base}  (before modifiers)",
    f"VC class final: {r.vc_class}  (after modifiers)",
    f"Confidence:     {r.confidence:.3f}",
    f"80% cred range: {r.summary['vc_credible_range_80']}",
    sep="\n",
)

---
## 2. Inspecting parsed GEM attributes

In [None]:
# `parsed` is indexed like a mapping below; pull it out once for brevity.
p = r.parsed

# One line per parsed attribute, emitted with a single print call.
print(
    f"Material:        {p['material']}",
    f"Material L2:     {p['material_L2']}",
    f"System (LLRS):   {p['system']}",
    f"Infill material: {p['infill_material']}",
    f"ERD level:       {p['erd']}",
    f"ERD score:       {p['erd_score']:.2f}",
    f"Code level:      {p['code_level']}",
    f"Ductility token: {p['ductility_token']}",
    f"Height (floors): {p['height_stories']}",
    f"Height bin:      {p['height_bin']}",
    f"Occupancy:       {p['occupancy']}",
    f"Family:          {p['family']}",
    sep="\n",
)

---
## 3. VC distribution and uncertainty

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Probability of each vulnerability class before and after the modifiers;
# classes absent from a distribution are plotted as 0.
vc_classes = list("ABCDEF")
probs_base = [r.vc_probs_base.get(c, 0) for c in vc_classes]
probs_final = [r.vc_probs.get(c, 0) for c in vc_classes]

x = np.arange(len(vc_classes))
w = 0.35  # bar width; each pair of bars straddles its tick

fig, ax = plt.subplots(figsize=(7, 4))

# Side-by-side bars: base to the left of the tick, final to the right.
for offset, heights, series_label, series_color in (
    (-w / 2, probs_base, "Base (no modifiers)", "steelblue"),
    (w / 2, probs_final, "Final (with modifiers)", "tomato"),
):
    ax.bar(x + offset, heights, w, label=series_label, color=series_color, alpha=0.8)

# Axis cosmetics.
ax.set_xticks(x)
ax.set_xticklabels(vc_classes, fontsize=12)
ax.set_xlabel("Vulnerability Class", fontsize=11)
ax.set_ylabel("Probability", fontsize=11)
ax.set_title(
    f"{r.gem_str}\nEMS: {r.summary['best_ems_type']} | confidence: {r.confidence:.2f}",
    fontsize=10,
)
ax.legend()
ax.set_ylim(0, 1)
plt.tight_layout()
plt.show()

# Uncertainty diagnostics reported alongside the figure.
print(
    f"EMS entropy:     {r.uncertainty['ems_entropy']:.3f}",
    f"VC entropy base: {r.uncertainty['vc_entropy_base']:.3f}",
    f"VC entropy final:{r.uncertainty['vc_entropy']:.3f}",
    f"Flags:           {r.uncertainty['flags']}",
    f"Missing features:{r.uncertainty['missing_features']}",
    sep="\n",
)

---
## 4. Inspecting which modifiers fired

In [None]:
# Modifier-stage summary for the result `r`, followed by a blank line.
print(
    f"Modifiers fired: {r.summary['n_modifiers_fired']}",
    f"Cumulative shift: {r.summary['cumulative_shift']:+.2f}",
    "",
    sep="\n",
)

# One indented record per applied modifier, each terminated by a blank line.
# `confidence_penalty` and `doc` are optional keys (defaults 1.0 and "");
# the doc text is cut to its first 80 characters.
for m in r.vc_modifiers_applied:
    print(
        f"  [{m['id']}]",
        f"    shift   = {m['shift']:+.2f}",
        f"    penalty = {m.get('confidence_penalty', 1.0):.2f}",
        f"    doc     = {m.get('doc', '')[:80]}",
        "",
        sep="\n",
    )

---
## 5. Comparing different building types

In [None]:
# (label, GEM string) pairs to translate and compare in one table.
examples = [
    ("RC infilled frame, low ERD",        "CR/LFINF(MUR+CBH)+CDL+DUL/H:3/IND"),
    ("RC wall, moderate ERD",             "CR/LWAL+CDM+DUM/H:5/IND"),
    ("RC precast frame, low ERD",         "CR+PC/LFM+CDL+DUL/H:1/IND"),
    ("Rubble stone masonry, non-ductile", "MUR+STRUB/LWAL+DNO/H:2/IND"),
    ("Fired clay brick, non-ductile",     "MUR+CLBRS/LWAL+DNO/H:3/IND"),
    ("Adobe masonry, non-ductile",        "MUR+ADO/LWAL+DNO/H:1/IND"),
    ("Steel braced frame",                "S/LFBR+CDM+DUM/H:5/IND"),
    ("Timber wall, low ERD",              "W/LWAL+CDL+DUM/H:2/IND"),
]

print(f"{'Label':<40} {'EMS':<10} {'VC base':<9} {'VC final':<10} {'Shift':>7} {'Conf':>6}")
print("-" * 90)

# Use dedicated loop variables: rebinding the notebook-global `r` here would
# silently replace the section-1 result that the earlier cells read, so
# re-running those cells after this one would show the last example instead.
for label, example_gem in examples:
    example_result = eng.translate(example_gem)
    print(
        f"{label:<40} "
        f"{example_result.summary['best_ems_type']:<10} "
        f"{example_result.vc_class_base:<9} "
        f"{example_result.vc_class:<10} "
        f"{example_result.summary['cumulative_shift']:>+7.2f} "
        f"{example_result.confidence:>6.3f}"
    )

---
## 6. Batch processing and DataFrame output

In [None]:
import pandas as pd

# GEM strings translated together in a single call below.
gem_strings = [
    "CR/LFINF(MUR+CBH)+CDL+DUL/H:3/IND",
    "CR/LWAL+CDM+DUM/H:5/IND",
    "MUR+STRUB/LWAL+DNO/H:2/IND",
    "MUR+CBH/LWAL+DNO/H:4/IND",
    "MUR+ADO/LWAL+DNO/H:1/IND",
    "MUR+CLBRS/LWAL+DNO/H:3/IND",
    "S/LFBR+CDM+DUM/H:5/IND",
    "W/LWAL+CDL+DUM/H:2/IND",
    # NOTE(review): the two entries below break the pattern of the others
    # (`LWALL`, `H:UNK`, a leading `UNK` segment) — presumably deliberate
    # examples of unrecognised/unknown tokens; confirm.
    "CR/LWALL+CDL+DUL/H:UNK/IND",
    "UNK+CDL+DUM/H:3/IND",
]

# Here `translate` receives a list rather than a single string (as in the
# earlier cells); whatever it returns is handed straight to `to_dataframe`.
results = eng.translate(gem_strings)
df = to_dataframe(results)

# Core summary columns; the bare last expression makes the cell display them.
cols = ["gem_str", "best_ems_type", "vc_class_base", "vc_class",
        "cumulative_shift", "n_modifiers_fired", "confidence"]
df[cols]

In [None]:
# Input string plus the six per-class VC probability columns, shown to 3 decimals.
vc_cols = ["gem_str", *(f"vc_probs_{c}" for c in "ABCDEF")]
df[vc_cols].round(3)

---
## 7. Monte Carlo sampling

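Draw random (EMS type, VC class) pairs from the result distributions
for uncertainty propagation into a fragility or loss model.
Note that the two components of each pair are drawn independently: the EMS type
from the candidate weights, and the VC class from the final VC distribution.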

In [None]:
import random
from collections import Counter

def _pick_from_cdf(weighted_labels, u, default):
    """Return the label whose cumulative-weight interval contains ``u``.

    ``weighted_labels`` is an iterable of ``(label, weight)`` pairs walked in
    order. If ``u`` lies beyond the accumulated weight, the last label with a
    positive weight is returned, or ``default`` when no weight is positive.
    """
    cumulative = 0.0
    last_positive = default
    for label, weight in weighted_labels:
        if weight > 0:
            last_positive = label
        cumulative += weight
        # Strict comparison: for u in [0, 1) a zero-weight label owns an empty
        # interval and must never be selected (`u <= cumulative` would pick a
        # leading zero-weight label when u == 0.0).
        if u < cumulative:
            return label
    # u fell past the accumulated mass (rounding, or weights summing to < 1).
    return last_positive


def sample_one(result, rng=None):
    """Draw one (ems_type, vc_class) sample from a TranslationResult.

    The EMS type and the VC class are drawn independently, each by inverse-CDF
    sampling with its own call to ``rng`` (EMS first, then VC).

    Parameters
    ----------
    result
        Object exposing ``ems_candidates`` (a non-empty sequence of dicts with
        ``"ems_type"`` and ``"weight"`` keys) and ``vc_probs`` (a mapping from
        the class letters ``"A"``..``"F"`` to probabilities; missing letters
        count as 0.0).
    rng
        Zero-argument callable returning a float in [0, 1). Defaults to
        ``random.random``. Pass e.g. ``random.Random(seed).random`` for
        reproducible draws.

    Returns
    -------
    tuple
        ``(ems_type, vc_class)``.

    Raises
    ------
    IndexError
        If ``result.ems_candidates`` is empty.
    """
    if rng is None:
        rng = random.random

    # Sample EMS type. If no candidate carries positive weight, fall back to
    # the last candidate.
    candidates = result.ems_candidates
    ems = _pick_from_cdf(
        ((c["ems_type"], c["weight"]) for c in candidates),
        rng(),
        candidates[-1]["ems_type"],
    )

    # Sample VC class from the final distribution. "C" is only used when no
    # class has positive probability at all; a rounding shortfall resolves to
    # the last class that actually has mass.
    vc = _pick_from_cdf(
        ((cls, result.vc_probs.get(cls, 0.0)) for cls in "ABCDEF"),
        rng(),
        "C",
    )

    return ems, vc


# Example: 5000 Monte Carlo samples for one building
gem = "MUR+CBH/LWAL+DNO/H:4/IND"
result = eng.translate(gem)

n = 5000
# Dedicated, seeded generator: the sampled frequencies are then identical on
# every Restart & Run All and do not depend on the global `random` state.
mc_rng = random.Random(42).random
samples = [sample_one(result, mc_rng) for _ in range(n)]

# Empirical frequency of each VC class among the samples (0 if never drawn).
vc_counts = Counter(vc for _, vc in samples)
vc_freq   = {cls: vc_counts.get(cls, 0) / n for cls in "ABCDEF"}

print(f"Building: {gem}")
print(f"EMS type: {result.summary['best_ems_type']}")
print()
print("Sampled VC frequencies vs analytical distribution:")
print(f"{'VC':<5} {'Sampled':>10} {'Analytical':>12}")
print("-" * 30)
for cls in "ABCDEF":
    print(f"  {cls}   {vc_freq[cls]:>10.3f} {result.vc_probs.get(cls, 0):>12.3f}")

In [None]:
# Visualise sampled vs analytical
fig, ax = plt.subplots(figsize=(6, 4))

x = np.arange(6)  # one tick per VC class A..F
w = 0.35          # bar width; each pair of bars straddles its tick

analytical = [result.vc_probs.get(c, 0) for c in "ABCDEF"]
sampled    = [vc_freq.get(c, 0)          for c in "ABCDEF"]

ax.bar(x - w/2, analytical, w, label="Analytical", color="steelblue", alpha=0.85)
ax.bar(x + w/2, sampled,    w, label=f"MC sample (n={n})", color="tomato",    alpha=0.85)

ax.set_xticks(x)
ax.set_xticklabels(list("ABCDEF"), fontsize=12)
ax.set_xlabel("Vulnerability Class", fontsize=11)
ax.set_ylabel("Probability / Frequency", fontsize=11)
ax.set_title(f"Monte Carlo validation\n{gem}", fontsize=10)
ax.legend()
# Keep the 0.7 ceiling for spread-out distributions, but raise it (with 5%
# headroom) when one class dominates, so that no bar is clipped by the axis.
ax.set_ylim(0, max(0.7, 1.05 * max(analytical + sampled)))
plt.tight_layout()
plt.show()