In [1]:
import numpy as np
import pandas as pd

  from pandas.core import (


In [2]:
# Seed NumPy's legacy global generator so every np.random.* draw made after
# this cell is reproducible under Restart Kernel -> Run All.
np.random.seed(seed=42)

In [3]:
# Calibration targets for the synthetic organisation generator.
# Source attributions below are the notebook author's stated benchmarks;
# they are not verified anywhere in this notebook.
CALIBRATION = dict(
    # Probability that an organisation uses AI at all
    # (stated source: McKinsey AI adoption benchmark, ~78% of orgs).
    ai_present_prob=0.78,
    # Adoption-level mix among AI-using organisations (must sum to 1).
    # Key order matters: it is the order np.random.choice sees the levels in.
    adoption_probs=dict(
        Reactive=0.35,
        Strategic=0.42,
        Transformative=0.23,
    ),
    # Probability of manager training, keyed by adoption level.
    training_probs=dict(
        Reactive=0.30,
        Strategic=0.55,
        Transformative=0.75,
    ),
    # Probability that AI governance is present, keyed by adoption level.
    governance_probs=dict(
        Reactive=0.30,
        Strategic=0.50,
        Transformative=0.80,
    ),
    # Sector mix (must sum to 1); stated source: OECD/enterprise surveys.
    sector_probs=dict(
        Technology=0.25,
        Finance=0.20,
        Healthcare=0.15,
        Manufacturing=0.18,
        Education=0.10,
        Services=0.12,
    ),
    # Hybrid-work share in percent: mean and standard deviation of the normal
    # draw (stated source: Owl Labs hybrid participation, ~30-45%).
    hybrid_mean=35,
    hybrid_std=15,
)

In [4]:
# Generate N synthetic organisation records from the CALIBRATION targets and
# collect them into the DataFrame `df`.
#
# The order of the np.random.* calls inside the loop is significant: with the
# global seed set earlier, reordering or adding draws changes every generated
# value. Keep the draw sequence intact when editing.
N = 847  # number of synthetic organisations

# Loop-invariant lookups, built once instead of on every iteration.
sector_names = list(CALIBRATION["sector_probs"])
sector_weights = list(CALIBRATION["sector_probs"].values())
adoption_levels = list(CALIBRATION["adoption_probs"])
adoption_weights = list(CALIBRATION["adoption_probs"].values())

# 0-based position of each adoption level in CALIBRATION["adoption_probs"];
# it scales the Poisson mean of the tool count (lam = 2, 4, 6, ... in dict order).
adoption_rank = {level: rank for rank, level in enumerate(adoption_levels)}

# Centre of the productivity-change draw (in %) for each adoption level.
base_prod = {"None": 0.5, "Reactive": 2.0, "Strategic": 5.0, "Transformative": 9.0}

rows = []

for i in range(N):
    org_id = f"ORG{i+1:04d}"

    # Sector, drawn from the calibrated sector mix.
    sector = np.random.choice(sector_names, p=sector_weights)

    # Whether the organisation uses AI at all (single Bernoulli draw).
    ai_present = int(np.random.binomial(1, CALIBRATION["ai_present_prob"]))

    if ai_present:
        adoption_level = np.random.choice(adoption_levels, p=adoption_weights)
        manager_training = int(np.random.binomial(1, CALIBRATION["training_probs"][adoption_level]))
        governance_present = int(np.random.binomial(1, CALIBRATION["governance_probs"][adoption_level]))
        ai_tools_count = int(np.random.poisson(lam=2 + 2 * adoption_rank[adoption_level]))
    else:
        # No AI adoption: every AI-related attribute is zeroed.
        # NOTE: the label is the string "None", not Python None. Recent pandas
        # versions list "None" among the default NA strings of read_csv, so this
        # label is read back as NaN unless keep_default_na/na_values is adjusted.
        adoption_level = "None"
        manager_training = 0
        governance_present = 0
        ai_tools_count = 0

    # Hybrid work share in percent, normal draw clipped to [0, 100].
    hybrid_ratio_pct = float(np.clip(
        np.random.normal(CALIBRATION["hybrid_mean"], CALIBRATION["hybrid_std"]),
        0, 100
    ))

    # Productivity change proxy: level-specific centre plus N(0, 2) noise,
    # rounded to two decimals.
    productivity_change_pct = round(base_prod.get(adoption_level, 0.5) + np.random.normal(0, 2), 2)

    # Employee trust score: normal draw around a governance-dependent centre,
    # rounded to an integer and clipped to the 1-5 scale.
    trust_base = 2.8 if governance_present == 0 else 3.6
    employee_trust_score = int(np.clip(round(np.random.normal(trust_base, 0.7)), 1, 5))

    # Binary performance outcome: productivity change weighted by trust (as a
    # fraction of the maximum score) compared against a fixed 3.5 threshold.
    perf_metric = productivity_change_pct * (employee_trust_score / 5.0)
    performance_outcome = int(perf_metric > 3.5)

    rows.append({
        "org_id": org_id,
        "sector": sector,
        "ai_present": ai_present,
        "adoption_level": adoption_level,
        "ai_tools_count": ai_tools_count,
        "manager_training": manager_training,
        "governance_present": governance_present,
        "hybrid_ratio_pct": hybrid_ratio_pct,
        "productivity_change_pct": productivity_change_pct,
        "employee_trust_score": employee_trust_score,
        "performance_outcome": performance_outcome
    })

# Build the frame once from the list of records (one row per organisation).
df = pd.DataFrame(rows)

In [5]:
# Compare the realised sample against the calibration targets.
# The "Overall" rates are taken over all organisations, including those
# without AI (whose training/governance flags are always 0).
adopters = df["ai_present"].eq(1)

print("=== Calibration Summary ===")
print("AI present fraction:", df["ai_present"].mean())
print(
    "Adoption distribution among AI orgs:",
    df.loc[adopters, "adoption_level"].value_counts(normalize=True),
    sep="\n",
)
for caption, column in (
    ("Overall manager training rate:", "manager_training"),
    ("Overall governance rate:", "governance_present"),
    ("Mean hybrid_ratio_pct:", "hybrid_ratio_pct"),
):
    print(caption, df[column].mean())

=== Calibration Summary ===
AI present fraction: 0.7933884297520661
Adoption distribution among AI orgs:
adoption_level
Strategic         0.427083
Reactive          0.349702
Transformative    0.223214
Name: proportion, dtype: float64
Overall manager training rate: 0.3707201889020071
Overall governance rate: 0.371900826446281
Mean hybrid_ratio_pct: 34.66300385141294


In [6]:
# Persist the synthetic dataset; the path is held in one place so the
# confirmation message always names the file that was actually written.
output_path = "calibrated_synthetic_organisations_847.csv"
df.to_csv(output_path, index=False)
print(f"\nSaved as: {output_path}")


Saved as: calibrated_synthetic_organisations_847.csv
