In [1]:
# ----------------------------------------------------------
# Cell 1 — Load layer-1 and layer-2 activations + standardize
# ----------------------------------------------------------
from pathlib import Path
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Paths
# If __file__ is defined (script run), the project root is two directories above
# this file; otherwise (e.g. notebook kernel) it is the parent of the working dir.
project_root = Path(__file__).resolve().parent.parent if "__file__" in globals() else Path.cwd().resolve().parent
act_dir = project_root / "results" / "activations"

# Files produced earlier
l1_path = act_dir / "adult_dnn_layer1.npy"
l2_path = act_dir / "adult_dnn_layer2.npy"
meta_path = act_dir / "adult_dnn_metadata.csv"

# Load
L1 = np.load(l1_path)          # shape: (n_samples, n_units_l1)
L2 = np.load(l2_path)          # shape: (n_samples, n_units_l2)
meta = pd.read_csv(meta_path)  # columns: sex, race, y_true, y_prob, y_pred

# Basic checks
# Explicit raise instead of `assert`: assertions are removed under `python -O`,
# which would let row-misaligned activations/metadata through unnoticed.
if not (L1.shape[0] == L2.shape[0] == len(meta)):
    raise ValueError(
        "Mismatched sample counts. "
        f"layer1={L1.shape[0]}, layer2={L2.shape[0]}, metadata={len(meta)}"
    )

# Standardize each layer (per feature) so neither dominates.
# Separate scalers are kept so each layer's fitted statistics remain available.
sc1 = StandardScaler(with_mean=True, with_std=True)
sc2 = StandardScaler(with_mean=True, with_std=True)
L1z = sc1.fit_transform(L1)
L2z = sc2.fit_transform(L2)

# Concatenate features from both layers (simple joint view)
X_joint = np.hstack([L1z, L2z])   # shape: (n_samples, n_units_l1 + n_units_l2)

# Report shapes and a short metadata preview as a sanity check.
print("Loaded and standardized:")
print(f"  Layer1: {L1.shape}  -> standardized {L1z.shape}")
print(f"  Layer2: {L2.shape}  -> standardized {L2z.shape}")
print(f"  Joint : {X_joint.shape}")
print("\nMetadata preview:")
print(meta.head(3))


Loaded and standardized:
  Layer1: (11306, 64)  -> standardized (11306, 64)
  Layer2: (11306, 32)  -> standardized (11306, 32)
  Joint : (11306, 96)

Metadata preview:
      sex   race  y_true    y_prob  y_pred
0    Male  White       0  0.097896       0
1  Female  White       0  0.012861       0
2    Male  White       0  0.182906       0
