In [None]:
# ---------- CONFIG ----------
# Every path below is built on the Google Drive mount point.
DRIVE_MOUNT_PATH = '/content/drive'
CSV_PATH = DRIVE_MOUNT_PATH + '/MyDrive/TG/embeddings/pd.csv' # input table; change to your CSV
TEXT_DIR = DRIVE_MOUNT_PATH + '/MyDrive/TG/embeddings/text_embeddings_PD' # ClinicalBERT embeddings, one <id>.npy per subject
IMG_DIR = DRIVE_MOUNT_PATH + '/MyDrive/TG/embeddings/mri_embeddings' # SAMMed3D embeddings, one <id>.npy per subject
# NOTE(review): the file name says "HY", but the MBRS label cell further down
# writes its table to this same path as well - confirm the name is still right.
OUTPUT_CSV = DRIVE_MOUNT_PATH + '/MyDrive/TG/embeddings/labels_HY_filtered.csv'
# NOTE(review): unlike the paths above, the next two omit the 'TG' folder -
# confirm this is intended.
PCA_PATH = DRIVE_MOUNT_PATH + '/MyDrive/embeddings/img_pca.joblib'
FLAT_DIR = DRIVE_MOUNT_PATH + '/MyDrive/embeddings/flattened' # downsampled image volumes stored as raw float32 .dat files
PCA_N_COMPONENTS = 1024 # upper bound for the PCA output size (the PCA cell caps it at len(valid_ids))
PROJ_DIM = 256 # projection dimension for both modalities
BATCH_SIZE = 8
EPOCHS = 30
LR = 1e-4 # learning rate passed to Adam
# NOTE(review): the MBRS labels built later in this notebook only take the
# values 0-2, so a 6-way output keeps three classes that never occur - confirm.
NUM_CLASSES = 6 # H&Y 0-5
RANDOM_SEED = 42

In [None]:

# Mount Google Drive at DRIVE_MOUNT_PATH so the Drive-based paths resolve (Colab only).
from google.colab import drive
# force_remount=False: an already-mounted drive is reused rather than remounted.
drive.mount(DRIVE_MOUNT_PATH, force_remount=False)

Mounted at /content/drive


## Imports

In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.decomposition import IncrementalPCA
import joblib
from sklearn.model_selection import train_test_split
import random

In [None]:
# ---------- UTIL / SETUP ----------
def seed_all(seed=RANDOM_SEED):
  """Seed the Python, NumPy and PyTorch random number generators.

  Args:
    seed: integer seed shared by all generators; defaults to RANDOM_SEED.
  """
  # Order kept as: stdlib random, NumPy, PyTorch (CPU).
  for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(seed)
  # CUDA generators are only touched when a GPU is visible.
  if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)


seed_all()

## Load CSV

In [None]:
# ---------- 1) Load CSV and filter IDs by .npy presence ----------
print('Loading CSV and filtering IDs...')
if not os.path.exists(CSV_PATH):
  raise FileNotFoundError(f'CSV not found: {CSV_PATH}')

df = pd.read_csv(CSV_PATH)

# Column resolution (edit here if your CSV uses other names).
# ID: prefer 'PATNO', then 'ID', otherwise fall back to the first column.
if 'PATNO' in df.columns:
  ID_COL = 'PATNO'
elif 'ID' in df.columns:
  ID_COL = 'ID'
else:
  ID_COL = df.columns[0]

# Hoehn & Yahr: 'NHY' first, then 'HY'; None when neither is present.
HY_COL = next((name for name in ('NHY', 'HY') if name in df.columns), None)
if HY_COL is None:
  raise ValueError('Cannot find a Hoehn&Yahr column (NHY or HY). Update HY_COL variable')

# Reduce the table to the two columns of interest and normalise their types:
# IDs become strings, H&Y becomes numeric (unparseable -> NaN -> row dropped).
df = df.loc[:, [ID_COL, HY_COL]].copy()
df[ID_COL] = df[ID_COL].astype(str)
df = (
  df.assign(**{HY_COL: pd.to_numeric(df[HY_COL], errors='coerce')})
    .dropna(subset=[HY_COL])
)
# Integer stage, clipped to the 0-5 range.
df[HY_COL] = df[HY_COL].astype(int).clip(0, 5)

# Subject IDs are the .npy file names without extension; keep only the IDs
# that have both a text and an image embedding.
text_files = [name for name in os.listdir(TEXT_DIR) if name.endswith('.npy')]
img_files = [name for name in os.listdir(IMG_DIR) if name.endswith('.npy')]
text_ids = {os.path.splitext(name)[0] for name in text_files}
img_ids = {os.path.splitext(name)[0] for name in img_files}
valid_ids = sorted(text_ids & img_ids)
print(f'Found {len(text_ids)} text .npy, {len(img_ids)} img .npy, intersection {len(valid_ids)}')

# Restrict the table to those IDs and persist it as (id, hy).
df = df.loc[df[ID_COL].isin(valid_ids)].copy()
print('After filtering CSV by available npys:', len(df))
out_df = df[[ID_COL, HY_COL]].rename(columns={ID_COL: 'id', HY_COL: 'hy'})
out_df.to_csv(OUTPUT_CSV, index=False)
print('Saved filtered CSV:', OUTPUT_CSV)




Loading CSV and filtering IDs...
Found 1560 text .npy, 259 img .npy, intersection 190
After filtering CSV by available npys: 190
Saved filtered CSV: /content/drive/MyDrive/TG/embeddings/labels_HY_filtered.csv


In [None]:
# ---------- 1) Load CSV and keep the ID and MBRS columns ----------
print('Loading CSV and filtering IDs...')
if not os.path.exists(CSV_PATH):
  raise FileNotFoundError(f'CSV not found: {CSV_PATH}')


df = pd.read_csv(CSV_PATH)
# Adjust column names below if different in your CSV
ID_COL = 'PATNO' if 'PATNO' in df.columns else ('ID' if 'ID' in df.columns else df.columns[0])
# A single membership test is enough: the previous nested fallback checked
# the very same 'MBRS' name twice.
MBRS_COL = 'MBRS' if 'MBRS' in df.columns else None
if MBRS_COL is None:
  # The old message referred to Hoehn&Yahr (copied from the H&Y cell); this
  # cell needs the MBRS column.
  raise ValueError("Cannot find the 'MBRS' column. Update MBRS_COL variable")


# Keep only ID and MBRS
df = df[[ID_COL, MBRS_COL]].copy()
# IDs as strings so they can be matched against .npy file names
df[ID_COL] = df[ID_COL].astype(str)
# MBRS as numeric; values that cannot be parsed become NaN and are dropped
df[MBRS_COL] = pd.to_numeric(df[MBRS_COL], errors='coerce')
df = df.dropna(subset=[MBRS_COL])

# Bin the raw MBRS score into three ordinal classes (0, 1, 2).
def map_mbrs_to_3(x):
    """Map a numeric MBRS score to a 3-level class index.

    Args:
        x: numeric score (int or float).

    Returns:
        0 for 0 <= x < 8, 1 for 8 <= x < 16, and 2 for everything else.
    """
    # Half-open bins leave no gap between classes. The previous closed integer
    # ranges (0-7 and 8-15) sent fractional scores such as 7.5 or 15.5 to
    # class 2. Integer inputs map exactly as before.
    if 0 <= x < 8:
        return 0
    if 8 <= x < 16:
        return 1
    # Remaining values (>= 16, and - unchanged from before - negatives or NaN).
    return 2

# Store the 3-level class in a column literally named 'MBRS_COL'
# (the raw score stays in df[MBRS_COL]).
df = df.assign(**{'MBRS_COL': df[MBRS_COL].apply(map_mbrs_to_3)})
print(df)

# Subject IDs are the .npy file names without extension; keep only the IDs
# that have both a text and an image embedding.
text_files = [name for name in os.listdir(TEXT_DIR) if name.endswith('.npy')]
img_files = [name for name in os.listdir(IMG_DIR) if name.endswith('.npy')]
text_ids = {os.path.splitext(name)[0] for name in text_files}
img_ids = {os.path.splitext(name)[0] for name in img_files}
valid_ids = sorted(text_ids & img_ids)
print(f'Found {len(text_ids)} text .npy, {len(img_ids)} img .npy, intersection {len(valid_ids)}')

# Restrict the table to those IDs.
df = df.loc[df[ID_COL].isin(valid_ids)].copy()
print('After filtering CSV by available npys:', len(df))

# Persist (id, MBRS_COL): the ID column is renamed to 'id' and the class
# column keeps its literal name 'MBRS_COL'.
out_df = df.rename(columns={ID_COL: 'id', MBRS_COL: 'MBRS'}).loc[:, ['id', 'MBRS_COL']]
out_df.to_csv(OUTPUT_CSV, index=False)
print('Saved filtered CSV:', OUTPUT_CSV)




Loading CSV and filtering IDs...
       PATNO  MBRS  MBRS_COL
0       3001  13.0         1
1       3002  14.0         1
2       3003   8.0         1
3       3005   0.0         0
4       3006  14.0         1
...      ...   ...       ...
1555  476236   3.0         0
1556  492876   6.0         0
1557  496671   1.0         0
1558  504436   4.0         0
1559  522583   2.0         0

[1560 rows x 3 columns]
Found 1560 text .npy, 259 img .npy, intersection 190
After filtering CSV by available npys: 190
Saved filtered CSV: /content/drive/MyDrive/TG/embeddings/labels_HY_filtered.csv


In [None]:
# Display the label table that was just written to OUTPUT_CSV.
out_df

Unnamed: 0,id,MBRS_COL
39,3102,1
40,3105,2
41,3107,0
42,3108,0
44,3111,1
...,...,...
695,51632,1
696,51731,1
706,53060,0
718,55395,0


## PCA

In [None]:
# Record the array shape of every image embedding, keyed by subject ID.
shapes = {}
for id_ in valid_ids:
    # mmap_mode="r" maps the file instead of reading it, so the array payload
    # is not pulled into RAM just to look at its shape.
    arr = np.load(os.path.join(IMG_DIR, id_ + ".npy"), mmap_mode="r")
    shapes[id_] = arr.shape

# Show "shape -> number of subjects" instead of one output line per ID;
# the full per-ID mapping stays available in `shapes`.
pd.Series(list(shapes.values()), dtype=object).value_counts()


{'3102': (2, 48, 32, 32, 32),
 '3105': (2, 48, 32, 32, 32),
 '3107': (2, 48, 32, 32, 32),
 '3108': (2, 48, 32, 32, 32),
 '3111': (2, 48, 32, 32, 32),
 '3113': (2, 48, 32, 32, 32),
 '3116': (2, 48, 32, 32, 32),
 '3118': (2, 48, 32, 32, 32),
 '3119': (2, 48, 32, 32, 32),
 '3120': (2, 48, 32, 32, 32),
 '3122': (2, 48, 32, 32, 32),
 '3123': (2, 48, 32, 32, 32),
 '3124': (2, 48, 32, 32, 32),
 '3125': (2, 48, 32, 32, 32),
 '3126': (2, 48, 32, 32, 32),
 '3127': (2, 48, 32, 32, 32),
 '3128': (2, 48, 32, 32, 32),
 '3129': (2, 48, 32, 32, 32),
 '3130': (2, 48, 32, 32, 32),
 '3131': (2, 48, 32, 32, 32),
 '3132': (2, 48, 32, 32, 32),
 '3134': (2, 48, 32, 32, 32),
 '3150': (2, 48, 32, 32, 32),
 '3154': (2, 48, 32, 32, 32),
 '3166': (2, 48, 32, 32, 32),
 '3167': (2, 48, 32, 32, 32),
 '3168': (2, 48, 32, 32, 32),
 '3173': (2, 48, 32, 32, 32),
 '3174': (2, 48, 32, 32, 32),
 '3175': (2, 48, 32, 32, 32),
 '3176': (2, 48, 32, 32, 32),
 '3178': (2, 48, 32, 32, 32),
 '3179': (2, 48, 32, 32, 32),
 '3181': (

In [None]:
# ----------------------------------------------------
# Drop image .npy files that do NOT hold 2 stacked volumes
# ----------------------------------------------------
# NOTE(review): valid_ids is rebuilt here from IMG_DIR alone, so from this
# point on it no longer reflects the text/image intersection computed in the
# label cell - confirm that downstream users of valid_ids expect this.
valid_ids = []
bad_ids = []

for f in os.listdir(IMG_DIR):
    if not f.endswith(".npy"):
        continue
    id_ = os.path.splitext(f)[0]
    # Only ndim/shape are needed, so memory-map the file instead of loading
    # the whole array into RAM.
    arr = np.load(os.path.join(IMG_DIR, f), mmap_mode="r")

    # Accept any 5-D array whose leading axis has length 2; the four trailing
    # axes are not checked, even though the messages below quote
    # (2,48,32,32,32).
    if arr.ndim == 5 and arr.shape[0] == 2:
        valid_ids.append(id_)
    else:
        bad_ids.append(id_)

print("IDs válidos:", len(valid_ids))
print("IDs eliminados por tener shape != (2,48,32,32,32):", len(bad_ids))
print("Ejemplo de eliminados:", bad_ids[:10])

# Keep only the table rows whose image file passed the check
df = df[df[ID_COL].isin(valid_ids)].reset_index(drop=True)
print("Sujetos finales después del filtrado:", len(df))


IDs válidos: 257
IDs eliminados por tener shape != (2,48,32,32,32): 2
Ejemplo de eliminados: ['3829', '53060']
Sujetos finales después del filtrado: 188


In [None]:
# Display the IDs rejected by the shape filter.
bad_ids

['3829', '53060']

In [None]:
# NOTE(review): `arr` is whatever array the last loop iteration of an earlier
# cell left behind, so this inspects that single array only.
print(arr.shape)

(2, 48, 32, 32, 32)


In [None]:
# ---------- CONFIG ----------
# Cap the configured number of PCA components at the number of usable
# subjects (len(valid_ids)). Re-running this cell can only lower the value,
# never raise it back.
PCA_N_COMPONENTS = min(PCA_N_COMPONENTS, len(valid_ids))
# ---------- 2) Fit IncrementalPCA on the downsampled image embeddings (skipped when a saved PCA exists) ----------
# The flattened image vectors are large, so they are reduced to PCA_N_COMPONENTS dimensions.
import os
import numpy as np
import joblib
from sklearn.decomposition import IncrementalPCA
import torch
import torch.nn.functional as F

# Folder that receives one raw .dat file per subject; created if missing.
os.makedirs(FLAT_DIR, exist_ok=True)
# Target (D, H, W) grid for the downsampled volumes.
# NOTE(review): downsample_3d below carries its own (16, 16, 16) and does not
# read this constant - keep the two in sync.
DOWNSAMPLE_SIZE = (16, 16, 16)
# ======================================


# --------------------------
# Función para downsamplear un embedding 3D pesado
# --------------------------
def downsample_3d(arr, size=(16, 16, 16)):
    """Reduce an image embedding to a small 3-D grid by trilinear interpolation.

    Args:
        arr: NumPy array. Handled layouts, checked in this order:
            * 5-D with a leading axis of length 2: the two entries are averaged;
            * 4-D: collapsed to 3-D by plain indexing (see the note below);
            * 3-D: used as is.
        size: target (D, H, W) of the output grid. Defaults to (16, 16, 16),
            the value that used to be hard-coded in the body.

    Returns:
        float32 NumPy array of shape `size`.

    Raises:
        ValueError: if the array is not 3-D after the reductions above.
    """
    # Two stacked volumes -> average them, e.g. (2,48,32,32,32) -> (48,32,32,32).
    if arr.ndim == 5 and arr.shape[0] == 2:
        arr = arr.mean(axis=0)

    if arr.ndim == 4:
        # NOTE(review): when the first axis is 32 or 48 (which is the case for
        # the averaged (48,32,32,32) arrays), only index 0 of the LAST axis is
        # kept and the rest of that axis is discarded. Confirm this is the
        # intended reduction rather than, for example, a mean over an axis.
        if arr.shape[0] not in [32, 48]:
            # first axis treated as a channel axis: keep its first entry
            arr = arr[0]
        else:
            # keep index 0 along the last axis
            arr = arr[..., 0]

    # A 3-D volume is required from here on.
    if arr.ndim != 3:
        raise ValueError(f"Se esperaba un 3D, llegó: {arr.shape}")

    # F.interpolate expects (N, C, D, H, W): add batch and channel axes.
    t = torch.from_numpy(arr).float().unsqueeze(0).unsqueeze(0)
    t_ds = F.interpolate(t, size=tuple(size), mode="trilinear", align_corners=False)
    # Index instead of squeeze() so a target dimension of 1 is not dropped.
    return t_ds[0, 0].numpy()


# ============================================================
#          1) GENERAR ARCHIVOS MEMMAP (uno por ID)
# ============================================================
print("=== Paso 1/3: Generando memmaps downsampleados ===")

for subject_id in valid_ids:
    src_path = os.path.join(IMG_DIR, f"{subject_id}.npy")
    if os.path.exists(src_path):
        # Memory-map the source so np.load itself does not read the whole file.
        downsampled = downsample_3d(np.load(src_path, mmap_mode="r"))

        # Write the result as a raw float32 file, one per subject.
        out_path = os.path.join(FLAT_DIR, f"{subject_id}.dat")
        sink = np.memmap(out_path, dtype="float32", mode="w+", shape=downsampled.shape)
        sink[...] = downsampled
        del sink  # drop the handle so the mapping is released
    else:
        print("No existe:", src_path)
print("Listo memmaps.")


# ============================================================
#           2) AJUSTAR INCREMENTAL PCA SOBRE MEMMAPS
# ============================================================
if os.path.exists(PCA_PATH):
    print("Cargando PCA existente:", PCA_PATH)
    # joblib.load unpickles the file: only load artifacts written by this
    # notebook. NOTE(review): a cached PCA is reused as is, even if the
    # contents of FLAT_DIR or PCA_N_COMPONENTS changed since it was fitted.
    pca = joblib.load(PCA_PATH)
else:
    print("=== Paso 2/3: Entrenando IncrementalPCA ===")

    pca = IncrementalPCA(n_components=PCA_N_COMPONENTS)
    # Only the raw .dat dumps belong in the fit; anything else that happens to
    # sit in the folder would be misread as float32 data.
    files = sorted(f for f in os.listdir(FLAT_DIR) if f.endswith(".dat"))
    batch_size = 1024   # rows handed to each partial_fit call

    # partial_fit can reject a batch that has fewer rows than n_components
    # (depending on the scikit-learn version), so a short trailing batch is
    # folded into the batch before it.
    starts = list(range(0, len(files), batch_size))
    if len(starts) > 1 and len(files) - starts[-1] < PCA_N_COMPONENTS:
        starts.pop()

    for k, i in enumerate(starts):
        stop = starts[k + 1] if k + 1 < len(starts) else len(files)
        batch_files = files[i:stop]

        # Each .dat file is read back as one flat float32 row.
        mats = [
            np.array(np.memmap(os.path.join(FLAT_DIR, f), dtype="float32", mode="r"))
            for f in batch_files
        ]

        X = np.stack(mats)
        pca.partial_fit(X)

        print(f"PCA parcial: {stop}/{len(files)}")

    joblib.dump(pca, PCA_PATH)
    print("PCA guardado en", PCA_PATH)


# ============================================================
#      3) Función para transformar un embedding con PCA
# ============================================================
def img_flatten_and_pca(path, pca_obj):
    """Load one image embedding, downsample it and project it with a PCA.

    Args:
        path: path of the .npy file to read (opened memory-mapped).
        pca_obj: object exposing transform(), e.g. the fitted PCA.

    Returns:
        1-D float32 NumPy array with the projected vector.
    """
    volume = downsample_3d(np.load(path, mmap_mode="r"))
    # transform() works on a 2-D batch, so the volume becomes a single row.
    projected = pca_obj.transform(volume.reshape(1, -1))
    return projected.ravel().astype("float32")


print("=== PCA LISTO ===")


=== Paso 1/3: Generando memmaps downsampleados ===
Listo memmaps.
Cargando PCA existente: /content/drive/MyDrive/embeddings/img_pca.joblib
=== PCA LISTO ===


## Dataset and dataloader

In [None]:
class PDFlattenedDataset(Dataset):
  """Pairs each subject's text embedding with its (optionally PCA-reduced) image vector.

  The label CSV must provide the columns 'id' and 'MBRS_COL'. For every row,
  `<text_dir>/<id>.npy` and `<flat_dir>/<id>.dat` (raw float32) are read.

  Args:
    csv_path: CSV file with the 'id' and 'MBRS_COL' columns.
    text_dir: folder holding the text embeddings as .npy files.
    flat_dir: folder holding the image vectors as raw float32 .dat files.
    pca_obj: object exposing transform(); when None the raw image vector is
      returned unchanged.
    transform: stored on the instance but not applied by this class.
  """

  def __init__(self, csv_path, text_dir, flat_dir, pca_obj=None, transform=None):
    # Read IDs as strings: a numeric parse would strip leading zeros ('007' ->
    # 7) and could come back as '3102.0' when a row is pulled out with .iloc.
    self.df = pd.read_csv(csv_path, dtype={'id': str})
    self.text_dir = text_dir
    self.flat_dir = flat_dir
    self.pca = pca_obj
    self.transform = transform


  def __len__(self):
    return len(self.df)


  def __getitem__(self, idx):
    """Return (text tensor float32, image tensor float32, label tensor long)."""
    row = self.df.iloc[idx]
    id_ = str(row['id'])
    MBRS = int(row['MBRS_COL'])


    text_emb = np.load(os.path.join(self.text_dir, id_ + '.npy')).astype(np.float32)
    mm_path = os.path.join(self.flat_dir, id_ + ".dat")
    # Copy out of the memmap so no file mapping outlives this call.
    flat = np.array(np.memmap(mm_path, dtype='float32', mode='r'))

    # Apply the PCA when one was supplied; the pca_obj=None default used to
    # fail here with an AttributeError.
    if self.pca is not None:
      img_vec = self.pca.transform(flat.reshape(1, -1)).ravel().astype("float32")
    else:
      img_vec = flat


    # convert to tensors
    txt_t = torch.tensor(text_emb, dtype=torch.float32)
    img_t = torch.tensor(img_vec, dtype=torch.float32)
    label = torch.tensor(MBRS, dtype=torch.long)


    return txt_t, img_t, label

#
# Remove the subjects whose image array was rejected by the shape filter.
out_df = out_df.loc[~out_df['id'].isin(bad_ids)].reset_index(drop=True)

# Two-stage stratified split: 20% is held out for test, then 12.5% of the
# remaining 80% (10% overall) becomes validation -> train 70 / val 10 / test 20.
train_df, test_df = train_test_split(
  out_df,
  test_size=0.2,
  stratify=out_df['MBRS_COL'],
  random_state=RANDOM_SEED,
)
train_df, val_df = train_test_split(
  train_df,
  test_size=0.125,
  stratify=train_df['MBRS_COL'],
  random_state=RANDOM_SEED,
)

# Write each split next to OUTPUT_CSV.
labels_dir = os.path.dirname(OUTPUT_CSV)
train_csv = os.path.join(labels_dir, 'train_labels.csv')
val_csv = os.path.join(labels_dir, 'val_labels.csv')
test_csv = os.path.join(labels_dir, 'test_labels.csv')
for split_df, split_csv in ((train_df, train_csv), (val_df, val_csv), (test_df, test_csv)):
  split_df.to_csv(split_csv, index=False)
print('Split saved:', train_csv, val_csv, test_csv)

# One dataset per split, all sharing the same PCA object.
train_ds, val_ds, test_ds = (
  PDFlattenedDataset(csv_file, TEXT_DIR, FLAT_DIR, pca)
  for csv_file in (train_csv, val_csv, test_csv)
)

# Only the training loader shuffles.
train_loader = DataLoader(train_ds, shuffle=True, batch_size=BATCH_SIZE)
val_loader = DataLoader(val_ds, shuffle=False, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_ds, shuffle=False, batch_size=BATCH_SIZE)

Split saved: /content/drive/MyDrive/TG/embeddings/train_labels.csv /content/drive/MyDrive/TG/embeddings/val_labels.csv /content/drive/MyDrive/TG/embeddings/test_labels.csv


In [None]:
# ---------- 4) Model definition ----------
class FusionFC(nn.Module):
  """Fusion classifier over a text vector and an image vector.

  Each modality is projected with Linear -> ReLU -> LayerNorm to `proj_dim`;
  the two projections are concatenated and classified by a two-layer MLP
  (hidden size 512, dropout 0.2).
  """

  def __init__(self, text_dim, img_dim, proj_dim=PROJ_DIM, num_classes=NUM_CLASSES):
    super().__init__()
    # Layers are created in the original order (text, image, classifier) so
    # that seeded parameter initialisation is unchanged.
    self.text_proj = self._make_projection(text_dim, proj_dim)
    self.img_proj = self._make_projection(img_dim, proj_dim)
    hidden_dim = 512
    self.classifier = nn.Sequential(
      nn.Linear(2 * proj_dim, hidden_dim),
      nn.ReLU(),
      nn.Dropout(0.2),
      nn.Linear(hidden_dim, num_classes),
    )

  @staticmethod
  def _make_projection(in_dim, out_dim):
    """Build one Linear -> ReLU -> LayerNorm projection head."""
    layers = [nn.Linear(in_dim, out_dim), nn.ReLU(), nn.LayerNorm(out_dim)]
    return nn.Sequential(*layers)

  def forward(self, txt, img):
    """Return (class logits, text projection, image projection)."""
    text_z = self.text_proj(txt)
    img_z = self.img_proj(img)
    logits = self.classifier(torch.cat((text_z, img_z), dim=1))
    return logits, text_z, img_z

In [None]:
# obtain dims from sample
# The sample ID comes from the label table: its rows were filtered to IDs
# that have both a text and an image file. valid_ids[0] is not safe here,
# because valid_ids was rebuilt from IMG_DIR alone and may start with a
# subject that has no text embedding.
sample_id = str(out_df['id'].iloc[0])
sample_txt = np.load(os.path.join(TEXT_DIR, sample_id + '.npy')).astype(np.float32)
# The flattened image sample is informational only; img_dim below comes from the PCA.
sample_img_flat = np.load(os.path.join(IMG_DIR, sample_id + '.npy'))
if sample_img_flat.ndim > 1 and sample_img_flat.shape[0] == 1:
  sample_img_flat = sample_img_flat[0]
sample_img_flat = sample_img_flat.flatten().astype(np.float32)


text_dim = sample_txt.shape[0]
# Prefer the fitted component count: a PCA loaded from disk may differ from
# the configured PCA_N_COMPONENTS.
img_dim = pca.n_components_ if hasattr(pca, 'n_components_') else PCA_N_COMPONENTS
print('text_dim', text_dim, 'img_dim (after PCA)', img_dim)

text_dim 768 img_dim (after PCA) 257


## Training

In [None]:
# ---------- 5) Training loop (contrastive + classification optional) ----------
# Run on the GPU when one is available.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'

model = FusionFC(text_dim=text_dim, img_dim=img_dim)
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
criterion = nn.CrossEntropyLoss()


# Optional contrastive term; the total loss is
# (1 - contrastive_weight) * classification + contrastive_weight * contrastive.
use_contrastive = True
contrastive_weight = 0.5


def contrastive_loss(zt, zi, temperature=0.07):
  """Symmetric contrastive (InfoNCE-style) loss between paired projections.

  Row k of `zt` and row k of `zi` form the positive pair; every other row of
  the batch acts as a negative.

  Args:
    zt: (batch, dim) text projections.
    zi: (batch, dim) image projections.
    temperature: value the similarities are divided by before the softmax.

  Returns:
    Scalar tensor: mean of the text->image and image->text cross-entropies.
  """
  # L2-normalise so the similarities are cosines in [-1, 1]. Raw dot products
  # grow with the embedding norms and, once divided by a small temperature,
  # produce very large logits and loss values.
  zt = F.normalize(zt, dim=1)
  zi = F.normalize(zi, dim=1)
  logits = (zt @ zi.t()) / temperature
  # The matching pair of each row sits on the diagonal.
  labels = torch.arange(logits.size(0), device=logits.device)
  loss_t = F.cross_entropy(logits, labels)
  loss_i = F.cross_entropy(logits.t(), labels)
  return (loss_t + loss_i) / 2


# training
def _run_epoch(loader, training):
  """Run one pass over `loader` and return (mean loss, accuracy).

  With training=True the model is put in train mode and the optimizer steps
  on every batch; otherwise the model is put in eval mode and gradients are
  disabled. The loss is the classification loss, blended with the
  contrastive loss when use_contrastive is set.
  """
  if training:
    model.train()
  else:
    model.eval()

  loss_sum = 0
  n_correct = 0
  n_seen = 0
  with torch.set_grad_enabled(training):
    for txt, img, y in loader:
      txt, img, y = txt.to(device), img.to(device), y.to(device)
      if training:
        optimizer.zero_grad()

      logits, zt, zi = model(txt, img)
      loss = criterion(logits, y)
      if use_contrastive:
        loss = (1-contrastive_weight)*loss + contrastive_weight*contrastive_loss(zt, zi)

      if training:
        loss.backward()
        optimizer.step()

      # Weight the batch loss by batch size so the epoch mean is per sample.
      batch_n = txt.size(0)
      loss_sum += loss.item() * batch_n
      n_correct += (logits.argmax(dim=1) == y).sum().item()
      n_seen += batch_n

  return loss_sum / n_seen, n_correct / n_seen


best_val = 1e9
best_model_path = os.path.join(os.path.dirname(OUTPUT_CSV), 'best_model.pt')
for epoch in range(EPOCHS):
  train_loss, train_acc = _run_epoch(train_loader, training=True)
  val_loss, val_acc = _run_epoch(val_loader, training=False)

  print(f'Epoch {epoch+1}/{EPOCHS} | train_loss {train_loss:.4f} acc {train_acc:.4f} | val_loss {val_loss:.4f} acc {val_acc:.4f}')

  # Checkpoint whenever the validation loss improves.
  if val_loss < best_val:
    best_val = val_loss
    torch.save(model.state_dict(), best_model_path)
    print('Saved best model')

Epoch 1/30 | train_loss 64.6585 acc 0.4885 | val_loss 38.4780 acc 0.5789
Saved best model
Epoch 2/30 | train_loss 29.0612 acc 0.6107 | val_loss 23.3283 acc 0.5789
Saved best model
Epoch 3/30 | train_loss 18.8548 acc 0.6031 | val_loss 23.1862 acc 0.5789
Saved best model
Epoch 4/30 | train_loss 14.0392 acc 0.6031 | val_loss 23.3950 acc 0.5789
Epoch 5/30 | train_loss 16.2873 acc 0.6260 | val_loss 23.0842 acc 0.5789
Saved best model
Epoch 6/30 | train_loss 13.0997 acc 0.6336 | val_loss 20.3402 acc 0.5789
Saved best model
Epoch 7/30 | train_loss 11.5703 acc 0.6412 | val_loss 23.4408 acc 0.5789
Epoch 8/30 | train_loss 12.3102 acc 0.6489 | val_loss 15.2652 acc 0.5789
Saved best model
Epoch 9/30 | train_loss 11.7014 acc 0.6641 | val_loss 23.3864 acc 0.5789
Epoch 10/30 | train_loss 10.2883 acc 0.6718 | val_loss 19.2209 acc 0.5789
Epoch 11/30 | train_loss 9.7655 acc 0.6718 | val_loss 21.2342 acc 0.5789
Epoch 12/30 | train_loss 9.9583 acc 0.6870 | val_loss 17.3818 acc 0.5789
Epoch 13/30 | train_l

## Testing

In [None]:
# Reload the best checkpoint written by the training loop.
best_ckpt = os.path.join(os.path.dirname(OUTPUT_CSV), 'best_model.pt')
# map_location keeps the load working when the checkpoint was saved on a
# different device than the current one (e.g. saved on GPU, evaluated on CPU).
model.load_state_dict(torch.load(best_ckpt, map_location=device))
model.eval()
from sklearn.metrics import classification_report, confusion_matrix
all_y = []
all_p = []
with torch.no_grad():
  for txt, img, y in test_loader:
    txt = txt.to(device); img = img.to(device)
    logits, zt, zi = model(txt, img)
    preds = logits.argmax(dim=1).cpu().numpy().tolist()
    all_p.extend(preds)
    all_y.extend(y.numpy().tolist())


print('Classification report (test):')
# zero_division=0 reports 0.0 for classes that are never predicted without
# raising the undefined-metric warning on every averaged row.
print(classification_report(all_y, all_p, digits=4, zero_division=0))
print('Confusion matrix:')
print(confusion_matrix(all_y, all_p))


# Re-save the PCA; the model checkpoint itself was written during training.
joblib.dump(pca, PCA_PATH)
print('PCA & model saved to drive')

Classification report (test):
              precision    recall  f1-score   support

           0     0.5625    0.8182    0.6667        22
           1     0.3333    0.1429    0.2000        14
           2     0.0000    0.0000    0.0000         2

    accuracy                         0.5263        38
   macro avg     0.2986    0.3203    0.2889        38
weighted avg     0.4485    0.5263    0.4596        38

Confusion matrix:
[[18  4  0]
 [12  2  0]
 [ 2  0  0]]
PCA & model saved to drive


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Number of subjects per class in the label table.
out_df['MBRS_COL'].value_counts()


Unnamed: 0_level_0,count
MBRS_COL,Unnamed: 1_level_1
0,111
1,68
2,9
