# Beaver Tutorial 6: ECG Arrhythmia Classification (Data Scientist)

Iterate on a mock ECG dataset, then request the real result.

Run this alongside `06-ecg-do.ipynb` in a separate tab.


## Step 1: Setup


In [4]:
!uv pip install pandas numpy scikit-learn matplotlib -q

In [9]:
# # Uncomment for quick local testing without SyftBox
# import os
# import tempfile
# from beaver import Twin
# import beaver
# # Create temp folder for session
# temp_dir = tempfile.mkdtemp()

# # Set environment for local mode
# os.environ["BEAVER_LOCAL_MODE"] = "1"
# os.environ["BEAVER_USER"] = "bob@example.com"
# os.environ["BEAVER_SESSION_ID"] = "test_session"
# os.environ["BEAVER_LOCAL_SESSION_DIR"] = temp_dir
# os.environ["BEAVER_AUTO_ACCEPT"] = "1"

# print(f"Session dir: {temp_dir}")
# bv = beaver.ctx()
# session = bv.active_session()

In [6]:
import beaver
from beaver import Twin

# Obtain the Beaver context and the session that is currently active for it.
bv = beaver.ctx()
session = bv.active_session()
# NOTE(review): reset(force=True) presumably clears state left over from earlier
# runs so the tutorial starts clean -- confirm before using on a session you
# want to keep.
session.reset(force=True)

# Show which user this notebook is running as and who the session peer is.
print(f"You: {bv.user}")
print(f"Peer: {session.peer}")


In [10]:
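# # Uncomment for quick local testing without SyftBox
# # (single-cell variant: local-mode environment plus the session setup above)
# import os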
# import tempfile

# # Create temp folder for session
# temp_dir = tempfile.mkdtemp()

# # Set environment for local mode
# os.environ["BEAVER_LOCAL_MODE"] = "1"
# os.environ["BEAVER_USER"] = "bob@example.com"
# os.environ["BEAVER_SESSION_ID"] = "test_session"
# os.environ["BEAVER_LOCAL_SESSION_DIR"] = temp_dir
# os.environ["BEAVER_AUTO_ACCEPT"] = "1"

# import beaver
# from beaver import Twin

# bv = beaver.ctx()
# session = bv.active_session()
# session.reset(force=True)

# print(f"You: {bv.user}")
# print(f"Peer: {session.peer}")
# print(f"Session dir: {temp_dir}")

## Step 2: Wait for ECG Twin (Mock Data)


In [None]:
# Wait for the peer to publish the "ecg" variable (timeout=600, presumably seconds).
ecg = session.wait_for_remote_var("ecg", timeout=600, trust_loader=True)
if not ecg:
    raise RuntimeError("Timed out waiting for ECG Twin")

# Only the public side of the Twin is inspected in this cell.
label_col = "type"
mock_df = ecg.public
if label_col not in mock_df.columns:
    raise ValueError("Expected label column 'type' in mock data")

# Every column other than "record" and the label counts as a feature.
feature_cols = [c for c in mock_df.columns if c != "record" and c != label_col]

# Quick summary of the mock data: size, feature count, rows per class.
print(f"Mock rows: {len(mock_df)}")
print(f"Features: {len(feature_cols)}")
print(f"Classes: {mock_df[label_col].value_counts().to_dict()}")

## Step 3: Define Analysis + Plot


In [4]:
@bv
def train_ecg_classifier(df):
    """Train a logistic-regression classifier on an ECG feature table.

    The "type" column is used as the label. Every other column except
    "record" is treated as a feature; values that cannot be parsed as
    numbers are replaced with 0. The rows are split 80/20 with a fixed seed,
    features are standardised using statistics from the training split only,
    and a confusion matrix for the held-out split is plotted.

    Args:
        df: Table containing a "type" column plus feature columns
            (assumed to be a pandas DataFrame -- TODO confirm what Beaver
            passes in for a Twin argument).

    Returns:
        dict with keys "accuracy" (float, held-out accuracy), "classes"
        (list of label strings), "n_samples" (int) and "n_features" (int).

    Raises:
        ValueError: If the "type" column is missing.
    """
    # Imports live inside the function, presumably so it stays self-contained
    # wherever Beaver executes it -- verify against the Beaver docs.
    import pandas as pd
    import numpy as np
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import LabelEncoder, StandardScaler
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
    import matplotlib.pyplot as plt

    label_col = "type"
    if label_col not in df.columns:
        raise ValueError("Missing label column 'type'")

    feature_cols = [c for c in df.columns if c not in ("record", label_col)]
    # Coerce everything to numeric; unparseable or missing cells become 0.
    X = df[feature_cols].apply(pd.to_numeric, errors="coerce").fillna(0)
    y = df[label_col].astype(str)

    le = LabelEncoder()
    y_enc = le.fit_transform(y)

    # A stratified split needs at least two rows per class AND at least one
    # row per class in each of the train and test splits; otherwise
    # train_test_split raises. Fall back to a plain split in those cases
    # (typical for a tiny mock dataset with many classes).
    test_size = 0.2
    n_samples = len(y_enc)
    n_classes = len(le.classes_)
    n_test = int(np.ceil(test_size * n_samples))
    n_train = n_samples - n_test
    can_stratify = (
        n_classes > 1
        and np.min(np.bincount(y_enc)) > 1
        and n_test >= n_classes
        and n_train >= n_classes
    )
    stratify = y_enc if can_stratify else None
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_enc, test_size=test_size, random_state=42, stratify=stratify
    )

    # Fit the scaler on the training split only to avoid leaking test data.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # `multi_class` is deprecated in recent scikit-learn releases; "auto" was
    # the default, so omitting it keeps the same behaviour without the warning.
    model = LogisticRegression(max_iter=200)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)

    # Pass the full label range so classes absent from the test split still
    # get a row/column in the matrix and line up with display_labels.
    cm = confusion_matrix(y_test, y_pred, labels=range(len(le.classes_)))
    disp = ConfusionMatrixDisplay(cm, display_labels=le.classes_)
    disp.plot(xticks_rotation=45)
    plt.tight_layout()
    plt.show()

    return {
        "accuracy": float(acc),
        "classes": list(le.classes_),
        "n_samples": int(len(df)),
        "n_features": int(X.shape[1]),
    }


## Step 4: Run on Mock, Then Request Real


In [5]:
# Run the analysis on the Twin and report the metrics from the result's public side.
mock_result = train_ecg_classifier(ecg)
mock_accuracy = mock_result.public.get("accuracy", "N/A")
print(f"Mock accuracy: {mock_accuracy}")
mock_result.show_figures("public")

# NOTE(review): request_private() presumably asks the peer to approve the
# private counterpart of this result -- confirm against the Beaver docs.
mock_result.request_private()
print("Request sent. Run DO notebook Steps 4-5...")


## Step 5: Wait for Approved Results


In [6]:
# Wait for the peer's response to the request made on mock_result
# (timeout=600, presumably seconds).
approved = bv.wait_for_response(mock_result, timeout=600)
if approved and approved.private:
    print(f"Real accuracy: {approved.private.get('accuracy', 'N/A')}")
    print(f"Classes: {approved.private.get('classes', [])}")
    approved.show_figures("private")
else:
    # Previously this case printed nothing, which made a missing response
    # look the same as a cell that had not finished running.
    print(
        "No approved private result received. "
        "Check that the DO notebook ran Steps 4-5, then re-run this cell."
    )
