# 04 · Baseline tabular models
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ORG/Fallrisk-gait/blob/main/datasets/fallrisk/notebooks/04_baseline_tabular.ipynb)

Train pure-Python baselines on the synthetic table: logistic regression and gradient-boosted decision stumps, for both the binary high-risk label and the 3-class policy (low / moderate / high). Report AUROC, macro-F1, Brier score, calibration curves, and demographic slice metrics (sex and age bands).

In [1]:
from pathlib import Path
import csv
import math
import random
from collections import Counter, defaultdict

def locate_repo_root(max_depth: int = 6) -> Path:
    """Search upward from the working directory for the repository root.

    A directory qualifies when it contains both a ``datasets`` and a ``data``
    entry. At most ``max_depth`` directories are inspected, beginning with the
    working directory itself and moving towards the filesystem root. When no
    directory qualifies, the working directory is returned.
    """
    start = Path.cwd()
    # The working directory followed by each of its ancestors, nearest first.
    lineage = [start, *start.parents]
    for candidate in lineage[:max(max_depth, 0)]:
        if all((candidate / marker).exists() for marker in ('datasets', 'data')):
            return candidate
    return start

# Resolve the dataset relative to the detected repository root so the notebook
# can be launched from a sub-directory of the checkout.
ROOT = locate_repo_root()
data_path = ROOT / 'datasets' / 'fallrisk' / 'fallrisk_tabular_v1.csv'
# The csv module expects files opened with newline='' so that line breaks
# inside quoted fields are parsed correctly. The explicit encoding keeps the
# read independent of the platform default (assumes the CSV is UTF-8/ASCII).
with data_path.open(newline='', encoding='utf-8') as f:
    rows = list(csv.DictReader(f))
print(f'Loaded {len(rows)} synthetic rows')
# Fixed seed: the shuffle, the 15,000-row subsample and the split below are
# reproducible from run to run.
random.seed(8)
random.shuffle(rows)
rows = rows[:15000]
# 80/20 train/test split of the shuffled subsample.
split = int(0.8 * len(rows))
train_rows = rows[:split]
test_rows = rows[split:]
# Columns read as numeric model inputs (converted with float() downstream).
FEATURES = [
    'age_years', 'bmi', 'systolic_bp', 'gait_speed_m_s', 'postural_sway_cm', 'medication_count',
    'chronic_conditions', 'past_falls_6mo', 'dual_task_cost_percent', 'fear_of_falling_score',
    'muscle_strength_score', 'reaction_time_ms',
]
# Label values used by the 3-class models and metrics.
CLASSES = ['low', 'moderate', 'high']


Loaded 50000 synthetic rows


In [2]:
def to_float(row, key):
    """Return the value stored under ``key`` in ``row``, converted to ``float``."""
    raw_value = row[key]
    return float(raw_value)

class StandardScaler:
    """Per-feature z-score scaler for dict-like rows.

    ``stats`` maps each feature name to a ``(mean, std)`` pair. Until ``fit``
    is called every feature uses ``(0.0, 1.0)``, i.e. values pass through
    unchanged apart from the float conversion.
    """

    def __init__(self, features):
        self.features = features
        self.stats = dict.fromkeys(features, (0.0, 1.0))

    def fit(self, data):
        """Estimate the mean and population standard deviation of each feature."""
        for name in self.features:
            column = [to_float(record, name) for record in data]
            center = sum(column) / len(column)
            spread = sum((value - center) ** 2 for value in column) / len(column)
            # A constant column has zero variance; fall back to unit scale so
            # transform() never divides by zero.
            scale = math.sqrt(spread) if spread > 0 else 1.0
            self.stats[name] = (center, scale)

    def transform(self, row):
        """Return the standardised feature values of ``row`` in ``features`` order."""
        scaled = []
        for name in self.features:
            value = to_float(row, name)
            center, scale = self.stats[name][0], self.stats[name][1]
            scaled.append((value - center) / (scale or 1.0))
        return scaled

def logistic(x):
    """Return the logistic sigmoid ``1 / (1 + e**-x)`` of ``x``.

    The branch is chosen so that the argument passed to ``math.exp`` is never
    positive. Large-magnitude inputs therefore saturate towards 0.0 or 1.0
    instead of raising ``OverflowError``.
    """
    if x >= 0:
        return 1.0 / (1.0 + math.exp(-x))
    # For negative x, exp(-x) can overflow; exp(x) lies in [0, 1) and cannot.
    decayed = math.exp(x)
    return decayed / (1.0 + decayed)

class BinaryLogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    ``weights[0]`` is the intercept and ``weights[1:]`` line up with the
    feature columns. The step size decays as ``lr / (1 + 0.01 * epoch)``.
    """

    def __init__(self, lr=0.12, epochs=120):
        self.lr = lr
        self.epochs = epochs
        self.weights = None  # populated by fit()

    def _linear_score(self, sample):
        """Intercept plus the weighted sum of the sample's feature values."""
        return self.weights[0] + sum(
            coef * value for coef, value in zip(self.weights[1:], sample)
        )

    def fit(self, X, y):
        """Fit the weights to feature rows ``X`` and 0/1 targets ``y``."""
        width = len(X[0])
        self.weights = [0.0] * (width + 1)
        n = len(X)
        for epoch in range(self.epochs):
            bias_grad = 0.0
            coef_grad = [0.0] * width
            for sample, target in zip(X, y):
                error = logistic(self._linear_score(sample)) - target
                bias_grad += error
                for col, value in enumerate(sample):
                    coef_grad[col] += error * value
            # Decaying step size; gradients are averaged over the n samples.
            step = self.lr / (1 + 0.01 * epoch)
            self.weights[0] -= step * bias_grad / n
            for col, partial in enumerate(coef_grad):
                self.weights[col + 1] -= step * partial / n

    def predict_proba(self, X):
        """Return the predicted positive-class probability for every row of ``X``."""
        return [logistic(self._linear_score(sample)) for sample in X]

class GradientBoostingStumps:
    """Gradient boosting on the logistic loss with depth-1 regression trees.

    The model keeps a running log-odds score per sample. Each round fits a
    stump to the residuals ``y - sigmoid(score)`` by least squares and adds
    ``learning_rate`` times the stump's leaf mean to the score.

    Each stump is a dict with the keys ``feature`` (column index),
    ``threshold``, ``left_value``, ``right_value`` and ``loss``.
    """

    def __init__(self, n_estimators=18, learning_rate=0.18):
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.trees = []
        self.base_logit = 0.0

    @staticmethod
    def _stump_output(tree, xi):
        """Return the leaf value that ``tree`` assigns to feature vector ``xi``."""
        if xi[tree['feature']] <= tree['threshold']:
            return tree['left_value']
        return tree['right_value']

    def fit(self, X, y):
        """Fit ``n_estimators`` stumps to feature rows ``X`` and 0/1 targets ``y``.

        Any stumps from a previous call are discarded first, so refitting
        replaces the ensemble instead of appending to it.
        """
        # Without this reset a second fit() would keep the earlier stumps and
        # predict_proba() would sum the contributions of both ensembles.
        self.trees = []
        base_rate = sum(y) / len(y)
        # Clamp so that a single-class target still has a finite log-odds.
        base_rate = min(max(base_rate, 1e-6), 1 - 1e-6)
        self.base_logit = math.log(base_rate / (1 - base_rate))
        preds = [self.base_logit] * len(X)
        for _ in range(self.n_estimators):
            residuals = [yt - logistic(score) for yt, score in zip(y, preds)]
            tree = self._fit_stump(X, residuals)
            self.trees.append(tree)
            for i, xi in enumerate(X):
                preds[i] += self.learning_rate * self._stump_output(tree, xi)

    def _fit_stump(self, X, residuals):
        """Return the single-split stump with the lowest squared error.

        Every feature is scanned in sorted order. Candidate thresholds are the
        midpoints between consecutive distinct values, and each side predicts
        the mean residual of the samples that fall into it. If no feature has
        two distinct values, a stump that always outputs 0.0 is returned.
        """
        best = {'loss': float('inf')}
        total_sum = sum(residuals)
        total_sq = sum(r * r for r in residuals)
        total_count = len(residuals)
        n_features = len(X[0])
        for feat in range(n_features):
            pairs = sorted(((xi[feat], res) for xi, res in zip(X, residuals)), key=lambda x: x[0])
            # Running totals for the left side of the split. The right side is
            # obtained by subtracting them from the overall totals.
            prefix_sum = 0.0
            prefix_sq = 0.0
            prefix_count = 0
            for i in range(len(pairs) - 1):
                val, res = pairs[i]
                prefix_sum += res
                prefix_sq += res * res
                prefix_count += 1
                # A split is only possible between two distinct feature values.
                if pairs[i + 1][0] == val:
                    continue
                # Both sides are non-empty here: at least one pair has been
                # consumed and at least one pair remains.
                left_count = prefix_count
                right_count = total_count - prefix_count
                left_sum = prefix_sum
                right_sum = total_sum - prefix_sum
                left_sq = prefix_sq
                right_sq = total_sq - prefix_sq
                left_mean = left_sum / left_count
                right_mean = right_sum / right_count
                # Sum of squared deviations from the side mean: sum(r^2) - sum(r)^2 / n.
                left_loss = left_sq - left_sum * left_sum / left_count
                right_loss = right_sq - right_sum * right_sum / right_count
                loss = left_loss + right_loss
                if loss < best['loss']:
                    threshold = (val + pairs[i + 1][0]) / 2.0
                    best = {
                        'feature': feat,
                        'threshold': threshold,
                        'left_value': left_mean,
                        'right_value': right_mean,
                        'loss': loss
                    }
        if best['loss'] == float('inf'):
            # No valid split anywhere: return a no-op stump.
            best = {'feature': 0, 'threshold': 0.0, 'left_value': 0.0, 'right_value': 0.0, 'loss': 0.0}
        return best

    def predict_proba(self, X):
        """Return the predicted positive-class probability for every row of ``X``."""
        probs = []
        for xi in X:
            logit_val = self.base_logit
            for tree in self.trees:
                logit_val += self.learning_rate * self._stump_output(tree, xi)
            probs.append(logistic(logit_val))
        return probs

def auc_score(probs, labels):
    """Return the area under the ROC curve via the rank-sum (Mann-Whitney) formula.

    Args:
        probs: predicted scores; higher means more likely positive.
        labels: 0/1 ground-truth labels aligned with ``probs``.

    Returns:
        The AUROC as a float, or ``None`` when only one class is present.

    Samples that share a score receive the average of the ranks they span.
    A tied positive/negative pair therefore counts as half a correct ordering,
    and the result does not depend on the input order of tied samples.
    """
    pos = sum(labels)
    neg = len(labels) - pos
    if pos == 0 or neg == 0:
        return None
    pairs = sorted(zip(probs, labels), key=lambda pair: pair[0])
    total = len(pairs)
    rank_sum = 0.0
    n_pos = 0
    start = 0
    while start < total:
        # Find the end of the run of samples sharing this score.
        end = start + 1
        while end < total and pairs[end][0] == pairs[start][0]:
            end += 1
        # The run occupies 1-based ranks start+1 .. end; use their mean.
        midrank = (start + 1 + end) / 2.0
        tied_pos = sum(1 for _, label in pairs[start:end] if label == 1)
        rank_sum += midrank * tied_pos
        n_pos += tied_pos
        start = end
    return (rank_sum - n_pos * (n_pos + 1) / 2) / (pos * neg)

def macro_f1(labels, preds):
    """Return the unweighted mean of the F1 scores of class 0 and class 1.

    ``labels`` and ``preds`` are aligned sequences of 0/1 values. A class whose
    precision and recall are both zero contributes an F1 of 0.0.
    """
    # Tally of (true, predicted) pairs.
    confusion = Counter(zip(labels, preds))

    def class_f1(cls):
        other = 1 - cls
        hits = confusion[(cls, cls)]
        predicted = hits + confusion[(other, cls)]
        actual = hits + confusion[(cls, other)]
        precision = hits / predicted if predicted else 0.0
        recall = hits / actual if actual else 0.0
        if precision + recall == 0:
            return 0.0
        return 2 * precision * recall / (precision + recall)

    return 0.5 * (class_f1(0) + class_f1(1))


def brier_score(probs, labels):
    """Return the mean squared gap between predicted probabilities and labels.

    Yields 0.0 when ``labels`` is empty.
    """
    if labels:
        squared_gaps = ((prob - truth) ** 2 for prob, truth in zip(probs, labels))
        return sum(squared_gaps) / len(labels)
    return 0.0

def preview_calibration(curve, max_bins=3):
    """Return the leading non-empty bins of a calibration curve.

    Entries are scanned in order and those with a truthy ``'count'`` are kept.
    Scanning stops as soon as the number kept reaches ``max_bins``; the check
    runs after every entry, including empty ones.
    """
    shown = []
    for bucket in curve:
        if bucket['count']:
            shown.append(bucket)
        if len(shown) >= max_bins:
            return shown
    return shown

def calibration_curve(probs, labels, bins=10):
    """Bucket predictions into equal-width bins and summarise each bin.

    Returns one dict per bin with the keys ``'bin'``, ``'count'``, ``'pred'``
    (mean predicted probability) and ``'actual'`` (mean label). Empty bins
    report a count of 0 and ``None`` for both means.
    """
    # One [count, probability total, label total] accumulator per bin.
    buckets = [[0, 0.0, 0.0] for _ in range(bins)]
    for prob, label in zip(probs, labels):
        # min() keeps a probability of exactly 1.0 inside the last bin.
        bucket = buckets[min(bins - 1, int(prob * bins))]
        bucket[0] += 1
        bucket[1] += prob
        bucket[2] += label
    curve = []
    for index, (count, prob_total, label_total) in enumerate(buckets):
        if count:
            entry = {'bin': index, 'count': count, 'pred': prob_total / count, 'actual': label_total / count}
        else:
            entry = {'bin': index, 'count': 0, 'pred': None, 'actual': None}
        curve.append(entry)
    return curve

def slice_macro_f1(rows, probs, labels, threshold=0.5):
    """Compute the binary macro-F1 per demographic slice.

    Every row contributes to two slices: ``('sex', <row's sex value>)`` and
    ``('age_group', <band>)``, where the band is ``'<70'``, ``'70-79'`` or
    ``'80+'``. Probabilities at or above ``threshold`` count as positive
    predictions.

    Returns a dict mapping each slice key to
    ``{'count': n, 'macro_f1': score rounded to 3 decimals}``.
    """
    def age_band(years):
        if years < 70:
            return '<70'
        if years < 80:
            return '70-79'
        return '80+'

    members_by_slice = {}
    for row, prob, label in zip(rows, probs, labels):
        band = age_band(float(row['age_years']))
        for slice_key in (('sex', row['sex']), ('age_group', band)):
            members_by_slice.setdefault(slice_key, []).append((prob, label))

    report = {}
    for slice_key, members in members_by_slice.items():
        predicted = [1 if prob >= threshold else 0 for prob, _ in members]
        observed = [label for _, label in members]
        report[slice_key] = {
            'count': len(members),
            'macro_f1': round(macro_f1(observed, predicted), 3)
        }
    return report


In [3]:
# Fit the scaler on the training split only, then apply the same statistics
# to both splits so no test-set information leaks into the features.
scaler = StandardScaler(FEATURES)
scaler.fit(train_rows)
X_train = [scaler.transform(r) for r in train_rows]
y_train = [int(r['label_high_fall_risk']) for r in train_rows]
X_test = [scaler.transform(r) for r in test_rows]
y_test = [int(r['label_high_fall_risk']) for r in test_rows]

# Binary baseline 1: logistic regression, evaluated on the held-out split.
logreg = BinaryLogisticRegression(lr=0.15, epochs=140)
logreg.fit(X_train, y_train)
log_probs = logreg.predict_proba(X_test)
# Hard predictions use a fixed 0.5 probability cut-off.
log_preds = [1 if p >= 0.5 else 0 for p in log_probs]
log_auc = auc_score(log_probs, y_test)
log_macro_f1 = macro_f1(y_test, log_preds)
log_brier = brier_score(log_probs, y_test)
print(f'Logistic regression → AUROC: {log_auc:.3f}, macro-F1: {log_macro_f1:.3f}, Brier: {log_brier:.3f}')


Logistic regression → AUROC: 0.927, macro-F1: 0.858, Brier: 0.111


In [4]:
# Binary baseline 2: boosted stumps with the class's default hyper-parameters,
# trained and evaluated on the same scaled features as the logistic model.
gb = GradientBoostingStumps()
gb.fit(X_train, y_train)
gb_probs = gb.predict_proba(X_test)
# Hard predictions use a fixed 0.5 probability cut-off.
gb_preds = [1 if p >= 0.5 else 0 for p in gb_probs]
gb_auc = auc_score(gb_probs, y_test)
gb_macro_f1 = macro_f1(y_test, gb_preds)
gb_brier = brier_score(gb_probs, y_test)
print(f'Gradient boosting stumps → AUROC: {gb_auc:.3f}, macro-F1: {gb_macro_f1:.3f}, Brier: {gb_brier:.3f}')


Gradient boosting stumps → AUROC: 0.855, macro-F1: 0.849, Brier: 0.145


In [5]:
# Calibration of the binary logistic model on the test split; only a short
# preview of the non-empty bins is printed.
log_calibration = calibration_curve(log_probs, y_test)
for entry in preview_calibration(log_calibration):
    print('Logistic calibration bin', entry)

# Same preview for the boosted-stump probabilities.
gb_calibration = calibration_curve(gb_probs, y_test)
for entry in preview_calibration(gb_calibration):
    print('Gradient boosting calibration bin', entry)


Logistic calibration bin {'bin': 0, 'count': 104, 'pred': 0.0770487975918477, 'actual': 0.028846153846153848}
Logistic calibration bin {'bin': 1, 'count': 561, 'pred': 0.15323240117049364, 'actual': 0.09269162210338681}
Logistic calibration bin {'bin': 2, 'count': 535, 'pred': 0.24890581777512216, 'actual': 0.16448598130841122}
Gradient boosting calibration bin {'bin': 3, 'count': 1936, 'pred': 0.35517613943727594, 'actual': 0.22882231404958678}
Gradient boosting calibration bin {'bin': 7, 'count': 1064, 'pred': 0.7597561277917732, 'actual': 1.0}


In [6]:
# Demographic slice metrics for the binary logistic model (sex and age band),
# computed from its test-split probabilities.
slices = slice_macro_f1(test_rows, log_probs, y_test)
for key, info in slices.items():
    print('Slice', key, '→', info)


Slice ('sex', 'Female') → {'count': 1907, 'macro_f1': 0.855}
Slice ('age_group', '70-79') → {'count': 1364, 'macro_f1': 0.866}
Slice ('age_group', '80+') → {'count': 402, 'macro_f1': 0.838}
Slice ('age_group', '<70') → {'count': 1234, 'macro_f1': 0.853}
Slice ('sex', 'Male') → {'count': 1093, 'macro_f1': 0.863}


In [7]:
class OneVsRestLogistic:
    """One-vs-rest multiclass wrapper around ``BinaryLogisticRegression``.

    One binary model is trained per class. ``predict`` returns, for each
    sample, a dict of the per-class scores rescaled to sum to 1.
    """

    def __init__(self, classes):
        self.classes = classes
        self.models = {}

    def fit(self, X, labels):
        """Train one "this class vs. everything else" model per class."""
        for positive in self.classes:
            targets = [1 if label == positive else 0 for label in labels]
            classifier = BinaryLogisticRegression(lr=0.12, epochs=120)
            classifier.fit(X, targets)
            self.models[positive] = classifier

    def _normalised_scores(self, sample):
        """Score one sample with every model and rescale the scores to sum to 1."""
        raw = {
            cls: model.predict_proba([sample])[0]
            for cls, model in self.models.items()
        }
        norm = sum(raw.values())
        if norm == 0:
            # Nothing to rescale: fall back to a uniform distribution.
            return {cls: 1.0 / len(self.classes) for cls in self.classes}
        return {cls: score / norm for cls, score in raw.items()}

    def predict(self, X):
        """Return one ``{class: probability}`` dict per row of ``X``."""
        return [self._normalised_scores(sample) for sample in X]

class OneVsRestGB:
    """One-vs-rest multiclass wrapper around ``GradientBoostingStumps``.

    One boosted-stump model is trained per class. ``predict`` returns, for
    each sample, a dict of the per-class scores rescaled to sum to 1.
    """

    def __init__(self, classes):
        self.classes = classes
        self.models = {}

    def fit(self, X, labels):
        """Train one "this class vs. everything else" model per class."""
        for positive in self.classes:
            targets = [1 if label == positive else 0 for label in labels]
            booster = GradientBoostingStumps()
            booster.fit(X, targets)
            self.models[positive] = booster

    def _normalised_scores(self, sample):
        """Score one sample with every model and rescale the scores to sum to 1."""
        raw = {
            cls: model.predict_proba([sample])[0]
            for cls, model in self.models.items()
        }
        norm = sum(raw.values())
        if norm == 0:
            # Nothing to rescale: fall back to a uniform distribution.
            return {cls: 1.0 / len(self.classes) for cls in self.classes}
        return {cls: score / norm for cls, score in raw.items()}

    def predict(self, X):
        """Return one ``{class: probability}`` dict per row of ``X``."""
        return [self._normalised_scores(sample) for sample in X]

def macro_f1_multiclass(true_labels, pred_labels, classes=None):
    """Return the unweighted mean of the per-class F1 scores.

    Args:
        true_labels: ground-truth class labels (hashable values).
        pred_labels: predicted class labels aligned with ``true_labels``.
        classes: labels to average over; defaults to the module-level
            ``CLASSES`` when omitted.

    Returns:
        The macro-averaged F1. A class with zero precision and zero recall
        contributes 0.0, and an empty ``classes`` yields 0.0.
    """
    if classes is None:
        classes = CLASSES
    if not classes:
        return 0.0
    # A single pass over the data collects everything each class needs.
    actual = Counter()
    predicted = Counter()
    hits = Counter()
    for truth, guess in zip(true_labels, pred_labels):
        actual[truth] += 1
        predicted[guess] += 1
        if truth == guess:
            hits[truth] += 1
    scores = []
    for cls in classes:
        tp = hits[cls]
        fp = predicted[cls] - tp
        fn = actual[cls] - tp
        precision = tp / (tp + fp) if tp + fp else 0.0
        recall = tp / (tp + fn) if tp + fn else 0.0
        f1 = 0.0 if precision + recall == 0 else 2 * precision * recall / (precision + recall)
        scores.append(f1)
    return sum(scores) / len(scores)

def multiclass_auc(true_labels, prob_dicts, classes=None):
    """Return the mean one-vs-rest AUROC over the classes.

    Args:
        true_labels: ground-truth class labels.
        prob_dicts: one ``{class: probability}`` dict per sample; a class
            missing from a dict is scored as 0.0, matching the other
            multiclass metrics in this file.
        classes: labels to average over; defaults to the module-level
            ``CLASSES`` when omitted.

    Returns:
        The mean of the per-class AUROCs that ``auc_score`` could compute
        (it returns ``None`` when a class is absent or universal), or
        ``None`` if none could be computed.
    """
    if classes is None:
        classes = CLASSES
    aucs = []
    for cls in classes:
        binary_true = [1 if label == cls else 0 for label in true_labels]
        binary_scores = [prob.get(cls, 0.0) for prob in prob_dicts]
        auc = auc_score(binary_scores, binary_true)
        if auc is not None:
            aucs.append(auc)
    return sum(aucs) / len(aucs) if aucs else None

def multiclass_brier(prob_dicts, true_labels, classes=None):
    """Return the Brier score averaged over samples and classes.

    Args:
        prob_dicts: one ``{class: probability}`` dict per sample; a class
            missing from a dict is treated as probability 0.0.
        true_labels: ground-truth class labels aligned with ``prob_dicts``.
        classes: labels to score; defaults to the module-level ``CLASSES``
            when omitted.

    Returns:
        The mean squared difference between each class probability and its
        one-hot target, or 0.0 when there are no labels or no classes.
    """
    if classes is None:
        classes = CLASSES
    if not true_labels or not classes:
        return 0.0
    total = 0.0
    for probs, label in zip(prob_dicts, true_labels):
        for cls in classes:
            target = 1.0 if label == cls else 0.0
            total += (probs.get(cls, 0.0) - target) ** 2
    return total / (len(true_labels) * len(classes))

def multiclass_calibration(prob_dicts, true_labels, bins=10, classes=None):
    """Build a one-vs-rest calibration curve for every class.

    Args:
        prob_dicts: one ``{class: probability}`` dict per sample; a class
            missing from a dict is treated as probability 0.0.
        true_labels: ground-truth class labels aligned with ``prob_dicts``.
        bins: number of equal-width probability bins per curve.
        classes: labels to build curves for; defaults to the module-level
            ``CLASSES`` when omitted.

    Returns:
        A dict mapping each class to the list returned by
        ``calibration_curve`` for that class against all others.
    """
    if classes is None:
        classes = CLASSES
    curves = {}
    for cls in classes:
        binary_labels = [1 if label == cls else 0 for label in true_labels]
        binary_probs = [prob.get(cls, 0.0) for prob in prob_dicts]
        curves[cls] = calibration_curve(binary_probs, binary_labels, bins=bins)
    return curves


In [8]:
# 3-class task: reuse the scaled features from the binary task and swap in the
# risk-level labels.
train_labels = [row['label_risk_level'] for row in train_rows]
test_labels = [row['label_risk_level'] for row in test_rows]
ovr_logreg = OneVsRestLogistic(CLASSES)
ovr_logreg.fit(X_train, train_labels)
log_multi_probs = ovr_logreg.predict(X_test)
# Predicted class = label with the highest normalised score.
log_multi_preds = [max(prob.items(), key=lambda x: x[1])[0] for prob in log_multi_probs]
log_multi_f1 = macro_f1_multiclass(test_labels, log_multi_preds)
log_multi_auc = multiclass_auc(test_labels, log_multi_probs)
log_multi_brier = multiclass_brier(log_multi_probs, test_labels)
log_multi_calibration = multiclass_calibration(log_multi_probs, test_labels)
print(f'Logistic OvR (3-class) → AUROC: {log_multi_auc:.3f}, macro-F1: {log_multi_f1:.3f}, Brier: {log_multi_brier:.3f}')
# Print a short calibration preview per class, skipping classes with no
# populated bins.
for cls in CLASSES:
    preview = preview_calibration(log_multi_calibration[cls])
    if preview:
        print(f'Logistic OvR calibration ({cls}) → {preview}')


Logistic OvR (3-class) → AUROC: 0.904, macro-F1: 0.744, Brier: 0.126
Logistic OvR calibration (low) → [{'bin': 0, 'count': 572, 'pred': 0.05592938313651174, 'actual': 0.0}, {'bin': 1, 'count': 595, 'pred': 0.14753184538716943, 'actual': 0.01680672268907563}, {'bin': 2, 'count': 536, 'pred': 0.2498730316172001, 'actual': 0.11380597014925373}]
Logistic OvR calibration (moderate) → [{'bin': 0, 'count': 481, 'pred': 0.05926155340742506, 'actual': 0.004158004158004158}, {'bin': 1, 'count': 676, 'pred': 0.15131315686310884, 'actual': 0.04881656804733728}, {'bin': 2, 'count': 615, 'pred': 0.2469281899248761, 'actual': 0.12032520325203253}]
Logistic OvR calibration (high) → [{'bin': 0, 'count': 102, 'pred': 0.07640565299507092, 'actual': 0.029411764705882353}, {'bin': 1, 'count': 519, 'pred': 0.15404200657938072, 'actual': 0.09248554913294797}, {'bin': 2, 'count': 545, 'pred': 0.2498871955862063, 'actual': 0.1596330275229358}]


In [9]:
# 3-class task with boosted stumps, evaluated the same way as the logistic
# one-vs-rest model (relies on train_labels / test_labels from the cell above).
ovr_gb = OneVsRestGB(CLASSES)
ovr_gb.fit(X_train, train_labels)
gb_multi_probs = ovr_gb.predict(X_test)
# Predicted class = label with the highest normalised score.
gb_multi_preds = [max(prob.items(), key=lambda x: x[1])[0] for prob in gb_multi_probs]
gb_multi_f1 = macro_f1_multiclass(test_labels, gb_multi_preds)
gb_multi_auc = multiclass_auc(test_labels, gb_multi_probs)
gb_multi_brier = multiclass_brier(gb_multi_probs, test_labels)
gb_multi_calibration = multiclass_calibration(gb_multi_probs, test_labels)
print(f'Gradient boosting OvR (3-class) → AUROC: {gb_multi_auc:.3f}, macro-F1: {gb_multi_f1:.3f}, Brier: {gb_multi_brier:.3f}')
# Print a short calibration preview per class, skipping classes with no
# populated bins.
for cls in CLASSES:
    preview = preview_calibration(gb_multi_calibration[cls])
    if preview:
        print(f'Gradient boosting OvR calibration ({cls}) → {preview}')


Gradient boosting OvR (3-class) → AUROC: 0.874, macro-F1: 0.467, Brier: 0.156
Gradient boosting OvR calibration (low) → [{'bin': 1, 'count': 1064, 'pred': 0.1683039198555955, 'actual': 0.0}, {'bin': 2, 'count': 652, 'pred': 0.23582172774482116, 'actual': 0.0}, {'bin': 3, 'count': 1284, 'pred': 0.3474339394037014, 'actual': 0.6043613707165109}]
Gradient boosting OvR calibration (moderate) → [{'bin': 1, 'count': 697, 'pred': 0.14020444013746677, 'actual': 0.0}, {'bin': 2, 'count': 1651, 'pred': 0.25830674108716206, 'actual': 0.14173228346456693}, {'bin': 3, 'count': 652, 'pred': 0.3829117394349736, 'actual': 0.74079754601227}]
Gradient boosting OvR calibration (high) → [{'bin': 3, 'count': 1936, 'pred': 0.3801174610769205, 'actual': 0.22882231404958678}, {'bin': 6, 'count': 1064, 'pred': 0.6685252833857224, 'actual': 1.0}]
