diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index c2141c0..cd7470e 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -103,7 +103,6 @@ scripts/pretrain/run-prosup-heedb-pretrain.sh @StevenSong
scripts/prototypes-from-fms @StevenSong
scripts/queue-experiments.sh @StevenSong
scripts/README.md @StevenSong @sahilsethi0105
-user-study/analyze_results.ipynb @StevenSong
user-study/images @StevenSong
user-study/metadata.csv @StevenSong
user-study/prepare_samples.ipynb @StevenSong
diff --git a/user-study/analyze_results.ipynb b/user-study/analyze_results.ipynb
deleted file mode 100644
index 54f554e..0000000
--- a/user-study/analyze_results.ipynb
+++ /dev/null
@@ -1,2316 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "id": "a8401e54",
- "metadata": {},
- "source": [
- "# ProtoSSL User study analysis\n",
- "\n",
- "This notebook conducts:\n",
- "\n",
- "- **Primary analysis:** participant-level paired comparison of the proportion of responses rated as good for ProtoSSL vs ProtoECGNet, done separately for the two tasks.\n",
- "- **Primary test:** two-sided **Wilcoxon signed-rank test** across participants.\n",
- "- **Comparative A/B/Both/Neither question:** descriptive summaries\n",
- "- **Inter-rater agreement:** **Fleiss' kappa** for the binary yes/no ratings, reported overall and by label.\n",
- "\n",
- "The **participants** are the primary unit of inference.\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "id": "73ba5be4",
- "metadata": {},
- "outputs": [],
- "source": [
- "import math\n",
- "import numpy as np\n",
- "import pandas as pd\n",
- "from scipy.stats import wilcoxon, ttest_rel, binomtest, t as tdist\n",
- "from statsmodels.stats.inter_rater import fleiss_kappa\n",
- "\n",
- "pd.set_option(\"display.max_columns\", None)\n",
- "pd.set_option(\"display.width\", 200)\n",
- "pd.set_option(\"display.float_format\", lambda x: f\"{x:.4f}\")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "id": "d094a664",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "results shape: (7, 131)\n",
- "metadata shape: (20, 15)\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " record_id | \n",
- " redcap_survey_identifier | \n",
- " user_study_form_timestamp | \n",
- " consent | \n",
- " prototypes_quality_choices | \n",
- " prototypea_quality | \n",
- " prototypeb_quality | \n",
- " prototypes_quality_choices_2 | \n",
- " explanation_a | \n",
- " explanation_b | \n",
- " case1_prototypes_quality_choices | \n",
- " case1_prototypea_quality | \n",
- " case1_prototypeb_quality | \n",
- " case1_prototypes_quality_choices_2 | \n",
- " case1_explanation_a | \n",
- " case1_explanation_b | \n",
- " case2_prototypes_quality_choices | \n",
- " case2_prototypea_quality | \n",
- " case2_prototypeb_quality | \n",
- " case2_prototypes_quality_choices_2 | \n",
- " case2_explanation_a | \n",
- " case2_explanation_b | \n",
- " case3_prototypes_quality_choices | \n",
- " case3_prototypea_quality | \n",
- " case3_prototypeb_quality | \n",
- " case3_prototypes_quality_choices_2 | \n",
- " case3_explanation_a | \n",
- " case3_explanation_b | \n",
- " case4_prototypes_quality_choices | \n",
- " case4_prototypea_quality | \n",
- " case4_prototypeb_quality | \n",
- " case4_prototypes_quality_choices_2 | \n",
- " case4_explanation_a | \n",
- " case4_explanation_b | \n",
- " case5_prototypes_quality_choices | \n",
- " case5_prototypea_quality | \n",
- " case5_prototypeb_quality | \n",
- " case5_prototypes_quality_choices_2 | \n",
- " case5_explanation_a | \n",
- " case5_explanation_b | \n",
- " case6_prototypes_quality_choices | \n",
- " case6_prototypea_quality | \n",
- " case6_prototypeb_quality | \n",
- " case6_prototypes_quality_choices_2 | \n",
- " case6_explanation_a | \n",
- " case6_explanation_b | \n",
- " case7_prototypes_quality_choices | \n",
- " case7_prototypea_quality | \n",
- " case7_prototypeb_quality | \n",
- " case7_prototypes_quality_choices_2 | \n",
- " case7_explanation_a | \n",
- " case7_explanation_b | \n",
- " case8_prototypes_quality_choices | \n",
- " case8_prototypea_quality | \n",
- " case8_prototypeb_quality | \n",
- " case8_prototypes_quality_choices_2 | \n",
- " case8_explanation_a | \n",
- " case8_explanation_b | \n",
- " case9_prototypes_quality_choices | \n",
- " case9_prototypea_quality | \n",
- " case9_prototypeb_quality | \n",
- " case9_prototypes_quality_choices_2 | \n",
- " case9_explanation_a | \n",
- " case9_explanation_b | \n",
- " case10_prototypes_quality_choices | \n",
- " case10_prototypea_quality | \n",
- " case10_prototypeb_quality | \n",
- " case10_prototypes_quality_choices_2 | \n",
- " case10_explanation_a | \n",
- " case10_explanation_b | \n",
- " case11_prototypes_quality_choices | \n",
- " case11_prototypea_quality | \n",
- " case11_prototypeb_quality | \n",
- " case11_prototypes_quality_choices_2 | \n",
- " case11_explanation_a | \n",
- " case11_explanation_b | \n",
- " case12_prototypes_quality_choices | \n",
- " case12_prototypea_quality | \n",
- " case12_prototypeb_quality | \n",
- " case12_prototypes_quality_choices_2 | \n",
- " case12_explanation_a | \n",
- " case12_explanation_b | \n",
- " case13_prototypes_quality_choices | \n",
- " case13_prototypea_quality | \n",
- " case13_prototypeb_quality | \n",
- " case13_prototypes_quality_choices_2 | \n",
- " case13_explanation_a | \n",
- " case13_explanation_b | \n",
- " case14_prototypes_quality_choices | \n",
- " case14_prototypea_quality | \n",
- " case14_prototypeb_quality | \n",
- " case14_prototypes_quality_choices_2 | \n",
- " case14_explanation_a | \n",
- " case14_explanation_b | \n",
- " case15_prototypes_quality_choices | \n",
- " case15_prototypea_quality | \n",
- " case15_prototypeb_quality | \n",
- " case15_prototypes_quality_choices_2 | \n",
- " case15_explanation_a | \n",
- " case15_explanation_b | \n",
- " case16_prototypes_quality_choices | \n",
- " case16_prototypea_quality | \n",
- " case16_prototypeb_quality | \n",
- " case16_prototypes_quality_choices_2 | \n",
- " case16_explanation_a | \n",
- " case16_explanation_b | \n",
- " case17_prototypes_quality_choices | \n",
- " case17_prototypea_quality | \n",
- " case17_prototypeb_quality | \n",
- " case17_prototypes_quality_choices_2 | \n",
- " case17_explanation_a | \n",
- " case17_explanation_b | \n",
- " case18_prototypes_quality_choices | \n",
- " case18_prototypea_quality | \n",
- " case18_prototypeb_quality | \n",
- " case18_prototypes_quality_choices_2 | \n",
- " case18_explanation_a | \n",
- " case18_explanation_b | \n",
- " case19_prototypes_quality_choices | \n",
- " case19_prototypea_quality | \n",
- " case19_prototypeb_quality | \n",
- " case19_prototypes_quality_choices_2 | \n",
- " case19_explanation_a | \n",
- " case19_explanation_b | \n",
- " case20_prototypes_quality_choices | \n",
- " case20_prototypea_quality | \n",
- " case20_prototypeb_quality | \n",
- " case20_prototypes_quality_choices_2 | \n",
- " case20_explanation_a | \n",
- " case20_explanation_b | \n",
- " user_study_form_complete | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 5 | \n",
- " NaN | \n",
- " 2026-04-06 18:18:56 | \n",
- " 1 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 7 | \n",
- " NaN | \n",
- " 2026-04-09 11:26:53 | \n",
- " 1 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 0 | \n",
- " 0 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 0 | \n",
- " 1 | \n",
- " 1 | \n",
- " 0 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 0 | \n",
- " 1 | \n",
- " 1 | \n",
- " 0 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 0 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 0 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2 | \n",
- " 1 | \n",
- " 0 | \n",
- " 1 | \n",
- " 1 | \n",
- " 0 | \n",
- " 1 | \n",
- " 1 | \n",
- " 0 | \n",
- " 2 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 3 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2 | \n",
- " 1 | \n",
- " 1 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 0 | \n",
- " 1 | \n",
- " 1 | \n",
- " 0 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 1 | \n",
- " 0 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " 2 | \n",
- " 0 | \n",
- " 1 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " record_id redcap_survey_identifier user_study_form_timestamp consent prototypes_quality_choices prototypea_quality prototypeb_quality prototypes_quality_choices_2 explanation_a \\\n",
- "0 5 NaN 2026-04-06 18:18:56 1 2 0 1 2 0 \n",
- "1 7 NaN 2026-04-09 11:26:53 1 2 0 1 2 0 \n",
- "\n",
- " explanation_b case1_prototypes_quality_choices case1_prototypea_quality case1_prototypeb_quality case1_prototypes_quality_choices_2 case1_explanation_a case1_explanation_b \\\n",
- "0 1 3 1 1 3 1 1 \n",
- "1 1 3 1 1 1 1 1 \n",
- "\n",
- " case2_prototypes_quality_choices case2_prototypea_quality case2_prototypeb_quality case2_prototypes_quality_choices_2 case2_explanation_a case2_explanation_b \\\n",
- "0 1 1 1 1 1 1 \n",
- "1 3 1 1 3 1 1 \n",
- "\n",
- " case3_prototypes_quality_choices case3_prototypea_quality case3_prototypeb_quality case3_prototypes_quality_choices_2 case3_explanation_a case3_explanation_b \\\n",
- "0 2 1 1 2 1 1 \n",
- "1 3 0 0 2 0 1 \n",
- "\n",
- " case4_prototypes_quality_choices case4_prototypea_quality case4_prototypeb_quality case4_prototypes_quality_choices_2 case4_explanation_a case4_explanation_b \\\n",
- "0 3 1 1 1 1 1 \n",
- "1 1 1 0 1 1 0 \n",
- "\n",
- " case5_prototypes_quality_choices case5_prototypea_quality case5_prototypeb_quality case5_prototypes_quality_choices_2 case5_explanation_a case5_explanation_b \\\n",
- "0 3 1 1 3 1 1 \n",
- "1 3 1 1 3 1 1 \n",
- "\n",
- " case6_prototypes_quality_choices case6_prototypea_quality case6_prototypeb_quality case6_prototypes_quality_choices_2 case6_explanation_a case6_explanation_b \\\n",
- "0 3 1 1 3 1 1 \n",
- "1 1 1 0 1 1 0 \n",
- "\n",
- " case7_prototypes_quality_choices case7_prototypea_quality case7_prototypeb_quality case7_prototypes_quality_choices_2 case7_explanation_a case7_explanation_b \\\n",
- "0 3 1 1 3 1 1 \n",
- "1 1 1 1 1 1 0 \n",
- "\n",
- " case8_prototypes_quality_choices case8_prototypea_quality case8_prototypeb_quality case8_prototypes_quality_choices_2 case8_explanation_a case8_explanation_b \\\n",
- "0 3 1 1 3 1 1 \n",
- "1 3 1 1 3 1 1 \n",
- "\n",
- " case9_prototypes_quality_choices case9_prototypea_quality case9_prototypeb_quality case9_prototypes_quality_choices_2 case9_explanation_a case9_explanation_b \\\n",
- "0 3 1 1 1 1 1 \n",
- "1 3 1 1 1 1 0 \n",
- "\n",
- " case10_prototypes_quality_choices case10_prototypea_quality case10_prototypeb_quality case10_prototypes_quality_choices_2 case10_explanation_a case10_explanation_b \\\n",
- "0 2 1 1 2 1 1 \n",
- "1 3 1 1 2 1 0 \n",
- "\n",
- " case11_prototypes_quality_choices case11_prototypea_quality case11_prototypeb_quality case11_prototypes_quality_choices_2 case11_explanation_a case11_explanation_b \\\n",
- "0 2 1 1 3 1 1 \n",
- "1 1 1 0 1 1 0 \n",
- "\n",
- " case12_prototypes_quality_choices case12_prototypea_quality case12_prototypeb_quality case12_prototypes_quality_choices_2 case12_explanation_a case12_explanation_b \\\n",
- "0 2 1 1 3 1 1 \n",
- "1 2 1 1 3 1 1 \n",
- "\n",
- " case13_prototypes_quality_choices case13_prototypea_quality case13_prototypeb_quality case13_prototypes_quality_choices_2 case13_explanation_a case13_explanation_b \\\n",
- "0 3 1 1 3 1 1 \n",
- "1 2 0 1 3 1 1 \n",
- "\n",
- " case14_prototypes_quality_choices case14_prototypea_quality case14_prototypeb_quality case14_prototypes_quality_choices_2 case14_explanation_a case14_explanation_b \\\n",
- "0 3 1 1 3 1 1 \n",
- "1 3 1 1 1 1 1 \n",
- "\n",
- " case15_prototypes_quality_choices case15_prototypea_quality case15_prototypeb_quality case15_prototypes_quality_choices_2 case15_explanation_a case15_explanation_b \\\n",
- "0 3 1 1 3 1 1 \n",
- "1 3 1 1 2 1 1 \n",
- "\n",
- " case16_prototypes_quality_choices case16_prototypea_quality case16_prototypeb_quality case16_prototypes_quality_choices_2 case16_explanation_a case16_explanation_b \\\n",
- "0 2 0 1 2 0 1 \n",
- "1 2 0 1 2 0 1 \n",
- "\n",
- " case17_prototypes_quality_choices case17_prototypea_quality case17_prototypeb_quality case17_prototypes_quality_choices_2 case17_explanation_a case17_explanation_b \\\n",
- "0 3 1 1 1 1 1 \n",
- "1 1 1 0 1 1 0 \n",
- "\n",
- " case18_prototypes_quality_choices case18_prototypea_quality case18_prototypeb_quality case18_prototypes_quality_choices_2 case18_explanation_a case18_explanation_b \\\n",
- "0 2 0 1 2 0 1 \n",
- "1 2 0 1 2 0 1 \n",
- "\n",
- " case19_prototypes_quality_choices case19_prototypea_quality case19_prototypeb_quality case19_prototypes_quality_choices_2 case19_explanation_a case19_explanation_b \\\n",
- "0 3 1 1 3 1 1 \n",
- "1 1 1 1 1 1 0 \n",
- "\n",
- " case20_prototypes_quality_choices case20_prototypea_quality case20_prototypeb_quality case20_prototypes_quality_choices_2 case20_explanation_a case20_explanation_b user_study_form_complete \n",
- "0 3 1 1 2 1 1 2 \n",
- "1 2 0 1 2 0 1 2 "
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "results = pd.read_csv(\"results.csv\")\n",
- "metadata = pd.read_csv(\"metadata.csv\")\n",
- "\n",
- "print(\"results shape:\", results.shape)\n",
- "print(\"metadata shape:\", metadata.shape)\n",
- "results.head(2)\n"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "7f870f0f",
- "metadata": {},
- "source": [
- "### Decode REDCap responses into analysis tables\n",
- "\n",
- "`yesno` contains one row per participant × case × task × model for the binary yes/no questions.\n",
- "\n",
- "`prefs` contains one row per participant × case × task for the A/B/Both/Neither comparative question, decoded back to the actual model identities using the metadata file.\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "id": "16acb6b2",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "yesno shape: (560, 6)\n",
- "prefs shape: (280, 5)\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " participant | \n",
- " case_id | \n",
- " label | \n",
- " task | \n",
- " model | \n",
- " good | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 5 | \n",
- " 1 | \n",
- " AMI | \n",
- " global | \n",
- " ProtoSSL | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 5 | \n",
- " 1 | \n",
- " AMI | \n",
- " global | \n",
- " ProtoECGNet | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 5 | \n",
- " 1 | \n",
- " AMI | \n",
- " paired | \n",
- " ProtoSSL | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 5 | \n",
- " 1 | \n",
- " AMI | \n",
- " paired | \n",
- " ProtoECGNet | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 5 | \n",
- " 2 | \n",
- " AMI | \n",
- " global | \n",
- " ProtoSSL | \n",
- " 1 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " participant case_id label task model good\n",
- "0 5 1 AMI global ProtoSSL 1\n",
- "1 5 1 AMI global ProtoECGNet 1\n",
- "2 5 1 AMI paired ProtoSSL 1\n",
- "3 5 1 AMI paired ProtoECGNet 1\n",
- "4 5 2 AMI global ProtoSSL 1"
- ]
- },
- "execution_count": 17,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "case_map = (\n",
- " metadata.rename(\n",
- " columns={\n",
- " \"Study Index\": \"case_id\",\n",
- " \"Label\": \"label\",\n",
- " \"ProtoSSL Assignment\": \"ssl_assignment\",\n",
- " \"ProtoECGNet Assignment\": \"ecg_assignment\",\n",
- " }\n",
- " )[[\"case_id\", \"label\", \"ssl_assignment\", \"ecg_assignment\"]]\n",
- " .copy()\n",
- ")\n",
- "case_map[\"case_id\"] = case_map[\"case_id\"].astype(int)\n",
- "\n",
- "pref_code = {1: \"A\", 2: \"B\", 3: \"Both\", 4: \"Neither\"}\n",
- "\n",
- "yes_rows = []\n",
- "pref_rows = []\n",
- "\n",
- "for _, row in results.iterrows():\n",
- " participant = int(row[\"record_id\"])\n",
- " for case_id in range(1, 21):\n",
- " meta_row = case_map.loc[case_map[\"case_id\"] == case_id].iloc[0]\n",
- "\n",
- " task_specs = [\n",
- " (\"global\",\n",
- " f\"case{case_id}_prototypea_quality\",\n",
- " f\"case{case_id}_prototypeb_quality\",\n",
- " f\"case{case_id}_prototypes_quality_choices\"),\n",
- " (\"paired\",\n",
- " f\"case{case_id}_explanation_a\",\n",
- " f\"case{case_id}_explanation_b\",\n",
- " f\"case{case_id}_prototypes_quality_choices_2\"),\n",
- " ]\n",
- "\n",
- " for task, a_col, b_col, pref_col in task_specs:\n",
- " for shown_letter, col in [(\"A\", a_col), (\"B\", b_col)]:\n",
- " actual_model = \"ProtoSSL\" if meta_row[\"ssl_assignment\"] == shown_letter else \"ProtoECGNet\"\n",
- " yes_rows.append(\n",
- " {\n",
- " \"participant\": participant,\n",
- " \"case_id\": case_id,\n",
- " \"label\": meta_row[\"label\"],\n",
- " \"task\": task,\n",
- " \"model\": actual_model,\n",
- " \"good\": int(row[col]),\n",
- " }\n",
- " )\n",
- "\n",
- " pref_value = pref_code[int(row[pref_col])]\n",
- " if pref_value in [\"A\", \"B\"]:\n",
- " actual_pref = \"ProtoSSL\" if meta_row[\"ssl_assignment\"] == pref_value else \"ProtoECGNet\"\n",
- " else:\n",
- " actual_pref = pref_value\n",
- "\n",
- " pref_rows.append(\n",
- " {\n",
- " \"participant\": participant,\n",
- " \"case_id\": case_id,\n",
- " \"label\": meta_row[\"label\"],\n",
- " \"task\": task,\n",
- " \"preference\": actual_pref,\n",
- " }\n",
- " )\n",
- "\n",
- "yesno = pd.DataFrame(yes_rows)\n",
- "prefs = pd.DataFrame(pref_rows)\n",
- "\n",
- "print(\"yesno shape:\", yesno.shape)\n",
- "print(\"prefs shape:\", prefs.shape)\n",
- "yesno.head()\n"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "0dce4296",
- "metadata": {},
- "source": [
- "### Descriptive summaries for the binary yes/no questions"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "id": "961b8054",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " task | \n",
- " model | \n",
- " n_yes | \n",
- " n_total | \n",
- " proportion | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " global | \n",
- " ProtoECGNet | \n",
- " 93 | \n",
- " 140 | \n",
- " 0.6643 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " global | \n",
- " ProtoSSL | \n",
- " 128 | \n",
- " 140 | \n",
- " 0.9143 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " paired | \n",
- " ProtoECGNet | \n",
- " 95 | \n",
- " 140 | \n",
- " 0.6786 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " paired | \n",
- " ProtoSSL | \n",
- " 116 | \n",
- " 140 | \n",
- " 0.8286 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " task model n_yes n_total proportion\n",
- "0 global ProtoECGNet 93 140 0.6643\n",
- "1 global ProtoSSL 128 140 0.9143\n",
- "2 paired ProtoECGNet 95 140 0.6786\n",
- "3 paired ProtoSSL 116 140 0.8286"
- ]
- },
- "execution_count": 18,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "overall_yesno = (\n",
- " yesno.groupby([\"task\", \"model\"])[\"good\"]\n",
- " .agg(n_yes=\"sum\", n_total=\"count\", proportion=\"mean\")\n",
- " .reset_index()\n",
- ")\n",
- "overall_yesno\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "id": "da09502f",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " task | \n",
- " label | \n",
- " model | \n",
- " n_yes | \n",
- " n_total | \n",
- " proportion | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " global | \n",
- " AMI | \n",
- " ProtoECGNet | \n",
- " 23 | \n",
- " 35 | \n",
- " 0.6571 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " global | \n",
- " AMI | \n",
- " ProtoSSL | \n",
- " 30 | \n",
- " 35 | \n",
- " 0.8571 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " global | \n",
- " CLBBB | \n",
- " ProtoECGNet | \n",
- " 33 | \n",
- " 35 | \n",
- " 0.9429 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " global | \n",
- " CLBBB | \n",
- " ProtoSSL | \n",
- " 32 | \n",
- " 35 | \n",
- " 0.9143 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " global | \n",
- " CRBBB | \n",
- " ProtoECGNet | \n",
- " 19 | \n",
- " 35 | \n",
- " 0.5429 | \n",
- "
\n",
- " \n",
- " | 5 | \n",
- " global | \n",
- " CRBBB | \n",
- " ProtoSSL | \n",
- " 32 | \n",
- " 35 | \n",
- " 0.9143 | \n",
- "
\n",
- " \n",
- " | 6 | \n",
- " global | \n",
- " PVC | \n",
- " ProtoECGNet | \n",
- " 18 | \n",
- " 35 | \n",
- " 0.5143 | \n",
- "
\n",
- " \n",
- " | 7 | \n",
- " global | \n",
- " PVC | \n",
- " ProtoSSL | \n",
- " 34 | \n",
- " 35 | \n",
- " 0.9714 | \n",
- "
\n",
- " \n",
- " | 8 | \n",
- " paired | \n",
- " AMI | \n",
- " ProtoECGNet | \n",
- " 23 | \n",
- " 35 | \n",
- " 0.6571 | \n",
- "
\n",
- " \n",
- " | 9 | \n",
- " paired | \n",
- " AMI | \n",
- " ProtoSSL | \n",
- " 28 | \n",
- " 35 | \n",
- " 0.8000 | \n",
- "
\n",
- " \n",
- " | 10 | \n",
- " paired | \n",
- " CLBBB | \n",
- " ProtoECGNet | \n",
- " 33 | \n",
- " 35 | \n",
- " 0.9429 | \n",
- "
\n",
- " \n",
- " | 11 | \n",
- " paired | \n",
- " CLBBB | \n",
- " ProtoSSL | \n",
- " 27 | \n",
- " 35 | \n",
- " 0.7714 | \n",
- "
\n",
- " \n",
- " | 12 | \n",
- " paired | \n",
- " CRBBB | \n",
- " ProtoECGNet | \n",
- " 19 | \n",
- " 35 | \n",
- " 0.5429 | \n",
- "
\n",
- " \n",
- " | 13 | \n",
- " paired | \n",
- " CRBBB | \n",
- " ProtoSSL | \n",
- " 30 | \n",
- " 35 | \n",
- " 0.8571 | \n",
- "
\n",
- " \n",
- " | 14 | \n",
- " paired | \n",
- " PVC | \n",
- " ProtoECGNet | \n",
- " 20 | \n",
- " 35 | \n",
- " 0.5714 | \n",
- "
\n",
- " \n",
- " | 15 | \n",
- " paired | \n",
- " PVC | \n",
- " ProtoSSL | \n",
- " 31 | \n",
- " 35 | \n",
- " 0.8857 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " task label model n_yes n_total proportion\n",
- "0 global AMI ProtoECGNet 23 35 0.6571\n",
- "1 global AMI ProtoSSL 30 35 0.8571\n",
- "2 global CLBBB ProtoECGNet 33 35 0.9429\n",
- "3 global CLBBB ProtoSSL 32 35 0.9143\n",
- "4 global CRBBB ProtoECGNet 19 35 0.5429\n",
- "5 global CRBBB ProtoSSL 32 35 0.9143\n",
- "6 global PVC ProtoECGNet 18 35 0.5143\n",
- "7 global PVC ProtoSSL 34 35 0.9714\n",
- "8 paired AMI ProtoECGNet 23 35 0.6571\n",
- "9 paired AMI ProtoSSL 28 35 0.8000\n",
- "10 paired CLBBB ProtoECGNet 33 35 0.9429\n",
- "11 paired CLBBB ProtoSSL 27 35 0.7714\n",
- "12 paired CRBBB ProtoECGNet 19 35 0.5429\n",
- "13 paired CRBBB ProtoSSL 30 35 0.8571\n",
- "14 paired PVC ProtoECGNet 20 35 0.5714\n",
- "15 paired PVC ProtoSSL 31 35 0.8857"
- ]
- },
- "execution_count": 19,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "label_yesno = (\n",
- " yesno.groupby([\"task\", \"label\", \"model\"])[\"good\"]\n",
- " .agg(n_yes=\"sum\", n_total=\"count\", proportion=\"mean\")\n",
- " .reset_index()\n",
- ")\n",
- "label_yesno\n"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "03a50af7",
- "metadata": {},
- "source": [
- "### Primary analysis\n",
- "\n",
- "For each participant and each task, compute the proportion of responses rated as good for each model across the 20 cases. Then compare ProtoSSL vs ProtoECGNet with a **paired Wilcoxon signed-rank test**.\n",
- "\n",
- "Because there are **two primary task-level hypotheses** (`global` and `paired`), apply **Holm correction** across those two p-values.\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "id": "3194ed44",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | model | \n",
- " participant | \n",
- " task | \n",
- " ProtoECGNet | \n",
- " ProtoSSL | \n",
- " difference | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " 5 | \n",
- " global | \n",
- " 0.9000 | \n",
- " 1.0000 | \n",
- " 0.1000 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " 5 | \n",
- " paired | \n",
- " 0.9000 | \n",
- " 1.0000 | \n",
- " 0.1000 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " 7 | \n",
- " global | \n",
- " 0.7000 | \n",
- " 0.8000 | \n",
- " 0.1000 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " 7 | \n",
- " paired | \n",
- " 0.6000 | \n",
- " 0.8000 | \n",
- " 0.2000 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " 8 | \n",
- " global | \n",
- " 0.4500 | \n",
- " 0.8000 | \n",
- " 0.3500 | \n",
- "
\n",
- " \n",
- " | 5 | \n",
- " 8 | \n",
- " paired | \n",
- " 0.5000 | \n",
- " 0.5500 | \n",
- " 0.0500 | \n",
- "
\n",
- " \n",
- " | 6 | \n",
- " 9 | \n",
- " global | \n",
- " 0.6000 | \n",
- " 0.9500 | \n",
- " 0.3500 | \n",
- "
\n",
- " \n",
- " | 7 | \n",
- " 9 | \n",
- " paired | \n",
- " 0.5500 | \n",
- " 0.9000 | \n",
- " 0.3500 | \n",
- "
\n",
- " \n",
- " | 8 | \n",
- " 10 | \n",
- " global | \n",
- " 0.6000 | \n",
- " 0.9500 | \n",
- " 0.3500 | \n",
- "
\n",
- " \n",
- " | 9 | \n",
- " 10 | \n",
- " paired | \n",
- " 0.7500 | \n",
- " 0.9500 | \n",
- " 0.2000 | \n",
- "
\n",
- " \n",
- " | 10 | \n",
- " 11 | \n",
- " global | \n",
- " 0.6500 | \n",
- " 0.9500 | \n",
- " 0.3000 | \n",
- "
\n",
- " \n",
- " | 11 | \n",
- " 11 | \n",
- " paired | \n",
- " 0.8000 | \n",
- " 0.9000 | \n",
- " 0.1000 | \n",
- "
\n",
- " \n",
- " | 12 | \n",
- " 12 | \n",
- " global | \n",
- " 0.7500 | \n",
- " 0.9500 | \n",
- " 0.2000 | \n",
- "
\n",
- " \n",
- " | 13 | \n",
- " 12 | \n",
- " paired | \n",
- " 0.6500 | \n",
- " 0.7000 | \n",
- " 0.0500 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- "model participant task ProtoECGNet ProtoSSL difference\n",
- "0 5 global 0.9000 1.0000 0.1000\n",
- "1 5 paired 0.9000 1.0000 0.1000\n",
- "2 7 global 0.7000 0.8000 0.1000\n",
- "3 7 paired 0.6000 0.8000 0.2000\n",
- "4 8 global 0.4500 0.8000 0.3500\n",
- "5 8 paired 0.5000 0.5500 0.0500\n",
- "6 9 global 0.6000 0.9500 0.3500\n",
- "7 9 paired 0.5500 0.9000 0.3500\n",
- "8 10 global 0.6000 0.9500 0.3500\n",
- "9 10 paired 0.7500 0.9500 0.2000\n",
- "10 11 global 0.6500 0.9500 0.3000\n",
- "11 11 paired 0.8000 0.9000 0.1000\n",
- "12 12 global 0.7500 0.9500 0.2000\n",
- "13 12 paired 0.6500 0.7000 0.0500"
- ]
- },
- "execution_count": 20,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "participant_summary = (\n",
- " yesno.groupby([\"participant\", \"task\", \"model\"])[\"good\"]\n",
- " .mean()\n",
- " .unstack(\"model\")\n",
- " .reset_index()\n",
- ")\n",
- "\n",
- "participant_summary[\"difference\"] = (\n",
- " participant_summary[\"ProtoSSL\"] - participant_summary[\"ProtoECGNet\"]\n",
- ")\n",
- "\n",
- "participant_summary\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "id": "b6ff4f59",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " task | \n",
- " n_participants | \n",
- " ProtoSSL_mean | \n",
- " ProtoECGNet_mean | \n",
- " mean_difference | \n",
- " ci95_low | \n",
- " ci95_high | \n",
- " wilcoxon_W | \n",
- " wilcoxon_p | \n",
- " paired_t_p | \n",
- " sign_test_p | \n",
- " wilcoxon_p_holm | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " global | \n",
- " 7 | \n",
- " 0.9143 | \n",
- " 0.6643 | \n",
- " 0.2500 | \n",
- " 0.1432 | \n",
- " 0.3568 | \n",
- " 0.0000 | \n",
- " 0.0178 | \n",
- " 0.0012 | \n",
- " 0.0156 | \n",
- " 0.0355 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " paired | \n",
- " 7 | \n",
- " 0.8286 | \n",
- " 0.6786 | \n",
- " 0.1500 | \n",
- " 0.0501 | \n",
- " 0.2499 | \n",
- " 0.0000 | \n",
- " 0.0178 | \n",
- " 0.0104 | \n",
- " 0.0156 | \n",
- " 0.0355 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " task n_participants ProtoSSL_mean ProtoECGNet_mean mean_difference ci95_low ci95_high wilcoxon_W wilcoxon_p paired_t_p sign_test_p wilcoxon_p_holm\n",
- "0 global 7 0.9143 0.6643 0.2500 0.1432 0.3568 0.0000 0.0178 0.0012 0.0156 0.0355\n",
- "1 paired 7 0.8286 0.6786 0.1500 0.0501 0.2499 0.0000 0.0178 0.0104 0.0156 0.0355"
- ]
- },
- "execution_count": 21,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "def holm_adjust(pvalues):\n",
- " pvalues = np.asarray(pvalues, dtype=float)\n",
- " m = len(pvalues)\n",
- " order = np.argsort(pvalues)\n",
- " adjusted = np.empty_like(pvalues)\n",
- " running_max = 0.0\n",
- "\n",
- " for rank, idx in enumerate(order):\n",
- " candidate = (m - rank) * pvalues[idx]\n",
- " running_max = max(running_max, candidate)\n",
- " adjusted[idx] = min(running_max, 1.0)\n",
- "\n",
- " return adjusted\n",
- "\n",
- "def participant_level_analysis(df, tasks=(\"global\", \"paired\"), wilcoxon_method=\"approx\"):\n",
- "    \"\"\"Compare ProtoSSL with ProtoECGNet separately for each task.\n",
- "\n",
- "    For every task, the rows of ``df`` with that ``task`` value are selected and the\n",
- "    ``difference`` column is tested with a two-sided Wilcoxon signed-rank test, a\n",
- "    paired t-test on ``ProtoSSL`` vs ``ProtoECGNet``, and a two-sided sign test.\n",
- "    A t-based 95% confidence interval for the mean difference is reported as well.\n",
- "\n",
- "    Parameters\n",
- "    ----------\n",
- "    df : pandas.DataFrame\n",
- "        Needs the columns ``task``, ``ProtoSSL``, ``ProtoECGNet`` and ``difference``.\n",
- "        Presumably one row per participant and task (the row count is reported as\n",
- "        ``n_participants``) -- verify against the caller.\n",
- "    tasks : sequence of str, optional\n",
- "        Task names to analyse, in output order.\n",
- "    wilcoxon_method : str, optional\n",
- "        Passed to ``scipy.stats.wilcoxon(method=...)``. The default keeps the normal\n",
- "        approximation. NOTE(review): with only a handful of participants the\n",
- "        approximation is coarse; consider \"auto\" or \"exact\" -- confirm before\n",
- "        changing any reported numbers.\n",
- "\n",
- "    Returns\n",
- "    -------\n",
- "    pandas.DataFrame\n",
- "        One row per task with the model means, mean difference, CI bounds, the three\n",
- "        p-values, and ``wilcoxon_p_holm`` (Holm adjustment of ``wilcoxon_p`` across tasks).\n",
- "\n",
- "    Raises\n",
- "    ------\n",
- "    ValueError\n",
- "        If ``df`` has no rows for one of the requested tasks.\n",
- "    \"\"\"\n",
- "    rows = []\n",
- "\n",
- "    for task in tasks:\n",
- "        sub = df[df[\"task\"] == task]\n",
- "        if sub.empty:\n",
- "            raise ValueError(f\"No rows found for task {task!r}\")\n",
- "        diffs = sub[\"difference\"].to_numpy()\n",
- "        n_obs = len(diffs)\n",
- "\n",
- "        w = wilcoxon(diffs, alternative=\"two-sided\", zero_method=\"wilcox\", method=wilcoxon_method)\n",
- "        ttest = ttest_rel(sub[\"ProtoSSL\"], sub[\"ProtoECGNet\"])\n",
- "        # Sign test: zero differences are excluded from the number of trials.\n",
- "        sign = binomtest(np.sum(diffs > 0), np.sum(diffs != 0), p=0.5, alternative=\"two-sided\")\n",
- "\n",
- "        # t-based 95% confidence interval for the mean paired difference.\n",
- "        mean_diff = float(np.mean(diffs))\n",
- "        sd_diff = float(np.std(diffs, ddof=1))\n",
- "        se_diff = sd_diff / math.sqrt(n_obs)\n",
- "        tcrit = tdist.ppf(0.975, df=n_obs - 1)\n",
- "\n",
- "        rows.append(\n",
- "            {\n",
- "                \"task\": task,\n",
- "                \"n_participants\": len(sub),\n",
- "                \"ProtoSSL_mean\": sub[\"ProtoSSL\"].mean(),\n",
- "                \"ProtoECGNet_mean\": sub[\"ProtoECGNet\"].mean(),\n",
- "                \"mean_difference\": mean_diff,\n",
- "                \"ci95_low\": mean_diff - tcrit * se_diff,\n",
- "                \"ci95_high\": mean_diff + tcrit * se_diff,\n",
- "                \"wilcoxon_W\": float(w.statistic),\n",
- "                \"wilcoxon_p\": float(w.pvalue),\n",
- "                \"paired_t_p\": float(ttest.pvalue),\n",
- "                \"sign_test_p\": float(sign.pvalue),\n",
- "            }\n",
- "        )\n",
- "\n",
- "    out = pd.DataFrame(rows)\n",
- "    # Holm correction is applied across the per-task Wilcoxon p-values only.\n",
- "    out[\"wilcoxon_p_holm\"] = holm_adjust(out[\"wilcoxon_p\"].to_numpy())\n",
- "    return out\n",
- "\n",
- "# Run the participant-level analysis on `participant_summary` and display the resulting table.\n",
- "primary_results = participant_level_analysis(participant_summary)\n",
- "primary_results\n"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "2c182260",
- "metadata": {},
- "source": [
- "## Per-label participant-level summaries\n",
- "\n",
- "These are useful to show **where** the overall pattern comes from, but I recommend keeping them **descriptive only** in the paper because each label has only 5 cases.\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 22,
- "id": "53b57b45",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " task | \n",
- " label | \n",
- " ProtoSSL_mean | \n",
- " ProtoECGNet_mean | \n",
- " mean_difference | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " global | \n",
- " AMI | \n",
- " 0.8571 | \n",
- " 0.6571 | \n",
- " 0.2000 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " global | \n",
- " CLBBB | \n",
- " 0.9143 | \n",
- " 0.9429 | \n",
- " -0.0286 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " global | \n",
- " CRBBB | \n",
- " 0.9143 | \n",
- " 0.5429 | \n",
- " 0.3714 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " global | \n",
- " PVC | \n",
- " 0.9714 | \n",
- " 0.5143 | \n",
- " 0.4571 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " paired | \n",
- " AMI | \n",
- " 0.8000 | \n",
- " 0.6571 | \n",
- " 0.1429 | \n",
- "
\n",
- " \n",
- " | 5 | \n",
- " paired | \n",
- " CLBBB | \n",
- " 0.7714 | \n",
- " 0.9429 | \n",
- " -0.1714 | \n",
- "
\n",
- " \n",
- " | 6 | \n",
- " paired | \n",
- " CRBBB | \n",
- " 0.8571 | \n",
- " 0.5429 | \n",
- " 0.3143 | \n",
- "
\n",
- " \n",
- " | 7 | \n",
- " paired | \n",
- " PVC | \n",
- " 0.8857 | \n",
- " 0.5714 | \n",
- " 0.3143 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " task label ProtoSSL_mean ProtoECGNet_mean mean_difference\n",
- "0 global AMI 0.8571 0.6571 0.2000\n",
- "1 global CLBBB 0.9143 0.9429 -0.0286\n",
- "2 global CRBBB 0.9143 0.5429 0.3714\n",
- "3 global PVC 0.9714 0.5143 0.4571\n",
- "4 paired AMI 0.8000 0.6571 0.1429\n",
- "5 paired CLBBB 0.7714 0.9429 -0.1714\n",
- "6 paired CRBBB 0.8571 0.5429 0.3143\n",
- "7 paired PVC 0.8857 0.5714 0.3143"
- ]
- },
- "execution_count": 22,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Mean of `good` per participant/task/label, spread into one column per model,\n",
- "# plus the ProtoSSL - ProtoECGNet difference.\n",
- "participant_by_label = (\n",
- "    yesno.groupby([\"participant\", \"task\", \"label\", \"model\"])[\"good\"]\n",
- "    .mean()\n",
- "    .unstack(\"model\")\n",
- "    .reset_index()\n",
- "    .assign(difference=lambda wide: wide[\"ProtoSSL\"] - wide[\"ProtoECGNet\"])\n",
- ")\n",
- "\n",
- "# Average those participant-level values within each task/label pair.\n",
- "per_label_summary = (\n",
- "    participant_by_label.groupby([\"task\", \"label\"])\n",
- "    .agg(\n",
- "        **{\n",
- "            \"ProtoSSL_mean\": (\"ProtoSSL\", \"mean\"),\n",
- "            \"ProtoECGNet_mean\": (\"ProtoECGNet\", \"mean\"),\n",
- "            \"mean_difference\": (\"difference\", \"mean\"),\n",
- "        }\n",
- "    )\n",
- "    .reset_index()\n",
- ")\n",
- "per_label_summary\n"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "e1dd8d16",
- "metadata": {},
- "source": [
- "### Descriptive summaries for the comparative A/B/Both/Neither question"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 23,
- "id": "06bb6c2f",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | preference | \n",
- " task | \n",
- " Both | \n",
- " Neither | \n",
- " ProtoECGNet | \n",
- " ProtoSSL | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " global | \n",
- " 51 | \n",
- " 0 | \n",
- " 25 | \n",
- " 64 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " paired | \n",
- " 34 | \n",
- " 6 | \n",
- " 36 | \n",
- " 64 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- "preference task Both Neither ProtoECGNet ProtoSSL\n",
- "0 global 51 0 25 64\n",
- "1 paired 34 6 36 64"
- ]
- },
- "execution_count": 23,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Number of responses per preference option within each task (one row per task).\n",
- "preference_overall = prefs.groupby([\"task\", \"preference\"]).size().unstack(fill_value=0).reset_index()\n",
- "preference_overall\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 24,
- "id": "a324d62e",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | preference | \n",
- " task | \n",
- " label | \n",
- " Both | \n",
- " Neither | \n",
- " ProtoECGNet | \n",
- " ProtoSSL | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " global | \n",
- " AMI | \n",
- " 16 | \n",
- " 0 | \n",
- " 5 | \n",
- " 14 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " global | \n",
- " CLBBB | \n",
- " 17 | \n",
- " 0 | \n",
- " 10 | \n",
- " 8 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " global | \n",
- " CRBBB | \n",
- " 8 | \n",
- " 0 | \n",
- " 5 | \n",
- " 22 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " global | \n",
- " PVC | \n",
- " 10 | \n",
- " 0 | \n",
- " 5 | \n",
- " 20 | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " paired | \n",
- " AMI | \n",
- " 8 | \n",
- " 3 | \n",
- " 9 | \n",
- " 15 | \n",
- "
\n",
- " \n",
- " | 5 | \n",
- " paired | \n",
- " CLBBB | \n",
- " 12 | \n",
- " 2 | \n",
- " 17 | \n",
- " 4 | \n",
- "
\n",
- " \n",
- " | 6 | \n",
- " paired | \n",
- " CRBBB | \n",
- " 8 | \n",
- " 0 | \n",
- " 5 | \n",
- " 22 | \n",
- "
\n",
- " \n",
- " | 7 | \n",
- " paired | \n",
- " PVC | \n",
- " 6 | \n",
- " 1 | \n",
- " 5 | \n",
- " 23 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- "preference task label Both Neither ProtoECGNet ProtoSSL\n",
- "0 global AMI 16 0 5 14\n",
- "1 global CLBBB 17 0 10 8\n",
- "2 global CRBBB 8 0 5 22\n",
- "3 global PVC 10 0 5 20\n",
- "4 paired AMI 8 3 9 15\n",
- "5 paired CLBBB 12 2 17 4\n",
- "6 paired CRBBB 8 0 5 22\n",
- "7 paired PVC 6 1 5 23"
- ]
- },
- "execution_count": 24,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Number of responses per preference option within each task/label pair.\n",
- "preference_by_label = prefs.groupby([\"task\", \"label\", \"preference\"]).size().unstack(fill_value=0).reset_index()\n",
- "preference_by_label\n"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "0aba0956",
- "metadata": {},
- "source": [
- "### Fleiss' kappa for the binary yes/no ratings\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 25,
- "id": "0d869989",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Overall Fleiss' kappa: 0.2877\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/var/folders/9j/f0qlzhxj2klgf3bxm77sqz300000gn/T/ipykernel_34866/3937073973.py:14: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
- " .apply(fleiss_from_yesno)\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " label | \n",
- " fleiss_kappa | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " AMI | \n",
- " 0.2023 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " CLBBB | \n",
- " 0.0542 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " CRBBB | \n",
- " 0.3000 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " PVC | \n",
- " 0.4000 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " label fleiss_kappa\n",
- "0 AMI 0.2023\n",
- "1 CLBBB 0.0542\n",
- "2 CRBBB 0.3000\n",
- "3 PVC 0.4000"
- ]
- },
- "execution_count": 25,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "def fleiss_from_yesno(df):\n",
- "    \"\"\"Compute Fleiss' kappa for the binary ``good`` ratings in ``df``.\n",
- "\n",
- "    Each (case_id, task, model) group is one row of the count table passed to\n",
- "    ``fleiss_kappa``; its two columns are the number of 0 and 1 values of ``good``.\n",
- "    \"\"\"\n",
- "    rating_counts = np.asarray(\n",
- "        [\n",
- "            ratings[\"good\"].value_counts().reindex([0, 1], fill_value=0).values\n",
- "            for _, ratings in df.groupby([\"case_id\", \"task\", \"model\"])\n",
- "        ]\n",
- "    )\n",
- "    return float(fleiss_kappa(rating_counts))\n",
- "\n",
- "overall_kappa = fleiss_from_yesno(yesno)\n",
- "\n",
- "# Select only the columns fleiss_from_yesno reads, so apply() does not operate on the\n",
- "# grouping column (that behaviour is deprecated in pandas and emitted a DeprecationWarning).\n",
- "kappa_by_label = (\n",
- "    yesno.groupby(\"label\", group_keys=False)[[\"case_id\", \"task\", \"model\", \"good\"]]\n",
- "    .apply(fleiss_from_yesno)\n",
- "    .rename(\"fleiss_kappa\")\n",
- "    .reset_index()\n",
- ")\n",
- "\n",
- "print(\"Overall Fleiss' kappa:\", round(overall_kappa, 4))\n",
- "kappa_by_label\n"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "21e7ba5c",
- "metadata": {},
- "source": [
- "### Fleiss' kappa for the A/B/Both/Neither ratings\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 26,
- "id": "88be54ed",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Overall Fleiss' kappa (A/B/Both/Neither): 0.1953\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/var/folders/9j/f0qlzhxj2klgf3bxm77sqz300000gn/T/ipykernel_34866/259028057.py:18: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.\n",
- " .apply(fleiss_from_pref)\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " label | \n",
- " fleiss_kappa | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " AMI | \n",
- " 0.2170 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " CLBBB | \n",
- " 0.0174 | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " CRBBB | \n",
- " 0.1322 | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " PVC | \n",
- " 0.1425 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " label fleiss_kappa\n",
- "0 AMI 0.2170\n",
- "1 CLBBB 0.0174\n",
- "2 CRBBB 0.1322\n",
- "3 PVC 0.1425"
- ]
- },
- "execution_count": 26,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "from statsmodels.stats.inter_rater import fleiss_kappa\n",
- "import numpy as np\n",
- "\n",
- "def fleiss_from_pref(df):\n",
- "    \"\"\"Compute Fleiss' kappa for the four-way ``preference`` responses in ``df``.\n",
- "\n",
- "    Each (case_id, task) group is one row of the count table passed to\n",
- "    ``fleiss_kappa``; the columns count ProtoSSL, ProtoECGNet, Both and Neither,\n",
- "    in that order, with 0 for options nobody chose.\n",
- "    \"\"\"\n",
- "    options = [\"ProtoSSL\", \"ProtoECGNet\", \"Both\", \"Neither\"]\n",
- "    choice_counts = np.asarray(\n",
- "        [\n",
- "            answers[\"preference\"].value_counts().reindex(options, fill_value=0).values\n",
- "            for _, answers in df.groupby([\"case_id\", \"task\"])\n",
- "        ]\n",
- "    )\n",
- "    return float(fleiss_kappa(choice_counts))\n",
- "\n",
- "overall_kappa_pref = fleiss_from_pref(prefs)\n",
- "\n",
- "# Select only the columns fleiss_from_pref reads, so apply() does not operate on the\n",
- "# grouping column (that behaviour is deprecated in pandas and emitted a DeprecationWarning).\n",
- "kappa_by_label_pref = (\n",
- "    prefs.groupby(\"label\", group_keys=False)[[\"case_id\", \"task\", \"preference\"]]\n",
- "    .apply(fleiss_from_pref)\n",
- "    .rename(\"fleiss_kappa\")\n",
- "    .reset_index()\n",
- ")\n",
- "\n",
- "print(\"Overall Fleiss' kappa (A/B/Both/Neither):\", round(overall_kappa_pref, 4))\n",
- "kappa_by_label_pref"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "80bfa9be",
- "metadata": {},
- "source": [
- "### Compact tables for manuscript drafting"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 27,
- "id": "64519600",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " task | \n",
- " n_participants | \n",
- " ProtoSSL_mean | \n",
- " ProtoECGNet_mean | \n",
- " mean_difference | \n",
- " ci95_low | \n",
- " ci95_high | \n",
- " wilcoxon_W | \n",
- " wilcoxon_p | \n",
- " paired_t_p | \n",
- " sign_test_p | \n",
- " wilcoxon_p_holm | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " global | \n",
- " 7 | \n",
- " 0.9143 | \n",
- " 0.6643 | \n",
- " 0.2500 | \n",
- " 0.1432 | \n",
- " 0.3568 | \n",
- " 0.0000 | \n",
- " 0.0178 | \n",
- " 0.0012 | \n",
- " 0.0156 | \n",
- " 0.0355 | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " paired | \n",
- " 7 | \n",
- " 0.8286 | \n",
- " 0.6786 | \n",
- " 0.1500 | \n",
- " 0.0501 | \n",
- " 0.2499 | \n",
- " 0.0000 | \n",
- " 0.0178 | \n",
- " 0.0104 | \n",
- " 0.0156 | \n",
- " 0.0355 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " task n_participants ProtoSSL_mean ProtoECGNet_mean mean_difference ci95_low ci95_high wilcoxon_W wilcoxon_p paired_t_p sign_test_p wilcoxon_p_holm\n",
- "0 global 7 0.9143 0.6643 0.2500 0.1432 0.3568 0.0000 0.0178 0.0012 0.0156 0.0355\n",
- "1 paired 7 0.8286 0.6786 0.1500 0.0501 0.2499 0.0000 0.0178 0.0104 0.0156 0.0355"
- ]
- },
- "execution_count": 27,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Round the means, CI bounds and p-values to 4 decimals; all other columns are left untouched.\n",
- "primary_results_rounded = primary_results.assign(\n",
- "    **{\n",
- "        col: primary_results[col].round(4)\n",
- "        for col in [\"ProtoSSL_mean\", \"ProtoECGNet_mean\", \"mean_difference\", \"ci95_low\", \"ci95_high\", \"wilcoxon_p\", \"wilcoxon_p_holm\", \"paired_t_p\", \"sign_test_p\"]\n",
- "    }\n",
- ")\n",
- "primary_results_rounded\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 28,
- "id": "67c346fd",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | model | \n",
- " task | \n",
- " label | \n",
- " ProtoECGNet | \n",
- " ProtoSSL | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | 0 | \n",
- " global | \n",
- " AMI | \n",
- " 23/35 (65.7%) | \n",
- " 30/35 (85.7%) | \n",
- "
\n",
- " \n",
- " | 1 | \n",
- " global | \n",
- " CLBBB | \n",
- " 33/35 (94.3%) | \n",
- " 32/35 (91.4%) | \n",
- "
\n",
- " \n",
- " | 2 | \n",
- " global | \n",
- " CRBBB | \n",
- " 19/35 (54.3%) | \n",
- " 32/35 (91.4%) | \n",
- "
\n",
- " \n",
- " | 3 | \n",
- " global | \n",
- " PVC | \n",
- " 18/35 (51.4%) | \n",
- " 34/35 (97.1%) | \n",
- "
\n",
- " \n",
- " | 4 | \n",
- " paired | \n",
- " AMI | \n",
- " 23/35 (65.7%) | \n",
- " 28/35 (80.0%) | \n",
- "
\n",
- " \n",
- " | 5 | \n",
- " paired | \n",
- " CLBBB | \n",
- " 33/35 (94.3%) | \n",
- " 27/35 (77.1%) | \n",
- "
\n",
- " \n",
- " | 6 | \n",
- " paired | \n",
- " CRBBB | \n",
- " 19/35 (54.3%) | \n",
- " 30/35 (85.7%) | \n",
- "
\n",
- " \n",
- " | 7 | \n",
- " paired | \n",
- " PVC | \n",
- " 20/35 (57.1%) | \n",
- " 31/35 (88.6%) | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- "model task label ProtoECGNet ProtoSSL\n",
- "0 global AMI 23/35 (65.7%) 30/35 (85.7%)\n",
- "1 global CLBBB 33/35 (94.3%) 32/35 (91.4%)\n",
- "2 global CRBBB 19/35 (54.3%) 32/35 (91.4%)\n",
- "3 global PVC 18/35 (51.4%) 34/35 (97.1%)\n",
- "4 paired AMI 23/35 (65.7%) 28/35 (80.0%)\n",
- "5 paired CLBBB 33/35 (94.3%) 27/35 (77.1%)\n",
- "6 paired CRBBB 19/35 (54.3%) 30/35 (85.7%)\n",
- "7 paired PVC 20/35 (57.1%) 31/35 (88.6%)"
- ]
- },
- "execution_count": 28,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Format each task/label/model cell as \"n_yes/n_total (pct%)\" and spread the models into columns.\n",
- "label_yesno_pivot = (\n",
- "    label_yesno.assign(\n",
- "        summary=lambda counts: (\n",
- "            counts[\"n_yes\"].astype(str)\n",
- "            + \"/\"\n",
- "            + counts[\"n_total\"].astype(str)\n",
- "            + \" (\"\n",
- "            + (100 * counts[\"proportion\"]).round(1).astype(str)\n",
- "            + \"%)\"\n",
- "        )\n",
- "    )[[\"task\", \"label\", \"model\", \"summary\"]]\n",
- "    .pivot(index=[\"task\", \"label\"], columns=\"model\", values=\"summary\")\n",
- "    .reset_index()\n",
- ")\n",
- "label_yesno_pivot\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "51807e2d",
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "900aa255",
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "ecg_env",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.18"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}