diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index c2141c0..cd7470e 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -103,7 +103,6 @@ scripts/pretrain/run-prosup-heedb-pretrain.sh @StevenSong scripts/prototypes-from-fms @StevenSong scripts/queue-experiments.sh @StevenSong scripts/README.md @StevenSong @sahilsethi0105 -user-study/analyze_results.ipynb @StevenSong user-study/images @StevenSong user-study/metadata.csv @StevenSong user-study/prepare_samples.ipynb @StevenSong diff --git a/user-study/analyze_results.ipynb b/user-study/analyze_results.ipynb deleted file mode 100644 index 54f554e..0000000 --- a/user-study/analyze_results.ipynb +++ /dev/null @@ -1,2316 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "a8401e54", - "metadata": {}, - "source": [ - "# ProtoSSL User study analysis\n", - "\n", - "This notebook conducts:\n", - "\n", - "- **Primary analysis:** participant-level paired comparison of the proportion of responses rated as good for ProtoSSL vs ProtoECGNet, done separately for the two tasks.\n", - "- **Primary test:** two-sided **Wilcoxon signed-rank test** across participants.\n", - "- **Comparative A/B/Both/Neither question:** descriptive summaries\n", - "- **Inter-rater agreement:** **Fleiss' kappa** for the binary yes/no ratings, reported overall and by label.\n", - "\n", - "The **participants** are the primary unit of inference.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "73ba5be4", - "metadata": {}, - "outputs": [], - "source": [ - "import math\n", - "import numpy as np\n", - "import pandas as pd\n", - "from scipy.stats import wilcoxon, ttest_rel, binomtest, t as tdist\n", - "from statsmodels.stats.inter_rater import fleiss_kappa\n", - "\n", - "pd.set_option(\"display.max_columns\", None)\n", - "pd.set_option(\"display.width\", 200)\n", - "pd.set_option(\"display.float_format\", lambda x: f\"{x:.4f}\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "d094a664", - 
"metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "results shape: (7, 131)\n", - "metadata shape: (20, 15)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
record_idredcap_survey_identifieruser_study_form_timestampconsentprototypes_quality_choicesprototypea_qualityprototypeb_qualityprototypes_quality_choices_2explanation_aexplanation_bcase1_prototypes_quality_choicescase1_prototypea_qualitycase1_prototypeb_qualitycase1_prototypes_quality_choices_2case1_explanation_acase1_explanation_bcase2_prototypes_quality_choicescase2_prototypea_qualitycase2_prototypeb_qualitycase2_prototypes_quality_choices_2case2_explanation_acase2_explanation_bcase3_prototypes_quality_choicescase3_prototypea_qualitycase3_prototypeb_qualitycase3_prototypes_quality_choices_2case3_explanation_acase3_explanation_bcase4_prototypes_quality_choicescase4_prototypea_qualitycase4_prototypeb_qualitycase4_prototypes_quality_choices_2case4_explanation_acase4_explanation_bcase5_prototypes_quality_choicescase5_prototypea_qualitycase5_prototypeb_qualitycase5_prototypes_quality_choices_2case5_explanation_acase5_explanation_bcase6_prototypes_quality_choicescase6_prototypea_qualitycase6_prototypeb_qualitycase6_prototypes_quality_choices_2case6_explanation_acase6_explanation_bcase7_prototypes_quality_choicescase7_prototypea_qualitycase7_prototypeb_qualitycase7_prototypes_quality_choices_2case7_explanation_acase7_explanation_bcase8_prototypes_quality_choicescase8_prototypea_qualitycase8_prototypeb_qualitycase8_prototypes_quality_choices_2case8_explanation_acase8_explanation_bcase9_prototypes_quality_choicescase9_prototypea_qualitycase9_prototypeb_qualitycase9_prototypes_quality_choices_2case9_explanation_acase9_explanation_bcase10_prototypes_quality_choicescase10_prototypea_qualitycase10_prototypeb_qualitycase10_prototypes_quality_choices_2case10_explanation_acase10_explanation_bcase11_prototypes_quality_choicescase11_prototypea_qualitycase11_prototypeb_qualitycase11_prototypes_quality_choices_2case11_explanation_acase11_explanation_bcase12_prototypes_quality_choicescase12_prototypea_qualitycase12_prototypeb_qualitycase12_prototypes_quality_choices_2case12_explanatio
n_acase12_explanation_bcase13_prototypes_quality_choicescase13_prototypea_qualitycase13_prototypeb_qualitycase13_prototypes_quality_choices_2case13_explanation_acase13_explanation_bcase14_prototypes_quality_choicescase14_prototypea_qualitycase14_prototypeb_qualitycase14_prototypes_quality_choices_2case14_explanation_acase14_explanation_bcase15_prototypes_quality_choicescase15_prototypea_qualitycase15_prototypeb_qualitycase15_prototypes_quality_choices_2case15_explanation_acase15_explanation_bcase16_prototypes_quality_choicescase16_prototypea_qualitycase16_prototypeb_qualitycase16_prototypes_quality_choices_2case16_explanation_acase16_explanation_bcase17_prototypes_quality_choicescase17_prototypea_qualitycase17_prototypeb_qualitycase17_prototypes_quality_choices_2case17_explanation_acase17_explanation_bcase18_prototypes_quality_choicescase18_prototypea_qualitycase18_prototypeb_qualitycase18_prototypes_quality_choices_2case18_explanation_acase18_explanation_bcase19_prototypes_quality_choicescase19_prototypea_qualitycase19_prototypeb_qualitycase19_prototypes_quality_choices_2case19_explanation_acase19_explanation_bcase20_prototypes_quality_choicescase20_prototypea_qualitycase20_prototypeb_qualitycase20_prototypes_quality_choices_2case20_explanation_acase20_explanation_buser_study_form_complete
05NaN2026-04-06 18:18:5612012013113111111112112113111113113113113113113113113113111112112112113112113113113113113113113112012013111112012013113113112112
17NaN2026-04-09 11:26:5312012013111113113113002011101103113111101101111103113113111103112101101102113112013113111113112112012011101102012011111102012012
\n", - "
" - ], - "text/plain": [ - " record_id redcap_survey_identifier user_study_form_timestamp consent prototypes_quality_choices prototypea_quality prototypeb_quality prototypes_quality_choices_2 explanation_a \\\n", - "0 5 NaN 2026-04-06 18:18:56 1 2 0 1 2 0 \n", - "1 7 NaN 2026-04-09 11:26:53 1 2 0 1 2 0 \n", - "\n", - " explanation_b case1_prototypes_quality_choices case1_prototypea_quality case1_prototypeb_quality case1_prototypes_quality_choices_2 case1_explanation_a case1_explanation_b \\\n", - "0 1 3 1 1 3 1 1 \n", - "1 1 3 1 1 1 1 1 \n", - "\n", - " case2_prototypes_quality_choices case2_prototypea_quality case2_prototypeb_quality case2_prototypes_quality_choices_2 case2_explanation_a case2_explanation_b \\\n", - "0 1 1 1 1 1 1 \n", - "1 3 1 1 3 1 1 \n", - "\n", - " case3_prototypes_quality_choices case3_prototypea_quality case3_prototypeb_quality case3_prototypes_quality_choices_2 case3_explanation_a case3_explanation_b \\\n", - "0 2 1 1 2 1 1 \n", - "1 3 0 0 2 0 1 \n", - "\n", - " case4_prototypes_quality_choices case4_prototypea_quality case4_prototypeb_quality case4_prototypes_quality_choices_2 case4_explanation_a case4_explanation_b \\\n", - "0 3 1 1 1 1 1 \n", - "1 1 1 0 1 1 0 \n", - "\n", - " case5_prototypes_quality_choices case5_prototypea_quality case5_prototypeb_quality case5_prototypes_quality_choices_2 case5_explanation_a case5_explanation_b \\\n", - "0 3 1 1 3 1 1 \n", - "1 3 1 1 3 1 1 \n", - "\n", - " case6_prototypes_quality_choices case6_prototypea_quality case6_prototypeb_quality case6_prototypes_quality_choices_2 case6_explanation_a case6_explanation_b \\\n", - "0 3 1 1 3 1 1 \n", - "1 1 1 0 1 1 0 \n", - "\n", - " case7_prototypes_quality_choices case7_prototypea_quality case7_prototypeb_quality case7_prototypes_quality_choices_2 case7_explanation_a case7_explanation_b \\\n", - "0 3 1 1 3 1 1 \n", - "1 1 1 1 1 1 0 \n", - "\n", - " case8_prototypes_quality_choices case8_prototypea_quality case8_prototypeb_quality 
case8_prototypes_quality_choices_2 case8_explanation_a case8_explanation_b \\\n", - "0 3 1 1 3 1 1 \n", - "1 3 1 1 3 1 1 \n", - "\n", - " case9_prototypes_quality_choices case9_prototypea_quality case9_prototypeb_quality case9_prototypes_quality_choices_2 case9_explanation_a case9_explanation_b \\\n", - "0 3 1 1 1 1 1 \n", - "1 3 1 1 1 1 0 \n", - "\n", - " case10_prototypes_quality_choices case10_prototypea_quality case10_prototypeb_quality case10_prototypes_quality_choices_2 case10_explanation_a case10_explanation_b \\\n", - "0 2 1 1 2 1 1 \n", - "1 3 1 1 2 1 0 \n", - "\n", - " case11_prototypes_quality_choices case11_prototypea_quality case11_prototypeb_quality case11_prototypes_quality_choices_2 case11_explanation_a case11_explanation_b \\\n", - "0 2 1 1 3 1 1 \n", - "1 1 1 0 1 1 0 \n", - "\n", - " case12_prototypes_quality_choices case12_prototypea_quality case12_prototypeb_quality case12_prototypes_quality_choices_2 case12_explanation_a case12_explanation_b \\\n", - "0 2 1 1 3 1 1 \n", - "1 2 1 1 3 1 1 \n", - "\n", - " case13_prototypes_quality_choices case13_prototypea_quality case13_prototypeb_quality case13_prototypes_quality_choices_2 case13_explanation_a case13_explanation_b \\\n", - "0 3 1 1 3 1 1 \n", - "1 2 0 1 3 1 1 \n", - "\n", - " case14_prototypes_quality_choices case14_prototypea_quality case14_prototypeb_quality case14_prototypes_quality_choices_2 case14_explanation_a case14_explanation_b \\\n", - "0 3 1 1 3 1 1 \n", - "1 3 1 1 1 1 1 \n", - "\n", - " case15_prototypes_quality_choices case15_prototypea_quality case15_prototypeb_quality case15_prototypes_quality_choices_2 case15_explanation_a case15_explanation_b \\\n", - "0 3 1 1 3 1 1 \n", - "1 3 1 1 2 1 1 \n", - "\n", - " case16_prototypes_quality_choices case16_prototypea_quality case16_prototypeb_quality case16_prototypes_quality_choices_2 case16_explanation_a case16_explanation_b \\\n", - "0 2 0 1 2 0 1 \n", - "1 2 0 1 2 0 1 \n", - "\n", - " case17_prototypes_quality_choices 
case17_prototypea_quality case17_prototypeb_quality case17_prototypes_quality_choices_2 case17_explanation_a case17_explanation_b \\\n", - "0 3 1 1 1 1 1 \n", - "1 1 1 0 1 1 0 \n", - "\n", - " case18_prototypes_quality_choices case18_prototypea_quality case18_prototypeb_quality case18_prototypes_quality_choices_2 case18_explanation_a case18_explanation_b \\\n", - "0 2 0 1 2 0 1 \n", - "1 2 0 1 2 0 1 \n", - "\n", - " case19_prototypes_quality_choices case19_prototypea_quality case19_prototypeb_quality case19_prototypes_quality_choices_2 case19_explanation_a case19_explanation_b \\\n", - "0 3 1 1 3 1 1 \n", - "1 1 1 1 1 1 0 \n", - "\n", - " case20_prototypes_quality_choices case20_prototypea_quality case20_prototypeb_quality case20_prototypes_quality_choices_2 case20_explanation_a case20_explanation_b user_study_form_complete \n", - "0 3 1 1 2 1 1 2 \n", - "1 2 0 1 2 0 1 2 " - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "results = pd.read_csv(\"results.csv\")\n", - "metadata = pd.read_csv(\"metadata.csv\")\n", - "\n", - "print(\"results shape:\", results.shape)\n", - "print(\"metadata shape:\", metadata.shape)\n", - "results.head(2)\n" - ] - }, - { - "cell_type": "markdown", - "id": "7f870f0f", - "metadata": {}, - "source": [ - "### Decode REDCap responses into analysis tables\n", - "\n", - "`yesno` contains one row per participant × case × task × model for the binary yes/no questions.\n", - "\n", - "`prefs` contains one row per participant × case × task for the A/B/Both/Neither comparative question, decoded back to the actual model identities using the metadata file.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "16acb6b2", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "yesno shape: (560, 6)\n", - "prefs shape: (280, 5)\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
participantcase_idlabeltaskmodelgood
051AMIglobalProtoSSL1
151AMIglobalProtoECGNet1
251AMIpairedProtoSSL1
351AMIpairedProtoECGNet1
452AMIglobalProtoSSL1
\n", - "
" - ], - "text/plain": [ - " participant case_id label task model good\n", - "0 5 1 AMI global ProtoSSL 1\n", - "1 5 1 AMI global ProtoECGNet 1\n", - "2 5 1 AMI paired ProtoSSL 1\n", - "3 5 1 AMI paired ProtoECGNet 1\n", - "4 5 2 AMI global ProtoSSL 1" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "case_map = (\n", - " metadata.rename(\n", - " columns={\n", - " \"Study Index\": \"case_id\",\n", - " \"Label\": \"label\",\n", - " \"ProtoSSL Assignment\": \"ssl_assignment\",\n", - " \"ProtoECGNet Assignment\": \"ecg_assignment\",\n", - " }\n", - " )[[\"case_id\", \"label\", \"ssl_assignment\", \"ecg_assignment\"]]\n", - " .copy()\n", - ")\n", - "case_map[\"case_id\"] = case_map[\"case_id\"].astype(int)\n", - "\n", - "pref_code = {1: \"A\", 2: \"B\", 3: \"Both\", 4: \"Neither\"}\n", - "\n", - "yes_rows = []\n", - "pref_rows = []\n", - "\n", - "for _, row in results.iterrows():\n", - " participant = int(row[\"record_id\"])\n", - " for case_id in range(1, 21):\n", - " meta_row = case_map.loc[case_map[\"case_id\"] == case_id].iloc[0]\n", - "\n", - " task_specs = [\n", - " (\"global\",\n", - " f\"case{case_id}_prototypea_quality\",\n", - " f\"case{case_id}_prototypeb_quality\",\n", - " f\"case{case_id}_prototypes_quality_choices\"),\n", - " (\"paired\",\n", - " f\"case{case_id}_explanation_a\",\n", - " f\"case{case_id}_explanation_b\",\n", - " f\"case{case_id}_prototypes_quality_choices_2\"),\n", - " ]\n", - "\n", - " for task, a_col, b_col, pref_col in task_specs:\n", - " for shown_letter, col in [(\"A\", a_col), (\"B\", b_col)]:\n", - " actual_model = \"ProtoSSL\" if meta_row[\"ssl_assignment\"] == shown_letter else \"ProtoECGNet\"\n", - " yes_rows.append(\n", - " {\n", - " \"participant\": participant,\n", - " \"case_id\": case_id,\n", - " \"label\": meta_row[\"label\"],\n", - " \"task\": task,\n", - " \"model\": actual_model,\n", - " \"good\": int(row[col]),\n", - " }\n", - " )\n", - "\n", - " pref_value 
= pref_code[int(row[pref_col])]\n", - " if pref_value in [\"A\", \"B\"]:\n", - " actual_pref = \"ProtoSSL\" if meta_row[\"ssl_assignment\"] == pref_value else \"ProtoECGNet\"\n", - " else:\n", - " actual_pref = pref_value\n", - "\n", - " pref_rows.append(\n", - " {\n", - " \"participant\": participant,\n", - " \"case_id\": case_id,\n", - " \"label\": meta_row[\"label\"],\n", - " \"task\": task,\n", - " \"preference\": actual_pref,\n", - " }\n", - " )\n", - "\n", - "yesno = pd.DataFrame(yes_rows)\n", - "prefs = pd.DataFrame(pref_rows)\n", - "\n", - "print(\"yesno shape:\", yesno.shape)\n", - "print(\"prefs shape:\", prefs.shape)\n", - "yesno.head()\n" - ] - }, - { - "cell_type": "markdown", - "id": "0dce4296", - "metadata": {}, - "source": [ - "### Descriptive summaries for the binary yes/no questions" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "961b8054", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
taskmodeln_yesn_totalproportion
0globalProtoECGNet931400.6643
1globalProtoSSL1281400.9143
2pairedProtoECGNet951400.6786
3pairedProtoSSL1161400.8286
\n", - "
" - ], - "text/plain": [ - " task model n_yes n_total proportion\n", - "0 global ProtoECGNet 93 140 0.6643\n", - "1 global ProtoSSL 128 140 0.9143\n", - "2 paired ProtoECGNet 95 140 0.6786\n", - "3 paired ProtoSSL 116 140 0.8286" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "overall_yesno = (\n", - " yesno.groupby([\"task\", \"model\"])[\"good\"]\n", - " .agg(n_yes=\"sum\", n_total=\"count\", proportion=\"mean\")\n", - " .reset_index()\n", - ")\n", - "overall_yesno\n" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "da09502f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
tasklabelmodeln_yesn_totalproportion
0globalAMIProtoECGNet23350.6571
1globalAMIProtoSSL30350.8571
2globalCLBBBProtoECGNet33350.9429
3globalCLBBBProtoSSL32350.9143
4globalCRBBBProtoECGNet19350.5429
5globalCRBBBProtoSSL32350.9143
6globalPVCProtoECGNet18350.5143
7globalPVCProtoSSL34350.9714
8pairedAMIProtoECGNet23350.6571
9pairedAMIProtoSSL28350.8000
10pairedCLBBBProtoECGNet33350.9429
11pairedCLBBBProtoSSL27350.7714
12pairedCRBBBProtoECGNet19350.5429
13pairedCRBBBProtoSSL30350.8571
14pairedPVCProtoECGNet20350.5714
15pairedPVCProtoSSL31350.8857
\n", - "
" - ], - "text/plain": [ - " task label model n_yes n_total proportion\n", - "0 global AMI ProtoECGNet 23 35 0.6571\n", - "1 global AMI ProtoSSL 30 35 0.8571\n", - "2 global CLBBB ProtoECGNet 33 35 0.9429\n", - "3 global CLBBB ProtoSSL 32 35 0.9143\n", - "4 global CRBBB ProtoECGNet 19 35 0.5429\n", - "5 global CRBBB ProtoSSL 32 35 0.9143\n", - "6 global PVC ProtoECGNet 18 35 0.5143\n", - "7 global PVC ProtoSSL 34 35 0.9714\n", - "8 paired AMI ProtoECGNet 23 35 0.6571\n", - "9 paired AMI ProtoSSL 28 35 0.8000\n", - "10 paired CLBBB ProtoECGNet 33 35 0.9429\n", - "11 paired CLBBB ProtoSSL 27 35 0.7714\n", - "12 paired CRBBB ProtoECGNet 19 35 0.5429\n", - "13 paired CRBBB ProtoSSL 30 35 0.8571\n", - "14 paired PVC ProtoECGNet 20 35 0.5714\n", - "15 paired PVC ProtoSSL 31 35 0.8857" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "label_yesno = (\n", - " yesno.groupby([\"task\", \"label\", \"model\"])[\"good\"]\n", - " .agg(n_yes=\"sum\", n_total=\"count\", proportion=\"mean\")\n", - " .reset_index()\n", - ")\n", - "label_yesno\n" - ] - }, - { - "cell_type": "markdown", - "id": "03a50af7", - "metadata": {}, - "source": [ - "### Primary analysis\n", - "\n", - "For each participant and each task, compute the proportion of responses rated as good for each model across the 20 cases. Then compare ProtoSSL vs ProtoECGNet with a **paired Wilcoxon signed-rank test**.\n", - "\n", - "Because there are **two primary task-level hypotheses** (`global` and `paired`), apply **Holm correction** across those two p-values.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "3194ed44", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
modelparticipanttaskProtoECGNetProtoSSLdifference
05global0.90001.00000.1000
15paired0.90001.00000.1000
27global0.70000.80000.1000
37paired0.60000.80000.2000
48global0.45000.80000.3500
58paired0.50000.55000.0500
69global0.60000.95000.3500
79paired0.55000.90000.3500
810global0.60000.95000.3500
910paired0.75000.95000.2000
1011global0.65000.95000.3000
1111paired0.80000.90000.1000
1212global0.75000.95000.2000
1312paired0.65000.70000.0500
\n", - "
" - ], - "text/plain": [ - "model participant task ProtoECGNet ProtoSSL difference\n", - "0 5 global 0.9000 1.0000 0.1000\n", - "1 5 paired 0.9000 1.0000 0.1000\n", - "2 7 global 0.7000 0.8000 0.1000\n", - "3 7 paired 0.6000 0.8000 0.2000\n", - "4 8 global 0.4500 0.8000 0.3500\n", - "5 8 paired 0.5000 0.5500 0.0500\n", - "6 9 global 0.6000 0.9500 0.3500\n", - "7 9 paired 0.5500 0.9000 0.3500\n", - "8 10 global 0.6000 0.9500 0.3500\n", - "9 10 paired 0.7500 0.9500 0.2000\n", - "10 11 global 0.6500 0.9500 0.3000\n", - "11 11 paired 0.8000 0.9000 0.1000\n", - "12 12 global 0.7500 0.9500 0.2000\n", - "13 12 paired 0.6500 0.7000 0.0500" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "participant_summary = (\n", - " yesno.groupby([\"participant\", \"task\", \"model\"])[\"good\"]\n", - " .mean()\n", - " .unstack(\"model\")\n", - " .reset_index()\n", - ")\n", - "\n", - "participant_summary[\"difference\"] = (\n", - " participant_summary[\"ProtoSSL\"] - participant_summary[\"ProtoECGNet\"]\n", - ")\n", - "\n", - "participant_summary\n" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "b6ff4f59", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
taskn_participantsProtoSSL_meanProtoECGNet_meanmean_differenceci95_lowci95_highwilcoxon_Wwilcoxon_ppaired_t_psign_test_pwilcoxon_p_holm
0global70.91430.66430.25000.14320.35680.00000.01780.00120.01560.0355
1paired70.82860.67860.15000.05010.24990.00000.01780.01040.01560.0355
\n", - "
" - ], - "text/plain": [ - " task n_participants ProtoSSL_mean ProtoECGNet_mean mean_difference ci95_low ci95_high wilcoxon_W wilcoxon_p paired_t_p sign_test_p wilcoxon_p_holm\n", - "0 global 7 0.9143 0.6643 0.2500 0.1432 0.3568 0.0000 0.0178 0.0012 0.0156 0.0355\n", - "1 paired 7 0.8286 0.6786 0.1500 0.0501 0.2499 0.0000 0.0178 0.0104 0.0156 0.0355" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def holm_adjust(pvalues):\n", - " pvalues = np.asarray(pvalues, dtype=float)\n", - " m = len(pvalues)\n", - " order = np.argsort(pvalues)\n", - " adjusted = np.empty_like(pvalues)\n", - " running_max = 0.0\n", - "\n", - " for rank, idx in enumerate(order):\n", - " candidate = (m - rank) * pvalues[idx]\n", - " running_max = max(running_max, candidate)\n", - " adjusted[idx] = min(running_max, 1.0)\n", - "\n", - " return adjusted\n", - "\n", - "def participant_level_analysis(df):\n", - " rows = []\n", - " raw_wilcoxon_p = []\n", - "\n", - " for task in [\"global\", \"paired\"]:\n", - " sub = df[df[\"task\"] == task].copy()\n", - " diffs = sub[\"difference\"].to_numpy()\n", - "\n", - " w = wilcoxon(diffs, alternative=\"two-sided\", zero_method=\"wilcox\", method=\"approx\")\n", - " ttest = ttest_rel(sub[\"ProtoSSL\"], sub[\"ProtoECGNet\"])\n", - " sign = binomtest(np.sum(diffs > 0), np.sum(diffs != 0), p=0.5, alternative=\"two-sided\")\n", - "\n", - " mean_diff = float(np.mean(diffs))\n", - " sd_diff = float(np.std(diffs, ddof=1))\n", - " se_diff = sd_diff / math.sqrt(len(diffs))\n", - " tcrit = tdist.ppf(0.975, df=len(diffs) - 1)\n", - " ci_low = mean_diff - tcrit * se_diff\n", - " ci_high = mean_diff + tcrit * se_diff\n", - "\n", - " rows.append(\n", - " {\n", - " \"task\": task,\n", - " \"n_participants\": len(sub),\n", - " \"ProtoSSL_mean\": sub[\"ProtoSSL\"].mean(),\n", - " \"ProtoECGNet_mean\": sub[\"ProtoECGNet\"].mean(),\n", - " \"mean_difference\": mean_diff,\n", - " \"ci95_low\": ci_low,\n", - " 
\"ci95_high\": ci_high,\n", - " \"wilcoxon_W\": float(w.statistic),\n", - " \"wilcoxon_p\": float(w.pvalue),\n", - " \"paired_t_p\": float(ttest.pvalue),\n", - " \"sign_test_p\": float(sign.pvalue),\n", - " }\n", - " )\n", - " raw_wilcoxon_p.append(float(w.pvalue))\n", - "\n", - " out = pd.DataFrame(rows)\n", - " out[\"wilcoxon_p_holm\"] = holm_adjust(out[\"wilcoxon_p\"].to_numpy())\n", - " return out\n", - "\n", - "primary_results = participant_level_analysis(participant_summary)\n", - "primary_results\n" - ] - }, - { - "cell_type": "markdown", - "id": "2c182260", - "metadata": {}, - "source": [ - "## Per-label participant-level summaries\n", - "\n", - "These are useful to show **where** the overall pattern comes from, but I recommend keeping them **descriptive only** in the paper because each label has only 5 cases.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "53b57b45", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
tasklabelProtoSSL_meanProtoECGNet_meanmean_difference
0globalAMI0.85710.65710.2000
1globalCLBBB0.91430.9429-0.0286
2globalCRBBB0.91430.54290.3714
3globalPVC0.97140.51430.4571
4pairedAMI0.80000.65710.1429
5pairedCLBBB0.77140.9429-0.1714
6pairedCRBBB0.85710.54290.3143
7pairedPVC0.88570.57140.3143
\n", - "
" - ], - "text/plain": [ - " task label ProtoSSL_mean ProtoECGNet_mean mean_difference\n", - "0 global AMI 0.8571 0.6571 0.2000\n", - "1 global CLBBB 0.9143 0.9429 -0.0286\n", - "2 global CRBBB 0.9143 0.5429 0.3714\n", - "3 global PVC 0.9714 0.5143 0.4571\n", - "4 paired AMI 0.8000 0.6571 0.1429\n", - "5 paired CLBBB 0.7714 0.9429 -0.1714\n", - "6 paired CRBBB 0.8571 0.5429 0.3143\n", - "7 paired PVC 0.8857 0.5714 0.3143" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "participant_by_label = (\n", - " yesno.groupby([\"participant\", \"task\", \"label\", \"model\"])[\"good\"]\n", - " .mean()\n", - " .unstack(\"model\")\n", - " .reset_index()\n", - ")\n", - "participant_by_label[\"difference\"] = (\n", - " participant_by_label[\"ProtoSSL\"] - participant_by_label[\"ProtoECGNet\"]\n", - ")\n", - "\n", - "per_label_summary = (\n", - " participant_by_label.groupby([\"task\", \"label\"])\n", - " .agg(\n", - " ProtoSSL_mean=(\"ProtoSSL\", \"mean\"),\n", - " ProtoECGNet_mean=(\"ProtoECGNet\", \"mean\"),\n", - " mean_difference=(\"difference\", \"mean\"),\n", - " )\n", - " .reset_index()\n", - ")\n", - "per_label_summary\n" - ] - }, - { - "cell_type": "markdown", - "id": "e1dd8d16", - "metadata": {}, - "source": [ - "### Descriptive summaries for the comparative A/B/Both/Neither question" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "06bb6c2f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
preferencetaskBothNeitherProtoECGNetProtoSSL
0global5102564
1paired3463664
\n", - "
# Count how often each preference option was chosen within each task:
# one row per task, one column per option, 0 where an option was never picked.
task_preference_counts = prefs.groupby(["task", "preference"]).size()
preference_overall = task_preference_counts.unstack(fill_value=0).reset_index()
preference_overall
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
preferencetasklabelBothNeitherProtoECGNetProtoSSL
0globalAMI160514
1globalCLBBB170108
2globalCRBBB80522
3globalPVC100520
4pairedAMI83915
5pairedCLBBB122174
6pairedCRBBB80522
7pairedPVC61523
\n", - "
# Same tally as the per-task table, broken down further by label:
# one row per (task, label), one column per preference option, 0 where absent.
label_preference_counts = prefs.groupby(["task", "label", "preference"]).size()
preference_by_label = label_preference_counts.unstack(fill_value=0).reset_index()
preference_by_label
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
labelfleiss_kappa
0AMI0.2023
1CLBBB0.0542
2CRBBB0.3000
3PVC0.4000
\n", - "
def fleiss_from_yesno(df):
    """Compute Fleiss' kappa for the binary ``good`` ratings in ``df``.

    Every (case_id, task, model) combination is treated as one rated subject.
    Its row in the count table holds the number of ratings equal to 0 and the
    number equal to 1.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format ratings with columns ``case_id``, ``task``, ``model`` and
        ``good``; ``good`` values are counted under the keys 0 and 1.

    Returns
    -------
    float
        The value returned by ``statsmodels``' ``fleiss_kappa`` for the
        subjects-by-category count table.
    """
    table = np.asarray(
        [
            # reindex guarantees both categories are present (0 when unused)
            # and fixes their column order to [0, 1].
            ratings["good"].value_counts().reindex([0, 1], fill_value=0).to_numpy()
            for _, ratings in df.groupby(["case_id", "task", "model"])
        ]
    )
    return float(fleiss_kappa(table))


overall_kappa = fleiss_from_yesno(yesno)

# Iterate over the label groups explicitly rather than using
# DataFrameGroupBy.apply: apply operates on the grouping column, which pandas
# has deprecated (it emits a DeprecationWarning and will exclude that column in
# a future version). The helper never reads "label", so results are unchanged.
kappa_by_label = pd.DataFrame(
    [
        (label, fleiss_from_yesno(label_ratings))
        for label, label_ratings in yesno.groupby("label")
    ],
    columns=["label", "fleiss_kappa"],
)

print("Overall Fleiss' kappa:", round(overall_kappa, 4))
kappa_by_label
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
labelfleiss_kappa
0AMI0.2170
1CLBBB0.0174
2CRBBB0.1322
3PVC0.1425
\n", - "
# NOTE: `fleiss_kappa` and `np` come from the notebook's import cell at the top;
# they are not re-imported here so that all dependencies stay in one place.


def fleiss_from_pref(df):
    """Compute Fleiss' kappa for the four-way ``preference`` answers in ``df``.

    Every (case_id, task) combination is treated as one rated subject. Its row
    in the count table holds how many answers fell into each of the categories
    ProtoSSL, ProtoECGNet, Both and Neither (in that column order).

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format answers with columns ``case_id``, ``task`` and
        ``preference``.

    Returns
    -------
    float
        The value returned by ``statsmodels``' ``fleiss_kappa`` for the
        subjects-by-category count table.
    """
    categories = ["ProtoSSL", "ProtoECGNet", "Both", "Neither"]
    table = np.asarray(
        [
            # reindex guarantees every category has a column (0 when unused)
            # and fixes the column order across subjects.
            answers["preference"].value_counts().reindex(categories, fill_value=0).to_numpy()
            for _, answers in df.groupby(["case_id", "task"])
        ]
    )
    return float(fleiss_kappa(table))


overall_kappa_pref = fleiss_from_pref(prefs)

# Iterate over the label groups explicitly rather than using
# DataFrameGroupBy.apply: apply operates on the grouping column, which pandas
# has deprecated (it emits a DeprecationWarning and will exclude that column in
# a future version). The helper never reads "label", so results are unchanged.
kappa_by_label_pref = pd.DataFrame(
    [
        (label, fleiss_from_pref(label_answers))
        for label, label_answers in prefs.groupby("label")
    ],
    columns=["label", "fleiss_kappa"],
)

print("Overall Fleiss' kappa (A/B/Both/Neither):", round(overall_kappa_pref, 4))
kappa_by_label_pref
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
taskn_participantsProtoSSL_meanProtoECGNet_meanmean_differenceci95_lowci95_highwilcoxon_Wwilcoxon_ppaired_t_psign_test_pwilcoxon_p_holm
0global70.91430.66430.25000.14320.35680.00000.01780.00120.01560.0355
1paired70.82860.67860.15000.05010.24990.00000.01780.01040.01560.0355
\n", - "
# Display copy of the primary results with the listed numeric columns rounded
# to 4 decimals; `primary_results` itself is left unmodified.
rounded_columns = [
    "ProtoSSL_mean",
    "ProtoECGNet_mean",
    "mean_difference",
    "ci95_low",
    "ci95_high",
    "wilcoxon_p",
    "wilcoxon_p_holm",
    "paired_t_p",
    "sign_test_p",
]
primary_results_rounded = primary_results.assign(
    **{name: primary_results[name].round(4) for name in rounded_columns}
)
primary_results_rounded
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
modeltasklabelProtoECGNetProtoSSL
0globalAMI23/35 (65.7%)30/35 (85.7%)
1globalCLBBB33/35 (94.3%)32/35 (91.4%)
2globalCRBBB19/35 (54.3%)32/35 (91.4%)
3globalPVC18/35 (51.4%)34/35 (97.1%)
4pairedAMI23/35 (65.7%)28/35 (80.0%)
5pairedCLBBB33/35 (94.3%)27/35 (77.1%)
6pairedCRBBB19/35 (54.3%)30/35 (85.7%)
7pairedPVC20/35 (57.1%)31/35 (88.6%)
\n", - "
# Build a "n_yes/n_total (xx.x%)" string for every (task, label, model) row,
# then spread the models into columns so each (task, label) is a single row.
label_yesno_pivot = (
    label_yesno.assign(
        summary=lambda counts: (
            counts["n_yes"].astype(str)
            + "/"
            + counts["n_total"].astype(str)
            + " ("
            + (100 * counts["proportion"]).round(1).astype(str)
            + "%)"
        )
    )[["task", "label", "model", "summary"]]
    .pivot(index=["task", "label"], columns="model", values="summary")
    .reset_index()
)
label_yesno_pivot