# Miscellaneous poster stats

## Notebook setup

In [1]:
#| code-fold: true
#| code-summary: "Click to see packages imported"
import os
import configparser
from pathlib import Path

import wfdb
import pandas as pd
import numpy as np

In [2]:
#|include: false
# When the notebook is launched from inside nbs/, step up one level so that the
# project root becomes the working directory.

launch_dir = Path.cwd()
if launch_dir.stem == "nbs":
    os.chdir(launch_dir.parent)
print(f"The current working directory is {Path.cwd()}")

The current working directory is /Users/shaun/source/Thesis/PhysioNetChallenge2020


In [3]:
#|include: false
# Read config.ini (if present) to find out where the datasets are stored.
config = configparser.ConfigParser()
if Path("config.ini").exists():
    config.read("config.ini")
    # "~" in the configured path is expanded to the user's home directory
    data_dir = Path(config["datasets"]["path"]).expanduser()
    print(f"Datasets are located at {data_dir.resolve()}")
else:
    # data_dir stays undefined in this case; later cells need it.
    print("WARNING: Please generate a config.ini file by running scripts/get_datasets.py")

Datasets are located at /Users/shaun/source/Thesis/PhysioNetChallenge2020/data


## Norwegian endurance athletes

In [4]:
from sklearn.metrics import confusion_matrix

from src.data.util import get_all_records, diagnosis_codes, codes_to_label_vector
from src.data import norwegian

In [5]:
# Folder holding version 1.0.0 of the Norwegian athlete ECG records.
athlete_ecg_dir = data_dir.joinpath("norwegian-athlete-ecg", "1.0.0")

In [6]:
# List, per record, the findings taken from the second header comment
# (the actual labels shipped with the dataset).
for entry in get_all_records(athlete_ecg_dir):
    record = wfdb.rdrecord(athlete_ecg_dir / entry)
    findings_c = norwegian.extract_findings(record.comments[1])
    actual_findings = norwegian.classify_relevant_findings(findings_c)
    # TODO: print SL12 findings

    # Record name, one finding per line, then a blank separator line.
    finding_names = [diagnosis_codes[finding] for finding in actual_findings]
    print(entry, *finding_names, "", sep="\n")

ath_022
Normal sinus rhythm

ath_023
Sinus bradycardia

ath_021
Sinus arrhythmia

ath_009
Normal sinus rhythm

ath_008
Normal sinus rhythm
Incomplete right bundle branch block

ath_020
Normal sinus rhythm

ath_018
Sinus arrhythmia

ath_024
Sinus bradycardia
Incomplete right bundle branch block

ath_025
Normal sinus rhythm

ath_019
Sinus arrhythmia

ath_027
Sinus bradycardia

ath_026
Normal sinus rhythm

ath_003
Normal sinus rhythm

ath_017
Normal sinus rhythm

ath_016
Normal sinus rhythm

ath_002
Sinus arrhythmia

ath_014
Normal sinus rhythm

ath_028
Normal sinus rhythm

ath_001
Sinus arrhythmia

ath_015
Normal sinus rhythm

ath_011
Sinus arrhythmia

ath_005
Sinus bradycardia
Normal sinus rhythm

ath_004
Normal sinus rhythm

ath_010
Normal sinus rhythm

ath_006
Normal sinus rhythm

ath_012
Sinus arrhythmia

ath_013
Sinus bradycardia

ath_007
Normal sinus rhythm



In [7]:
# NOTE(review): presumably the share of the 28 athlete records whose actual
# labels include "Incomplete right bundle branch block" (ath_008 and ath_024
# in the listing printed by the previous cell) — confirm.
2/28

0.07142857142857142

In [8]:
# List, per record, the findings taken from the first header comment
# (the labels predicted by SL12).
for entry in get_all_records(athlete_ecg_dir):
    record = wfdb.rdrecord(athlete_ecg_dir / entry)
    findings_sl12 = norwegian.extract_findings(record.comments[0])
    predicted_findings = norwegian.classify_relevant_findings(findings_sl12)

    # Record name, one finding per line, then a blank separator line.
    finding_names = [diagnosis_codes[finding] for finding in predicted_findings]
    print(entry, *finding_names, "", sep="\n")

ath_022
Normal sinus rhythm

ath_023
Sinus bradycardia

ath_021
Sinus arrhythmia
Sinus bradycardia

ath_009
Sinus bradycardia

ath_008
Normal sinus rhythm

ath_020
Normal sinus rhythm

ath_018
Sinus arrhythmia
Sinus bradycardia

ath_024
Sinus bradycardia
Incomplete right bundle branch block

ath_025
Sinus bradycardia

ath_019
Sinus arrhythmia
Normal sinus rhythm

ath_027
Sinus bradycardia

ath_026
Normal sinus rhythm

ath_003
Sinus arrhythmia
Sinus bradycardia

ath_017

ath_016
Sinus bradycardia

ath_002
Sinus arrhythmia
Sinus bradycardia

ath_014
Normal sinus rhythm

ath_028
Sinus bradycardia

ath_001
Sinus arrhythmia
Sinus bradycardia

ath_015
Sinus bradycardia

ath_011
Sinus arrhythmia
Normal sinus rhythm

ath_005
Sinus arrhythmia
Sinus bradycardia

ath_004
Sinus bradycardia

ath_010
Sinus bradycardia

ath_006
Sinus bradycardia

ath_012
Sinus arrhythmia
Sinus bradycardia

ath_013
Sinus bradycardia

ath_007
Normal sinus rhythm
Incomplete right bundle branch block



## Scoring SL12 with confusion matrix

In [41]:


# Alternative label sets that were tried for scoring:
# sinus_labels = [426177001, 426783006, 427084000, 427393009]
# sinus_labels = [426177001, 426783006]                 # Bradycardia or Normal
# sinus_labels = [426177001, 427084000, 427393009]      # Brady, Tachy, Arrhythmia

brady_label = [426177001]
arry_label = [427393009]
rbbb_labels = [713427006, 713426002]        # Incomplete RBBB, Complete RBBB

# The label set scored in this run.
athlete_labels = arry_label

# 2x2 tally laid out as [[tn, fp], [fn, tp]]: row = actual label.
total_confusion = np.zeros((2, 2), dtype=int)
for entry in get_all_records(athlete_ecg_dir):
    record = wfdb.rdrecord(athlete_ecg_dir / entry)

    # Actual labels from cardiologist (second header comment)
    findings_c = norwegian.extract_findings(record.comments[1])
    actual_findings = norwegian.classify_relevant_findings(findings_c)
    actual_labels = codes_to_label_vector(actual_findings, athlete_labels)

    # Predicted labels from SL12 algorithm (first header comment)
    findings_sl12 = norwegian.extract_findings(record.comments[0])
    predicted_findings = norwegian.classify_relevant_findings(findings_sl12)
    predicted_labels = codes_to_label_vector(predicted_findings, athlete_labels)

    # Disabled hack: if no sinus rhythm findings, assume normal sinus rhythm
    # (426783006):
    # if sum(actual_labels) == 0:
    #     actual_labels[1] = 1
    # if sum(predicted_labels) == 0:
    #     predicted_labels[1] = 1

    # Add this record's outcome for every scored label to the running tally.
    for i in range(len(athlete_labels)):
        agree = actual_labels[i] == predicted_labels[i]
        if actual_labels[i] == 1:
            total_confusion[1, 1 if agree else 0] += 1  # tp if agree, else fn
        elif actual_labels[i] == 0:
            total_confusion[0, 0 if agree else 1] += 1  # tn if agree, else fp

    # Alternative that was tried instead of the manual tally:
    # total_confusion += confusion_matrix(actual_labels, predicted_labels)

In [42]:
# Layout is [[tn, fp], [fn, tp]]: the row is the actual label (0 or 1).
total_confusion

array([[19,  2],
       [ 0,  7]])

In [39]:
def print_classifier_metrics(tn, fp, fn, tp):
    """Print population counts and summary metrics for a binary classifier.

    Parameters
    ----------
    tn, fp, fn, tp : int
        True-negative, false-positive, false-negative and true-positive
        counts, in that order.

    Returns
    -------
    None
        The report is written to standard output.

    Notes
    -----
    A metric whose denominator is zero (for example precision when nothing
    was predicted positive) is printed as ``nan`` rather than raising
    ``ZeroDivisionError`` or triggering a NumPy divide warning.
    """
    def ratio(numerator, denominator):
        # An undefined ratio becomes nan so one empty class cannot abort the report.
        return numerator / denominator if denominator else float("nan")

    P = tp + fn     # all actual positives
    N = fp + tn     # all actual negatives
    print("Population")
    print("----------")
    print(f"Total population: {P+N}")
    print(f"Positive: {P}")
    print(f"Negative: {N}\n")

    acc = ratio(tp + tn, P + N)
    ppv = ratio(tp, tp + fp)
    f1 = ratio(2 * tp, 2*tp + fp + fn)
    fpr = ratio(fp, N)
    fnr = ratio(fn, P)
    print("Performance")
    print("-----------")
    print(f"Accuracy: {acc}")
    print(f"Precision: {ppv}")
    print(f"F1-Score: {f1}")
    print(f"False-positive rate: {fpr}")
    print(f"False-negative rate: {fnr}")

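Accuracy is a VERY misleading metric when the classes are imbalanced: if most records are negative for the scored label, a classifier that always predicts "negative" still achieves a high accuracy. Precision, F1-score and the false-positive/false-negative rates give a more honest picture.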

In [43]:
# Unpack the 2x2 tally row by row: first row is (tn, fp), second is (fn, tp).
(tn, fp), (fn, tp) = total_confusion
print_classifier_metrics(tn, fp, fn, tp)

Population
----------
Total population: 28
Positive: 7
Negative: 21

Performance
-----------
Accuracy: 0.9285714285714286
Precision: 0.7777777777777778
F1-Score: 0.875
False-positive rate: 0.09523809523809523
False-negative rate: 0.0
