# Overall pipeline

In [2]:
from datetime                      import datetime
import invalid_sample_detection    as invalid
import evaluation                  as evaluate
import load_annotations            as annotate
import regular_activity            as regular
import specific_arrhythmias        as arrhythmia
import numpy                       as np
import parameters
import os
import csv
import wfdb

data_path = 'sample_data/challenge_training_data/'
ann_path = 'sample_data/challenge_training_multiann/'
ecg_ann_type = 'gqrs'

## Classifying arrhythmia alarms

In [3]:
# Returns true if alarm is classified as a true alarm
def is_classified_true_alarm(data_path, ann_path, sample_name, ecg_ann_type, verbose=False): 
    sig, fields = wfdb.rdsamp(data_path + sample_name)
    alarm_type, is_true_alarm = regular.check_gold_standard_classification(fields)

    is_regular = regular.is_sample_regular(data_path, ann_path, sample_name, ecg_ann_type, alarm_type, should_check_nan=False)    
    if is_regular:
        if verbose: 
            print sample_name + "with regular activity"
        return False
    
    if alarm_type == "Asystole": 
        arrhythmia_test = arrhythmia.test_asystole
    elif alarm_type == "Bradycardia": 
        arrhythmia_test = arrhythmia.test_bradycardia
    elif alarm_type == "Tachycardia": 
        arrhythmia_test = arrhythmia.test_tachycardia
    elif alarm_type == "Ventricular_Tachycardia": 
        arrhythmia_test = arrhythmia.test_ventricular_tachycardia
    elif alarm_type == "Ventricular_Flutter_Fib": 
        arrhythmia_test = arrhythmia.test_ventricular_flutter_fibrillation
    else: 
        raise Exception("Unknown arrhythmia alarm type")
    
    try: 
        classified_true_alarm = arrhythmia_test(data_path, ann_path, sample_name, ecg_ann_type, verbose)
        return classified_true_alarm

    except Exception as e: 
        print "sample_name: ", sample_name, e


def is_true_alarm(data_path, sample_name): 
    sig, fields = wfdb.rdsamp(data_path + sample_name)
    alarm_type, true_alarm = regular.check_gold_standard_classification(fields)
    return true_alarm

In [4]:
# Generate confusion matrix for all samples given sample name/directory
def generate_confusion_matrix_dir(data_path, ann_path, ecg_ann_type): 
    confusion_matrix = {
        "TP": [],
        "FP": [],
        "FN": [],
        "TN": []
    }
    
    for filename in os.listdir(data_path):
        if filename.endswith(parameters.HEADER_EXTENSION):
            sample_name = filename.rstrip(parameters.HEADER_EXTENSION)
            
            true_alarm = is_true_alarm(data_path, sample_name)
            classified_true_alarm = is_classified_true_alarm(data_path, ann_path, sample_name, ecg_ann_type)

            matrix_classification = get_confusion_matrix_classification(true_alarm, classified_true_alarm)
            confusion_matrix[matrix_classification].append(sample_name)
            if matrix_classification == "FN": 
                print "FALSE NEGATIVE: ", filename
                
    return confusion_matrix


def get_confusion_matrix_classification(true_alarm, classified_true_alarm): 
    if true_alarm and classified_true_alarm: 
        matrix_classification = "TP"

    elif true_alarm and not classified_true_alarm: 
        matrix_classification = "FN"

    elif not true_alarm and classified_true_alarm: 
        matrix_classification = "FP"

    else: 
        matrix_classification = "TN"

    return matrix_classification

In [5]:
def print_by_type(false_negatives): 
    counts_by_type = {}
    for false_negative in false_negatives: 
        first = false_negative[0] 
        if first not in counts_by_type.keys(): 
            counts_by_type[first] = 0
        counts_by_type[first] += 1

    print counts_by_type
    
    
def print_by_arrhythmia(confusion_matrix, arrhythmia_prefix): 
    counts_by_arrhythmia = {}
    for classification_type in confusion_matrix.keys(): 
        sample_list = [ sample for sample in confusion_matrix[classification_type] if sample[0] == arrhythmia_prefix]
        counts_by_arrhythmia[classification_type] = (len(sample_list), sample_list)

    print counts_by_arrhythmia
    
def get_counts(confusion_matrix): 
    return { key : len(confusion_matrix[key]) for key in confusion_matrix.keys() }

In [6]:
if __name__ == '__main__': 
    start = datetime.now() 
    confusion_matrix_gqrs = generate_confusion_matrix_dir(data_path, ann_path, 'gqrs')
    counts_gqrs = get_counts(confusion_matrix_gqrs)
    print "total time: ", datetime.now() - start

    evaluate.print_stats(counts_gqrs)
    print_by_type(confusion_matrix_gqrs['FN'])
    print_by_arrhythmia(confusion_matrix_gqrs)    
    
    tp_v = [ sample for sample in confusion_matrix_gqrs['TP'] if sample[0] == 'v' ]
    fn_v = [ sample for sample in confusion_matrix_gqrs['FN'] if sample[0] == 'v' ]
    fp_v = [ sample for sample in confusion_matrix_gqrs['FP'] if sample[0] == 'v' ]
    print "TP: ", tp_v, len(tp_v)
    print "FN: ", fn_v, len(fn_v)
    print "FP: ", fp_v, len(fp_v)

  AT = filebytes[bpi, 1] >> 2  # anntype
  ts = ts + filebytes[bpi, 0] + 256 * (filebytes[bpi, 1] & 3)
  AT = filebytes[bpi, 1] >> 2
  chan[ai] = filebytes[bpi, 0]
  AT = filebytes[bpi, 1] >> 2
  channel_subsig = channel_sig[start:end]
  lf_subsig = lf[start:end]
  sub_subsig = sub[start:end]
  r_delta = get_abp_std_scores(alarm_sig[:,channel_index], std_threshold, window_size, rolling_increment)
  channel_subsig = channel_sig[start:end]
  filebytes[bpi + 2,0] + 256 * filebytes[bpi + 2,1]  # 4 bytes storing dt
  anntype[ai] = filebytes[bpi + 3, 1] >> 2
  subsig = sig[current_start*fs:current_end*fs,:]
  alarm_sig = sig[start*fs:end*fs,:]
  channel_dlfmax = calculate_dlfmax(alarm_sig[:,channel_index])
  dominant_freqs = get_dominant_freq_array(alarm_sig[:,channel_index])
  channel_subsig = channel_sig[start:end]
  return (xf, 2.0/N * np.abs(signal_fft[:N/2]))
  subsig = sig[start:end]
  r_delta = get_abp_std_scores(alarm_sig[:,channel_index], std_threshold, window_size, rolling_incremen

No annotations in specified sample range
No annotations in specified sample range
No annotations in specified sample range
No annotations in specified sample range
No annotations in specified sample range
No annotations in specified sample range
No annotations in specified sample range
No annotations in specified sample range
No annotations in specified sample range
No annotations in specified sample range
No annotations in specified sample range
No annotations in specified sample range
No annotations in specified sample range
No annotations in specified sample range
No annotations in specified sample range
No annotations in specified sample range
No annotations in specified sample range
No annotations in specified sample range
No annotations in specified sample range
No annotations in specified sample range
No annotations in specified sample range
No annotations in specified sample range
No annotations in specified sample range
FALSE NEGATIVE:  v206s.hea
FALSE NEGATIVE:  v523l.hea
No 

TypeError: print_by_arrhythmia() takes exactly 2 arguments (1 given)

## Comparing classification with other algorithms

In [23]:
def generate_others_confusion_matrices(filename, data_path): 
    others_confusion_matrices = {}
    
    with open(filename, "r") as f: 
        reader = csv.DictReader(f)
        authors = reader.fieldnames[1:]
        for author in authors: 
            others_confusion_matrices[author] = { "TP": [], "FP": [], "FN": [], "TN": [] }
            
        for line in reader: 
            sample_name = line['record name']
            true_alarm = is_true_alarm(data_path, sample_name)
            
            for author in authors: 
                classified_true_alarm = line[author] == '1'
                matrix_classification = get_confusion_matrix_classification(true_alarm, classified_true_alarm)
                
                others_confusion_matrices[author][matrix_classification].append(sample_name)
    
    return others_confusion_matrices
                
    
filename = "sample_data/answers.csv"
others_confusion_matrices = generate_others_confusion_matrices(filename, data_path)    

['l.m.eerikainen-209', 'vxk106120-213', 'sibylle.fallet-210', 'hoog.antink-216', 'fplesinger-210', 'bestcly-204', 'bellea-212']


In [24]:
for author in others_confusion_matrices.keys(): 
    other_confusion_matrix = others_confusion_matrices[author]
    print author
    counts = get_counts(other_confusion_matrix)
    evaluate.print_stats(counts)
    print_by_type(other_confusion_matrix['FN'])

hoog.antink-216
counts:  {'FP': 89, 'TN': 367, 'TP': 291, 'FN': 3}
sensitivity:  0.989795918367
specificity:  0.804824561404
ppv:  0.765789473684
f1:  0.86350148368
score:  0.863517060367
{'v': 3}
l.m.eerikainen-209
counts:  {'FP': 65, 'TN': 391, 'TP': 294, 'FN': 0}
sensitivity:  1.0
specificity:  0.857456140351
ppv:  0.818941504178
f1:  0.90045941807
score:  0.913333333333
{}
sibylle.fallet-210
counts:  {'FP': 108, 'TN': 348, 'TP': 276, 'FN': 18}
sensitivity:  0.938775510204
specificity:  0.763157894737
ppv:  0.71875
f1:  0.814159292035
score:  0.759124087591
{'b': 1, 't': 4, 'v': 13}
vxk106120-213
counts:  {'FP': 74, 'TN': 382, 'TP': 280, 'FN': 14}
sensitivity:  0.952380952381
specificity:  0.837719298246
ppv:  0.790960451977
f1:  0.864197530864
score:  0.821339950372
{'v': 14}
fplesinger-210
counts:  {'FP': 64, 'TN': 392, 'TP': 275, 'FN': 19}
sensitivity:  0.93537414966
specificity:  0.859649122807
ppv:  0.811209439528
f1:  0.86887835703
score:  0.807506053269
{'a': 1, 'f': 1, 'b': 