In [1]:
# Step up one directory; the cells below use paths such as ./data/... and runs/...
# relative to the resulting working directory.
# NOTE(review): not idempotent - running this cell twice moves up two levels.
cd ..

/home/dmitriishubin/Desktop/physionet-challenge-2020


In [None]:
# Start TensorBoard on the log directory of a single run.
# Before executing, replace the folder name after runs/ with the name of the last run.
!tensorboard --logdir=runs/May11_01-21-42_dmitrii

In [2]:
import pandas as pd
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import gc
import ast
import json
import os

from metrics import Metric
from postprocessing import PostProcessing
from utils.KPI_plots import plot_confusion_matrix

In [3]:
def load_waveform(data_path,name):
    """Return the array stored in the file `<data_path><name>.npy`.

    The file name is assembled by plain string concatenation: no path
    separator is inserted between `data_path` and `name`.
    """
    npy_file = ''.join([data_path, name, '.npy'])
    return np.load(npy_file)

def load_label(data_path,name):
    """Return the parsed content of the JSON file `<data_path><name>.json`.

    The file name is assembled by plain string concatenation: no path
    separator is inserted between `data_path` and `name`.

    The file is opened in a `with` block so the handle is closed as soon as
    parsing finishes instead of being left to the garbage collector.
    """
    with open(data_path + name + '.json') as label_file:
        return json.load(label_file)

In [4]:
# Select the Qt matplotlib backend: figures open in separate windows rather than inline.
%matplotlib qt

# Settings

In [5]:
# Shared helper objects used by the evaluation cells below:
# - competition_metric: its compute() and compute_modified_confusion_matrix() methods are called
# - postprocessing: its run() method is applied to every raw prediction
competition_metric = Metric()
postprocessing = PostProcessing()

# Dataset B

In [28]:
# Folder holding one prediction JSON per record, and folder holding the
# formatted records (.npy signal + .json metadata) of the same dataset.
DEBUG_PATH = './data/CV_debug/B/'
DATA_PATH = './data/B/formatted/'

# Record names are the prediction file names with the '.json' suffix removed.
# endswith() is used instead of find(): find() also matched names that merely
# contain '.json' (e.g. 'x.json.bak'), for which stripping the last five
# characters yields a wrong record name.
# sorted() makes the processing order independent of the arbitrary order
# returned by os.listdir().
JSON_SUFFIX = '.json'
list_records = sorted(
    file_name[:-len(JSON_SUFFIX)]
    for file_name in os.listdir(DEBUG_PATH)
    if file_name.endswith(JSON_SUFFIX)
)


In [29]:
# Main processing pipeline
#
# For every record in list_records:
#   1. load the reference label vector and the stored prediction,
#   2. post-process the prediction,
#   3. compute the per-record competition score and absolute error count,
#   4. keep label/prediction rows for the dataset-level KPIs.
#
# Results are accumulated in Python lists and converted to arrays once at the
# end (np.append inside the loop re-allocates the whole array on every step).
scores_errors = []
scores_competition = []
records = []
labels = []
preds = []

# load all records
for record in list_records:

    meta = load_label(DATA_PATH,record)
    records.append(meta['filename'])

    pred = load_label(DEBUG_PATH,record)['predicted_label']

    # calc score for each record; both vectors are shaped (1, n_classes)
    label = np.array(meta['labels_training_merged']).reshape(1,-1)
    pred = np.array(pred).reshape(1,-1)
    pred = postprocessing.run(pred)

    scores_competition.append(competition_metric.compute(label, pred))
    scores_errors.append(np.sum(np.abs(label - pred)))

    # add predictions and labels for overall KPI estimation
    labels.append(label[0,:])
    preds.append(pred[0,:])

# Later cells index these with numpy masks, so expose them as 1-D arrays.
# `str` replaces the `np.str` alias, which was removed from NumPy.
records = np.array(records, dtype=str)
scores_competition = np.array(scores_competition, dtype=float).reshape(-1)
scores_errors = np.array(scores_errors, dtype=float).reshape(-1)

preds = np.array(preds)
labels = np.array(labels)

# plot modified confusion matrix
matrix = competition_metric.compute_modified_confusion_matrix(labels, preds)

# One display name per class, in the order of the label vector.
# Every entry needs its own trailing comma: adjacent string literals are
# concatenated by Python, which previously merged 'IAVB' and 'AF' into a
# single 'IAVBAF' entry and shifted all following names by one.
label_names = [
    'IAVB',
    'AF',
    'AFL',
    'Brady',
    'CRBBB',
    'IRBBB',
    'LAnFB',
    'LAD',
    'LBBB',
    'LQRSV',
    'NSIVCB',
    'PR',
    'PAC',
    'PVC',
    'LPR',
    'LQT',
    'QAb',
    'RAD',
    'RBBB',
    'SA',
    'SB',
    'SNR',
    'STach',
    'SVPB',
    'TAb',
    'TInv',
    'VP',
]


plot_confusion_matrix(matrix,label_names)

  cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]


In [30]:
# Competition metric over all records of the dataset loaded from DATA_PATH.
# The message names dataset B because DATA_PATH / DEBUG_PATH point at ./data/B/.
print('Metric for dataset B: ',competition_metric.compute(labels, preds))

Metric for dataset A:  0.48377572309037936


In [31]:
# Keep only the records whose per-record score is below 1 (records with errors).
# NOTE: this cell overwrites `records` and `scores_competition` in place, so it
# must be run exactly once after the main processing cell.
has_error = scores_competition < 1
records = records[has_error]
scores_competition = scores_competition[has_error]

# From here on `scores_competition` holds the reciprocal of the score.
scores_competition = 1/scores_competition

#TODO: clarify the type of sort
# argsort is ascending on the reciprocal score and the order is applied as-is
# (no reversal).
score_order = np.argsort(scores_competition)
records = records[score_order]

# save a list of files in csv
records_table = pd.DataFrame(records)
records_table.to_csv('./data/dataset_B_list.csv')

  
  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.


In [33]:
def plot_record(record):
    """Plot one record's ECG and print its post-processed prediction and label.

    The signal (`<record>.npy`) and metadata (`<record>.json`) are read from
    the module-level DATA_PATH, the stored prediction from DEBUG_PATH. Both
    globals are looked up at call time, so they must point at the dataset the
    record belongs to when this function is called (DEBUG_PATH is reassigned
    by a later cell of this notebook).

    Parameters
    ----------
    record : str
        Record name without file extension.

    Returns
    -------
    numpy.ndarray
        The prediction after `postprocessing.run`, built from a (1, n) array.
    """

    ecg = np.load(DATA_PATH+record+'.npy')
    # Close the metadata file deterministically instead of leaking the handle.
    with open(DATA_PATH+ f'{record}.json') as meta_file:
        meta = json.load(meta_file)

    pred = load_label(DEBUG_PATH,record)
    pred = pred['predicted_label']

    #plot the data
    fig,ax = plt.subplots(figsize=(20,20))
    fig.suptitle(record+', labels: '+str(meta['labels_full']))
    for i in range(12):
        # columns 0..11 of the signal are drawn; each one is shifted by
        # 2000*i so the traces are stacked instead of overlapping
        ax.plot(ecg[:,i]+2000*i)
    plt.show()

    pred = np.array(pred).reshape(1,-1)
    pred = postprocessing.run(pred)

    print('Predictions: ',pred)
    print('Label:       ',meta['labels_training_merged'])
    return np.array(pred)

# Inspect the first entry of `records` (filtered and ordered by the previous cell).
# NOTE(review): raises IndexError when `records` is empty.
record = records[0]

pred = plot_record(record=record)


Predictions:  [[0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0.]]
Label:        [0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


# Calculate a competition score across all datasets + joint

In [25]:
# Root folder of the stored predictions; each dataset has its own sub-folder.
# NOTE: this reassigns the DEBUG_PATH used by the single-dataset cells above.
DEBUG_PATH = './data/CV_debug/'
DATASETS = ['A','B','D','E']

# Collect the record names of every dataset: prediction file names with the
# '.json' suffix removed. endswith() is used instead of find() so that only
# real '.json' files are picked up (find() also matched e.g. 'x.json.bak').
# Files are sorted per dataset so the order does not depend on os.listdir().
list_records = []

for dataset in DATASETS:
    list_records += sorted(
        file_name[:-len('.json')]
        for file_name in os.listdir(f'{DEBUG_PATH}{dataset}/')
        if file_name.endswith('.json')
    )


In [26]:
# Collect labels and post-processed predictions for every record, both jointly
# and per dataset, and expose them as (n_records, N_CLASSES) arrays.

# The first character of a record name identifies its dataset.
PREFIX_TO_DATASET = {
    'A': 'A',
    'Q': 'B',
    'I': 'C',
    'S': 'D',
    'H': 'E',
    'E': 'F',
}
# Datasets that get their own preds_X / labels_X arrays below.
SCORED_DATASETS = ('A', 'B', 'D', 'E')
# Length of one label / prediction vector.
N_CLASSES = 27

preds = []
labels = []
preds_by_dataset = {name: [] for name in SCORED_DATASETS}
labels_by_dataset = {name: [] for name in SCORED_DATASETS}

for record in list_records:

    # Fail loudly on an unknown prefix. The previous if/elif chain had no
    # else branch, so such a record silently reused the dataset of the
    # previous iteration (or raised NameError on the very first record).
    dataset = PREFIX_TO_DATASET.get(record[:1])
    if dataset is None:
        raise ValueError(f'Cannot infer the dataset of record {record!r} from its first character')

    pred_folder = f'./data/CV_debug/{dataset}/'
    data_folder = f'./data/{dataset}/formatted/'

    label = load_label(data_folder,record)
    label = label['labels_training_merged']
    label = np.array(label).reshape(1,-1)

    pred = load_label(pred_folder,record)
    pred = pred['predicted_label']
    pred = np.array(pred).reshape(1,-1)
    pred = postprocessing.run(pred)

    # Every record contributes to the joint arrays ...
    preds.append(pred)
    labels.append(label)

    # ... and, for the scored datasets only, to its per-dataset arrays.
    if dataset in preds_by_dataset:
        preds_by_dataset[dataset].append(pred)
        labels_by_dataset[dataset].append(label)

preds = np.array(preds).reshape(-1,N_CLASSES)
labels = np.array(labels).reshape(-1,N_CLASSES)

preds_A = np.array(preds_by_dataset['A']).reshape(-1,N_CLASSES)
labels_A = np.array(labels_by_dataset['A']).reshape(-1,N_CLASSES)

preds_B = np.array(preds_by_dataset['B']).reshape(-1,N_CLASSES)
labels_B = np.array(labels_by_dataset['B']).reshape(-1,N_CLASSES)

preds_D = np.array(preds_by_dataset['D']).reshape(-1,N_CLASSES)
labels_D = np.array(labels_by_dataset['D']).reshape(-1,N_CLASSES)

preds_E = np.array(preds_by_dataset['E']).reshape(-1,N_CLASSES)
labels_E = np.array(labels_by_dataset['E']).reshape(-1,N_CLASSES)

In [27]:
# Report the competition metric for the joint set first, then per dataset.
# Each entry: (caption, reference labels, predictions).
score_report = [
    ('Score across all datasets: ', labels, preds),
    ('Score, dataset A: ', labels_A, preds_A),
    ('Score, dataset B: ', labels_B, preds_B),
    ('Score, dataset D: ', labels_D, preds_D),
    ('Score, dataset E: ', labels_E, preds_E),
]
for caption, y_true, y_pred in score_report:
    print(caption, competition_metric.compute(y_true, y_pred))

Score across all datasets:  0.5712336672902312
Score, dataset A:  0.756045772108003
Score, dataset B:  0.48377572309037936
Score, dataset D:  nan
Score, dataset E:  0.5216688832100651
