In [1]:
cd ..

/home/dmitriishubin/Desktop/physionet-challenge-2020


In [177]:
import pandas as pd
import numpy as np
import seaborn as sns
import json
import os
import gc
from tqdm import tqdm
import matplotlib.pyplot as plt
from metrics.metrics import Metric

from kardioml.data.p_t_wave_detector import PTWaveDetection

metric = Metric()

In [178]:
%matplotlib qt

In [183]:
#DATASETS = ['A','B','C','D','E','F']
DATASETS = ['A','B','D','E'] #current for training
#DATASETS = ['D']
datalist = []

for dataset in DATASETS:
    files = [i[:-4] for i in os.listdir(f'./data/{dataset}/formatted/') if i.find('.npy')!=-1]
    for file in files:
        datalist.append(f'./data/{dataset}/formatted/'+file)



In [180]:
len(datalist)

43101

# Check the length fits json and numpy

In [None]:
for data in tqdm(datalist):
    signal = np.load(data+'.npy')
    meta = json.load(open(data+'.json'))
    
    if meta['shape'][0] != signal.shape[0]:
        break
    

# Check the length distribution, all datasets

In [None]:
length_list = []
exclusions = []
exclusions_labels = []
exclusions_digits = []

for data in tqdm(datalist):
    meta = json.load(open(data+'.json'))
    if meta['labels_training_merged'] is None:
        continue
    if meta['shape'][0] > 38000:
        file_name = data.split('/')     
        exclusions.append(file_name[-1])
        exclusions_labels.append(meta['labels_full'])
        exclusions_digits.append(meta['labels_training_merged'])
    length_list.append(meta['shape'][0])

    
sns.distplot(length_list)

In [None]:
hist = np.histogram(length_list,2000)
plt.plot(hist[1][:-1][:100],hist[0])

In [None]:
np.percentile(length_list,99)

# Check distribution of classes, calculate weights

In [197]:
labels = []

for data in tqdm(datalist):
    meta = json.load(open(data+'.json'))
    if meta['shape'][0] > 38000:
        continue
    elif meta['labels_training_merged'] is None:
        continue
    else:
        label = meta['labels_training_merged']
        if label[4] > 0 or label[18] >0:
            label[4] = 1
            label[18] = 1
        if label[23] > 0 or label[12] >0:
            label[23] = 1
            label[12] = 1
        if label[26] > 0 or label[13] >0:
            label[26] = 1
            label[13] = 1
        labels.append(label)
    

    
labels = np.array(labels)

100%|██████████| 32683/32683 [02:34<00:00, 211.65it/s]


In [200]:
weights = np.array([1.]*27)
weights[[4,18,23,12,13,26]] = 0.5
weights

array([1. , 1. , 1. , 1. , 0.5, 1. , 1. , 1. , 1. , 1. , 1. , 1. , 0.5,
       0.5, 1. , 1. , 1. , 1. , 0.5, 1. , 1. , 1. , 1. , 0.5, 1. , 1. ,
       0.5])

In [198]:
weights = np.sum(labels,axis=0)
weights = weights/labels.shape[0]
# weights[np.where(weights == 0.)] = 1
# weights = 1/weights
# weights = weights.tolist()
weights

array([0.05770264, 0.10168702, 0.00436978, 0.00916938, 0.08814786,
       0.04290985, 0.05823991, 0.18431892, 0.02876178, 0.00651886,
       0.0284036 , 0.01070955, 0.04359039, 0.00633977, 0.01217809,
       0.00436978, 0.01966403, 0.01232136, 0.08814786, 0.0280096 ,
       0.02432036, 0.68050432, 0.0400086 , 0.04359039, 0.08478097,
       0.01070955, 0.00633977])

# Checking PT detector perfomance

In [None]:
data_range = np.arange(len(datalist))[0:11].tolist()

datalist_sub = [i for index,i in enumerate(datalist) if index in data_range]


for data in tqdm(datalist_sub):
    meta = json.load(open(data+'.json'))
    if meta['shape'][0] > 38000:
        continue
    elif meta['labels_training_merged'] is None:
        continue
    else:
        signal = np.load(data+'.npy')
        
        
        fig = plt.figure(figsize=(20,10))
        for i in range(12):
            channel = signal[:,i]
            plt.plot(channel+i*500)
            plt.plot(meta['p_waves'][i],channel[meta['p_waves'][i]]+i*500,'*')
            plt.plot(meta['t_waves'][i],channel[meta['t_waves'][i]]+i*500,'*')
        plt.show()
        plt.title(str(meta['labels_full'])+' | '+data)
        print('===================')
        print(data)
        print(meta['labels_full'])
        print('===================')
        if data == '../data/A/formatted/A2091':
            break

# Custom metric

In [174]:
#pred = np.array([1,1,0.2,0.3,0]).reshape(1,-1)
pred =  np.array([1,1,1,1,1]).reshape(1,-1)
label = np.array([[0,0,0,1,1]]).reshape(1,-1)

# pred1 =  np.array([0,0,1,0,1]).reshape(1,-1)
# label1 = np.array([0,0,0,0,1]).reshape(1,-1)

# label = np.append(label,label1,axis=0)
# pred = np.append(pred,pred1,axis=0)

In [175]:
cm = metric.compute_modified_confusion_matrix(label,label)

cm#*metric.weights

2.0


array([[0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0.5, 0.5],
       [0. , 0. , 0. , 0.5, 0.5]])

In [165]:
np.sum(cm)

2.0

In [170]:
pred1 =  pred
label1 = label#/np.sum(label,axis=0)

pred11 = label1-pred1
pred11 = np.clip(pred11,0,100)

pred12 = pred1-label1
pred12 = np.clip(pred12,0,100)

(np.matmul(label1.T,pred11)+np.matmul(label1.T,pred1)) * np.matmul(label1.T,pred12)

array([[0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.81, 0.81, 1.  , 0.  , 0.  ],
       [0.81, 0.81, 1.  , 0.  , 0.  ]])

In [171]:
np.matmul(label1.T,pred12)

array([[0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. ],
       [0.9, 0.9, 1. , 0. , 0. ],
       [0.9, 0.9, 1. , 0. , 0. ]])

In [172]:
np.matmul(label1.T,pred1)

array([[0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. ],
       [0.9, 0.9, 1. , 1. , 1. ],
       [0.9, 0.9, 1. , 1. , 1. ]])

In [138]:
pred11

array([[1, 1, 0, 0, 0]])

In [125]:
df = pd.DataFrame([pred1.shape,label1])

ValueError: Must pass 2-d input. shape=(2, 1, 5)

In [74]:
np.sum(label,axis=0)

array([0, 0, 2, 1, 2])

In [77]:
label

array([[0, 0, 1, 1, 1],
       [0, 0, 1, 0, 1]])

In [85]:
pred

array([[0, 0, 1, 1, 1],
       [0, 0, 1, 0, 1]])