# Library import

In [None]:
""" os """
import os

""" torch """
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader, random_split, SubsetRandomSampler, WeightedRandomSampler
from torch.utils.data.sampler import SubsetRandomSampler


"""tensor board"""
import torchvision
#from torch.utils.tensorboard import SummaryWriter


"""glob"""
from glob import glob

""" tqdm """
import time
from tqdm import tqdm

"""Pandas"""
import pandas as pd

""" numpy """
import numpy as np
from numpy import argmax
from PIL import Image

"""JSON"""
import json

"""sklearn"""
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import auc
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_absolute_error, r2_score

"""seaborn"""
import seaborn as sns

"""scipy"""
from scipy import io
from scipy import signal
from scipy.fft import fft, ifft,fftfreq
from scipy import stats

"""SUMMARY"""
from torchsummary import summary

"""time"""
import time


"""pingouin"""
import pingouin as pg

import re
import shutil
import random
import matplotlib.pyplot as plt
import scipy

# Path init

In [None]:
# List the entries of the current working directory (sanity check before changing directory).
os.listdir()

In [None]:
# Switch the working directory to the project root so that relative paths used
# below resolve against it. "path" is a placeholder to be replaced.
cur_path = "path"
os.chdir(cur_path)
# List the new working directory to confirm the change.
os.listdir()

In [None]:
# Identifier of this run; it is appended (via str()) to the model save paths below.
# TODO: the original cell had no value here (a syntax error) - replace the
# placeholder 0 with the real experiment number before running.
experiment_num = 0
# Directory/prefix for saved figures ('path' is a placeholder).
fig_save_path = 'path'

# Hyper parameters

In [None]:
# Seed used for torch.manual_seed and for the shuffling inside the split helper.
seed = 1

# Validation ratio.
# NOTE(review): not referenced anywhere in the visible code (the split helper
# is called with explicit 0.4 / 0.3 ratios) - confirm whether it is still needed.
validation_ratio = 0.1

# Learning rate passed to the Adam optimizer.
lr = 0.001
 
# NOTE(review): momentum is not referenced in the visible code (Adam is used).
momentum = 0.5


# Batch sizes for the train/validation loaders and for the test loader.
batch_size = 512    
test_batch_size = 512


# Number of training epochs, and a switch to force CPU even if CUDA is available.
epochs = 50
no_cuda = False

# Training progress is printed every `log_interval` batches.
log_interval = 5

# Set the seed and GPU

In [None]:


# Make torch's random number generation reproducible.
torch.manual_seed(seed)

# Run on the GPU only when it is both permitted (no_cuda is False) and present.
use_cuda = (not no_cuda) and torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Extra DataLoader keyword arguments: pinned host memory is only requested
# when batches will be copied to a CUDA device.
if use_cuda:
    kwargs = {'num_workers': 0, 'pin_memory': True}
else:
    kwargs = {}

# Model

In [None]:
class ECG_CNN(nn.Module):
    """1-D convolutional classifier for 12-channel signals.

    Architecture: three ``Conv1d(k=5) -> BatchNorm1d -> ReLU -> MaxPool1d(2)``
    stages (12 -> 48 -> 96 -> 192 channels), global average pooling over the
    time axis, then a 192 -> 64 -> 32 -> ``num_classes`` fully connected head.

    The global pooling is adaptive, so any input length that survives the
    three conv/pool stages (at least 36 samples) is accepted. For the
    length-5000 input used in this file the result is the same average over
    the 621 remaining time steps that a fixed ``AvgPool1d(621)`` computes.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=2):
        super(ECG_CNN, self).__init__()

        # Convolution stages (unpadded, so each shortens the signal by 4 samples).
        self.conv1 = nn.Conv1d(12, 48, kernel_size=5, stride=1)
        self.conv2 = nn.Conv1d(48, 96, kernel_size=5, stride=1)
        self.conv3 = nn.Conv1d(96, 192, kernel_size=5, stride=1)

        # Batch normalisation, one per convolution stage.
        self.bn1 = nn.BatchNorm1d(48)
        self.bn2 = nn.BatchNorm1d(96)
        self.bn3 = nn.BatchNorm1d(192)

        # Shared activation used after every batch-norm layer.
        self.relu = nn.ReLU(inplace=True)

        # Kept for attribute compatibility; not used in forward().
        self.sigmoid = nn.Sigmoid()

        # Halves the temporal length after each stage (stateless, so shared).
        self.maxpool1 = nn.MaxPool1d(kernel_size=2, stride=2)

        # Global average pooling: collapses whatever temporal length remains
        # to one value per channel. A fixed kernel of 621 only matched
        # length-5000 input; longer input would leave several time steps that
        # the flatten in forward() silently folded into the batch dimension.
        self.globalavrpool1 = nn.AdaptiveAvgPool1d(1)

        # Kept for attribute compatibility; not used in forward().
        self.dp = nn.Dropout(p=0.5)

        # Fully connected classification head.
        self.fc1 = nn.Linear(1 * 192, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, num_classes)

    def forward(self, x):
        """Return raw class logits.

        Args:
            x: Float tensor of shape ``(batch, 12, length)``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` with unnormalised logits
            (no softmax/sigmoid is applied here).
        """
        x = self.maxpool1(self.relu(self.bn1(self.conv1(x))))
        x = self.maxpool1(self.relu(self.bn2(self.conv2(x))))
        x = self.maxpool1(self.relu(self.bn3(self.conv3(x))))

        # (batch, 192, L') -> (batch, 192, 1)
        x = self.globalavrpool1(x)

        # Flatten while pinning the batch dimension so samples can never mix.
        x = x.view(x.size(0), -1)

        # Dense head
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        return x

# Data Path(12-Lead)

In [None]:
# Load the label metadata. Elsewhere in this file label_data is indexed by
# integer position and each entry is read through its 'ABO' and 'data_path' keys.
# 'path' is a placeholder for the JSON file location.
with open('path', 'r') as f:
    label_data = json.load(f)

In [None]:
# One list per ABO blood type. Each list holds the positions (indices into
# label_data) of the records carrying that type; records with any other
# 'ABO' value are ignored.
data_paths_A, data_paths_B, data_paths_O, data_paths_AB = [], [], [], []

for record_idx, record in enumerate(label_data):
    blood_type = record['ABO']
    if blood_type == 'A':
        data_paths_A.append(record_idx)
    elif blood_type == 'B':
        data_paths_B.append(record_idx)
    elif blood_type == 'O':
        data_paths_O.append(record_idx)
    elif blood_type == 'AB':
        data_paths_AB.append(record_idx)

In [None]:
# Number of records per blood type (A, B, O, AB).
len(data_paths_A), len(data_paths_B), len(data_paths_O), len(data_paths_AB)

# Train, validation, test set

In [None]:
def Make_train_validation_test_paths(data_paths, train_ratio, valid_ratio, random_seed=None):
    """Shuffle ``data_paths`` and split it into train / validation / test lists.

    The first ``floor(train_ratio * n)`` shuffled items form the training
    set, the items up to ``floor((train_ratio + valid_ratio) * n)`` form the
    validation set, and everything left over forms the test set.

    Args:
        data_paths: Sequence of items to split (indexable, with ``len``).
        train_ratio: Fraction of items assigned to the training set.
        valid_ratio: Fraction of items assigned to the validation set.
        random_seed: Seed for the shuffle. When ``None`` the module-level
            ``seed`` is used, which reproduces the historical behaviour.

    Returns:
        Tuple ``(train_data_paths, validation_data_paths, test_data_paths)``
        of lists.

    Raises:
        ValueError: If a ratio is negative or the two ratios sum to more than 1.
    """
    # Reject ratios that would silently produce empty or overlapping splits
    # (small tolerance for floating-point sums such as 0.7 + 0.2 + 0.1).
    if train_ratio < 0 or valid_ratio < 0 or train_ratio + valid_ratio > 1 + 1e-9:
        raise ValueError(
            "train_ratio and valid_ratio must be non-negative and sum to at most 1, "
            "got train_ratio={!r}, valid_ratio={!r}".format(train_ratio, valid_ratio)
        )

    if random_seed is None:
        random_seed = seed  # module-level hyperparameter

    num_datapaths = len(data_paths)
    train_split = int(np.floor(train_ratio * num_datapaths))
    valid_split = int(np.floor((train_ratio + valid_ratio) * num_datapaths))

    # A private RandomState yields the same permutation as
    # np.random.seed(s); np.random.shuffle(...) but leaves the global NumPy
    # RNG state untouched.
    indices = list(range(num_datapaths))
    np.random.RandomState(random_seed).shuffle(indices)

    train_data_paths = [data_paths[i] for i in indices[:train_split]]
    validation_data_paths = [data_paths[i] for i in indices[train_split:valid_split]]
    test_data_paths = [data_paths[i] for i in indices[valid_split:]]

    return train_data_paths, validation_data_paths, test_data_paths

In [None]:
# Split each blood type separately with the same ratios (40% train, 30%
# validation, remainder test) so every split keeps the per-type proportions.
train_paths_A, validation_paths_A, test_paths_A = Make_train_validation_test_paths(data_paths_A,0.4,0.3)
train_paths_B, validation_paths_B, test_paths_B = Make_train_validation_test_paths(data_paths_B,0.4,0.3)
train_paths_O, validation_paths_O, test_paths_O = Make_train_validation_test_paths(data_paths_O,0.4,0.3)
train_paths_AB, validation_paths_AB, test_paths_AB = Make_train_validation_test_paths(data_paths_AB,0.4,0.3)

In [None]:
# Sizes of the type-A train / validation / test splits.
len(train_paths_A), len(validation_paths_A), len(test_paths_A)

In [None]:
# Merge the per-type splits into the final train / validation / test index lists
# (concatenated in A, B, O, AB order; not reshuffled here).
train_paths      = train_paths_A + train_paths_B + train_paths_O + train_paths_AB
validation_paths = validation_paths_A + validation_paths_B + validation_paths_O + validation_paths_AB
test_paths       = test_paths_A + test_paths_B + test_paths_O + test_paths_AB
# Total sizes of the three splits.
len(train_paths), len(validation_paths), len(test_paths)

# Custom Dataset

In [None]:
# Blood type -> class index. This is a binary target: 'AB' maps to 1 and
# 'A', 'B', 'O' all map to 0.
Class2Idx = {'A':np.int8(0), 'B':np.int8(0), 'O':np.int8(0), 'AB':np.int8(1)}

In [None]:
class CustomDataset(Dataset):
    """Map-style dataset yielding ``(array, class_index)`` pairs.

    ``data_paths`` holds positions into the module-level ``label_data``; each
    referenced record supplies the file to load ('data_path') and the blood
    type ('ABO') that ``Class2Idx`` converts to a class index.
    """

    def __init__(self, data_paths, transform=None):
        # NOTE(review): transform is stored but never applied in __getitem__.
        self.transform = transform
        self.data_paths = data_paths

    def __len__(self):
        """Return the number of samples."""
        return len(self.data_paths)

    def __getitem__(self, idx):
        """Load the array for sample ``idx`` and return it with its label."""
        record = label_data[self.data_paths[idx]]

        # The array is read from disk on every access.
        signal = np.load(record['data_path'])

        return signal, Class2Idx[record['ABO']]

In [None]:
# Build one dataset per split.
# NOTE(review): a ToTensor transform is passed in, but CustomDataset.__getitem__
# as written never applies self.transform, so samples are returned as loaded.
train_dataset      = CustomDataset(train_paths,transforms.Compose([transforms.ToTensor()]))
validation_dataset = CustomDataset(validation_paths,transforms.Compose([transforms.ToTensor()]))
test_dataset       = CustomDataset(test_paths,transforms.Compose([transforms.ToTensor()]))

# Sample counts of the three datasets.
print(len(train_dataset),len(validation_dataset), len(test_dataset))

# Data Loader

In [None]:
# Training batches, reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, **kwargs)

# Validation batches (same batch size as training; also shuffled).
validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=True, **kwargs)

# Test batches, using the dedicated test batch size (also shuffled).
test_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=True, **kwargs)


# Optimizer

In [None]:
# Instantiate the network with its default number of classes and move it to the
# selected device.
model = ECG_CNN().to(device)

# Adam over all model parameters with the configured learning rate.
optimizer = optim.Adam(model.parameters(), lr=lr)

# Print a per-layer summary for a single input of 12 channels x 5000 samples.
summary(model, (12, 5000))

# Train

## init

In [None]:
"""Train"""
# Training-side metric containers:
#   avg_train_losses - mean training loss, one entry per epoch
#   Train_baths_ACC  - per-batch accuracies of the current epoch (reset each epoch)
#   Train_ACC        - training accuracy, one entry per epoch
# NOTE(review): train_losses is not written to in the visible code.
train_losses        = []
avg_train_losses    = []
Train_baths_ACC     = [] 
Train_ACC           = [] 


"""Validaion"""
# Validation-side metric containers:
#   avg_valid_losses    - mean validation loss, one entry per epoch
#   Validation_ACC      - validation accuracy (%), one entry per epoch
#   Valid_ACC_per_Class - per-class validation accuracy (%), one tensor per epoch
# NOTE(review): valid_losses is not written to in the visible code.
valid_losses        = []
avg_valid_losses    = []
Validation_ACC      = []
Valid_ACC_per_Class = []

In [None]:
"""save best model"""
# Best validation accuracy seen so far; the training loop saves a checkpoint
# whenever this is exceeded.
best_acc = 0
# Checkpoint location: placeholder prefix with the experiment number appended.
best_model_save_path = 'path'
best_model_save_path = best_model_save_path + str(experiment_num)

In [None]:
# Number of classes; sizes the validation confusion matrix in the training loop.
num_classes = 2
# Cross-entropy loss; it expects raw (unnormalised) logits and integer class targets.
criterion = nn.CrossEntropyLoss()

In [None]:
# Main optimisation loop: one training pass and one validation pass per epoch,
# recording per-epoch losses/accuracies and checkpointing the best model.
for epoch in range(1, epochs + 1):

    # ------------------------------ Train ------------------------------
    model.train()

    train_loss = 0
    # Sample-level counters so the epoch accuracy is weighted by batch size
    # (a plain mean of per-batch accuracies over-weights a short final batch).
    epoch_train_correct = 0
    epoch_train_seen = 0

    for batch_idx, (data, target) in enumerate(train_loader):

        data, target = data.to(device, dtype=torch.float), target.to(device, dtype=torch.long)
        optimizer.zero_grad()

        output = model(data)

        # Predicted class. Softmax is monotonic, so taking the argmax of the
        # raw logits selects the same class without the extra computation.
        pred = output.argmax(dim=1)

        # Loss is computed on raw logits, as CrossEntropyLoss expects.
        loss = criterion(output, target)
        train_loss += loss.item()

        loss.backward()
        optimizer.step()

        # Accuracy of this batch (percent).
        total = target.size(0)
        correct = pred.eq(target).sum().item()
        accuracy = 100. * correct / total
        Train_baths_ACC.append(accuracy)

        epoch_train_correct += correct
        epoch_train_seen += total

        if batch_idx % log_interval == 0:
            # 1. progress through the epoch and current batch loss
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

            # 2. current batch loss and accuracy
            print('Train set:  batch loss: {:.4f}, Accuracy: {:.0f}% '.format(
                loss.item(), accuracy))

    # ---------------------------- Validation ----------------------------
    model.eval()

    valid_loss = 0
    correct = 0
    total = len(validation_loader.dataset)

    # Rows are true classes, columns are predicted classes.
    valid_confusion_matrix = torch.zeros(num_classes, num_classes)

    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(validation_loader):
            data, target = data.to(device, dtype=torch.float), target.to(device, dtype=torch.long)

            output = model(data)
            pred = output.argmax(dim=1)

            loss = criterion(output, target)
            valid_loss += loss.item()

            correct += pred.eq(target).sum().item()

            # Copy labels to plain Python ints once per batch instead of
            # indexing with device tensors element by element.
            for t, p in zip(target.view(-1).tolist(), pred.view(-1).tolist()):
                valid_confusion_matrix[t, p] += 1

    # --------------------------- Loss and ACC ---------------------------
    # Losses are averaged over the number of batches.
    train_loss /= len(train_loader)
    valid_loss /= len(validation_loader)
    avg_train_losses.append(train_loss)
    avg_valid_losses.append(valid_loss)

    Train_ACC.append(100. * epoch_train_correct / epoch_train_seen)
    Train_baths_ACC = []

    valid_accuracy = 100. * correct / total
    Validation_ACC.append(valid_accuracy)

    # Per-class accuracy (%): diagonal over row sums. A class with no
    # validation samples yields NaN for its entry.
    Valid_ACC_per_Class.append((valid_confusion_matrix.diag()/valid_confusion_matrix.sum(1))*100)

    print('------------------------------------------')
    print('Valid set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        valid_loss, correct, total, valid_accuracy))
    print('-------------------------------------------')

    # ------------------------- Save best model -------------------------
    # The whole module object is pickled (not just its state_dict) whenever
    # the validation accuracy improves.
    if valid_accuracy > best_acc:
        torch.save(model, best_model_save_path)
        print("model saved.")
        print('-------------------------------------------')
        best_acc = valid_accuracy
    
    

# Model save and load

In [None]:
# List the working directory (e.g. to check which checkpoints already exist).
os.listdir()

In [None]:
# Prefix for the final-epoch model checkpoint ('path' is a placeholder).
save_path = 'path'

In [None]:
# Append the experiment number to the prefix.
# NOTE(review): re-running this cell appends the number again.
save_path = save_path + str(experiment_num)

In [None]:
# Persist the model as it is after the last epoch (whole module object, pickled).
torch.save(model, save_path)

# Load best model

In [None]:
# Reload the checkpoint written by the training loop when validation accuracy
# peaked. map_location remaps the stored tensors onto the current device, so a
# checkpoint saved on a GPU can also be loaded on a CPU-only machine.
# NOTE(review): the checkpoint is a fully pickled module, so only load files
# you trust. Recent PyTorch releases default torch.load to weights_only=True,
# which rejects such files - pass weights_only=False there, or switch to
# saving/loading a state_dict.
model = torch.load(best_model_save_path, map_location=device)
model.to(device)

# Re-create the optimizer so it refers to the reloaded model's parameters.
optimizer = optim.Adam(model.parameters(), lr=lr)

In [None]:
# Switch to evaluation mode (batch norm uses running statistics from here on).
model.eval()

# Youden index's cut off

In [None]:
# Accumulators filled over the whole validation set:
#   true_labels  - ground-truth class indices
#   pred_labels  - argmax predictions
#   target_score - softmax probability assigned to class 1
true_labels = np.array([])
pred_labels = np.array([])
target_score = np.array([])

# Validation pass with the reloaded model.
model.eval()

valid_loss = 0
correct = 0
total = len(validation_loader.dataset)

last_batch = len(validation_loader) - 1

with torch.no_grad():
    for batch_idx, (data, target) in enumerate(validation_loader):
        data = data.to(device, dtype=torch.float)
        target = target.to(device, dtype=torch.long)

        # Class probabilities and the most likely class.
        output = F.softmax(model(data), dim=1)
        pred = output.argmax(dim=1, keepdim=True)

        correct += pred.eq(target.view_as(pred)).sum().item()

        # np.append flattens its inputs, so all three accumulators stay 1-D.
        true_labels = np.append(true_labels, target.cpu().numpy())
        pred_labels = np.append(pred_labels, pred.cpu().numpy())
        target_score = np.append(target_score, output[:, 1].cpu().numpy())

        print('validation: [{}/{}] '.format(batch_idx, last_batch))

In [None]:
# Youden's J statistic: J = sensitivity + specificity - 1, which equals
# TPR - FPR at every candidate threshold of the ROC curve.
FPR, TPR, thresholds = roc_curve(true_labels, target_score)
J = TPR - FPR

# Pick the threshold that maximises J.
idx = np.argmax(J)
best_thresh = thresholds[idx]

sensitivity = TPR[idx]
specificity = 1 - FPR[idx]
print('Best Threshold=%f, sensitivity = %.3f, specificity = %.3f, J=%.3f' % (best_thresh, sensitivity, specificity, J[idx]))

# Test

In [None]:
# Accumulators filled over the whole test set:
#   true_labels  - ground-truth class indices
#   pred_labels  - thresholded predictions
#   target_score - softmax probability assigned to class 1
true_labels  = np.array([])
pred_labels  = np.array([])
target_score = np.array([])

# Test pass
model.eval()

test_loss = 0
correct = 0
total = len(test_loader.dataset)

with torch.no_grad():
    for batch_idx, (data, target) in enumerate(test_loader):

        data, target = data.to(device, dtype=torch.float), target.to(device, dtype=torch.long)
        logits = model(data)

        # Loss must be computed on the raw logits: CrossEntropyLoss applies
        # log-softmax itself, so feeding it probabilities would normalise
        # twice and report a wrong value.
        loss = criterion(logits, target)
        test_loss += loss.item()

        # Class probabilities; class 1 is predicted when its probability
        # exceeds the threshold chosen on the validation set.
        output = F.softmax(logits, dim=1)
        pred = (output[:, 1] > best_thresh).int()

        correct += pred.eq(target.view_as(pred)).sum().item()

        # Accumulate the results of this batch.
        true_labels = np.append(true_labels, np.array(target.cpu()))
        pred_labels = np.append(pred_labels, np.array(pred.cpu()))
        target_score = np.append(target_score, np.array(output[:, 1].cpu()))

        print('test: [{}/{}] '.format(batch_idx, len(test_loader) - 1))


# Mean loss per batch and overall accuracy in percent.
test_loss /= len(test_loader)
test_accuracy = 100. * correct / total


print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
    test_loss, correct, total, test_accuracy))


In [None]:
# Test accuracy in percent, at the Youden-selected threshold.
test_accuracy