# Library import

In [None]:
""" os """
import os

""" torch """
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader, random_split, SubsetRandomSampler, WeightedRandomSampler
from torch.utils.data.sampler import SubsetRandomSampler


"""tensor board"""
import torchvision
#from torch.utils.tensorboard import SummaryWriter


"""glob"""
from glob import glob

""" tqdm """
import time
from tqdm import tqdm

"""Pandas"""
import pandas as pd

""" numpy """
import numpy as np
from numpy import argmax
from PIL import Image

"""JSON"""
import json

"""sklearn"""
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import auc
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_absolute_error, r2_score

"""seaborn"""
import seaborn as sns

"""scipy"""
from scipy import io
from scipy import signal
from scipy.fft import fft, ifft,fftfreq
from scipy import stats

"""SUMMARY"""
from torchsummary import summary

"""time"""
import time


"""pingouin"""
import pingouin as pg

import re
import shutil
import random
import matplotlib.pyplot as plt
import matplotlib.transforms 
import scipy

# Path init

In [None]:
# Show the contents of the current working directory (notebook sanity check).
os.listdir()

In [None]:
# Make `cur_path` the working directory and list its contents to confirm the
# switch. NOTE(review): "path" looks like a redacted placeholder — set the real
# project directory before running.
cur_path = "path"
os.chdir(cur_path)
os.listdir()

In [None]:
# Experiment identifier; str(experiment_num) is appended to the output paths
# built further down in this notebook.
# NOTE(review): the right-hand side is blank in this export, so the line does
# not parse as written — fill in the experiment number before running.
experiment_num = 
# Base path for saved figures (the experiment number is appended to it).
fig_save_path = 'path'

In [None]:
# Per-experiment output folder. The name is built by plain string
# concatenation, so no path separator is inserted between the base path and
# the experiment number.
folder_path = fig_save_path + str(experiment_num)

# exist_ok=True makes the creation idempotent and free of the check-then-create
# race. Any other OSError (permissions, invalid path, ...) is reported and
# re-raised. The earlier version inspected `errno.EEXIST` without importing
# `errno`, so a genuine failure surfaced as a NameError instead of the real
# error.
try:
    os.makedirs(folder_path, exist_ok=True)
except OSError:
    print("Failed to create directory!!!!!")
    raise

# Hyper parameters

In [None]:
# --- Reproducibility ---
seed = 1                      # passed to torch.manual_seed and np.random.seed

# --- Data split ---
validation_ratio = 0.1        # validation ratio

# --- Optimisation ---
lr = 0.001                    # learning rate
momentum = 0.5
epochs = 50

# --- Batching ---
batch_size = test_batch_size = 512

# --- Runtime ---
no_cuda = False               # True forces CPU even when CUDA is available
log_interval = 5              # print training progress every N batches

# Set the seed and GPU

In [None]:

# Seed torch's global random number generator.
torch.manual_seed(seed)

# Run on the GPU only when it is both allowed (no_cuda is False) and present.
use_cuda = torch.cuda.is_available() if not no_cuda else False
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# Extra keyword arguments forwarded to every DataLoader; they are only set
# when CUDA is in use.
kwargs = {}
if use_cuda:
    kwargs = {'num_workers': 0, 'pin_memory': True}

# Model

In [None]:
class ECG_CNN(nn.Module):
    """1-D CNN that maps a 12-channel signal to ``num_classes`` outputs.

    Three ``Conv1d -> BatchNorm1d -> ReLU -> MaxPool1d`` stages
    (12 -> 48 -> 96 -> 192 channels) are followed by a global average over
    the time axis and a three-layer fully connected head. The last layer has
    no activation, so the network returns raw values.

    Args:
        num_classes: Width of the output layer (default 1).
    """

    def __init__(self, num_classes=1):
        super().__init__()

        # Convolution
        self.conv1 = nn.Conv1d(12, 48, kernel_size=5, stride=1)
        self.conv2 = nn.Conv1d(48, 96, kernel_size=5, stride=1)
        self.conv3 = nn.Conv1d(96, 192, kernel_size=5, stride=1)

        # Batch normalisation (one per convolution)
        self.bn1 = nn.BatchNorm1d(48)
        self.bn2 = nn.BatchNorm1d(96)
        self.bn3 = nn.BatchNorm1d(192)

        # Activations. `sigmoid` is not used by forward(); it is kept so the
        # module's attribute set stays the same as before.
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()

        # Max pooling shared by all three stages (halves the time axis)
        self.maxpool1 = nn.MaxPool1d(kernel_size=2, stride=2)

        # Global average pooling over time. The previous fixed
        # AvgPool1d(kernel_size=621) only produced a single time step for one
        # particular input length: shorter inputs raised an error and longer
        # ones left extra time steps that view(-1, 192) silently folded into
        # the batch axis. Adaptive pooling always reduces the time axis to
        # length 1 and is the same average when the pooled length is 621.
        self.globalavrpool1 = nn.AdaptiveAvgPool1d(1)

        # Dropout is defined but not applied in forward() (unchanged).
        self.dp = nn.Dropout(p=0.5)

        # Fully connected head
        self.fc1 = nn.Linear(1 * 192, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, num_classes)

    def forward(self, x):
        """Run the network.

        Args:
            x: Tensor of shape (batch, 12, length); ``length`` must be long
                enough to survive three kernel-5 convolutions and three
                stride-2 poolings.

        Returns:
            Tensor of shape (batch, num_classes).
        """
        x = self.maxpool1(self.relu(self.bn1(self.conv1(x))))
        x = self.maxpool1(self.relu(self.bn2(self.conv2(x))))
        x = self.maxpool1(self.relu(self.bn3(self.conv3(x))))

        # (batch, 192, L) -> (batch, 192, 1) -> (batch, 192)
        x = self.globalavrpool1(x)
        x = x.view(-1, 1 * 192 * 1)

        # Dense head; no activation on the final layer
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        return x

# Data Path(12-Lead)

In [None]:
# Read the label records from JSON. Later cells index `label_data` by integer
# position and read each record's 'bmi' and 'data_path' entries.
with open('path', 'r') as f:
    label_data = json.loads(f.read())

In [None]:
# Bucket record indices (positions in `label_data`) into four BMI groups so
# that each group can be split into train/validation/test separately.
data_paths_uw, data_paths_normal, data_paths_ow, data_paths_obese = [], [], [], []

count_nan = 0  # records skipped because their BMI is NaN

for i, record in enumerate(label_data):

    # get label
    tempt = record['bmi']

    if np.isnan(tempt):
        count_nan += 1
        continue

    # Contiguous half-open bands: [.., 18.5), [18.5, 23), [23, 25), [25, ..).
    # The earlier closed bands (<= 22.9, >= 23.0, <= 24.9) left gaps, so a
    # value such as 22.95 or 24.95 fell through to the last (obese) bucket.
    if tempt < 18.5:
        data_paths_uw.append(i)
    elif tempt < 23.0:
        data_paths_normal.append(i)
    elif tempt < 25.0:
        data_paths_ow.append(i)
    else:
        data_paths_obese.append(i)

In [None]:
# Number of records in each BMI group, in the order uw / normal / ow / obese.
len(data_paths_uw), len(data_paths_normal), len(data_paths_ow), len(data_paths_obese)

In [None]:
# Number of records that were skipped because their BMI was NaN.
count_nan

# Train, validation, test set

In [None]:
def Make_train_validation_test_paths(data_paths, train_ratio, valid_ratio, random_seed=None):
    """Shuffle ``data_paths`` reproducibly and cut it into three parts.

    Args:
        data_paths: Sequence of items to split (indexable by integer).
        train_ratio: Fraction of items assigned to the training part.
        valid_ratio: Fraction of items assigned to the validation part; the
            remainder becomes the test part.
        random_seed: Seed for the shuffle. When ``None`` (the default) the
            module-level ``seed`` is used, which is what this function always
            did before the parameter existed.

    Returns:
        Tuple ``(train, validation, test)`` of three lists. Split sizes are
        ``floor(train_ratio * n)`` and ``floor((train_ratio + valid_ratio) * n)
        - floor(train_ratio * n)``, with the rest going to test.

    Note:
        The function reseeds NumPy's global random state on every call, so
        repeated calls with the same seed and length give the same
        permutation.
    """
    num_datapaths = len(data_paths)
    indices = list(range(num_datapaths))

    train_split = int(np.floor(train_ratio * num_datapaths))
    valid_split = int(np.floor((train_ratio + valid_ratio) * num_datapaths))

    # Set the seed and shuffle the positions in place.
    np.random.seed(seed if random_seed is None else random_seed)
    np.random.shuffle(indices)

    # Map the shuffled positions back to the original items.
    train_data_paths = [data_paths[j] for j in indices[:train_split]]
    validation_data_paths = [data_paths[j] for j in indices[train_split:valid_split]]
    test_data_paths = [data_paths[j] for j in indices[valid_split:]]

    return train_data_paths, validation_data_paths, test_data_paths

In [None]:
# Split every BMI group on its own with the same proportions
# (40% train, 30% validation, remainder test).
train_paths_uw, validation_paths_uw, test_paths_uw = Make_train_validation_test_paths(
    data_paths_uw, train_ratio=0.4, valid_ratio=0.3)
train_paths_normal, validation_paths_normal, test_paths_normal = Make_train_validation_test_paths(
    data_paths_normal, train_ratio=0.4, valid_ratio=0.3)
train_paths_ow, validation_paths_ow, test_paths_ow = Make_train_validation_test_paths(
    data_paths_ow, train_ratio=0.4, valid_ratio=0.3)
train_paths_obese, validation_paths_obese, test_paths_obese = Make_train_validation_test_paths(
    data_paths_obese, train_ratio=0.4, valid_ratio=0.3)

# Split sizes of the first group, shown as the cell output.
len(train_paths_uw), len(validation_paths_uw), len(test_paths_uw)

In [None]:
# Concatenate the per-group splits (order: uw, normal, ow, obese) into the
# final train / validation / test index lists.
train_paths = [
    *train_paths_uw, *train_paths_normal, *train_paths_ow, *train_paths_obese,
]
validation_paths = [
    *validation_paths_uw, *validation_paths_normal, *validation_paths_ow, *validation_paths_obese,
]
test_paths = [
    *test_paths_uw, *test_paths_normal, *test_paths_ow, *test_paths_obese,
]
len(train_paths), len(validation_paths), len(test_paths)

# Remove outliers from the test set

In [None]:
# Test-set size before the BMI-range filter below is applied.
len(test_paths)

In [None]:
# Collect the test records whose BMI lies outside the 12..40 range.
out_idx_list = [
    idx for idx in test_paths
    if label_data[idx]['bmi'] < 12 or label_data[idx]['bmi'] > 40
]

# Filter them out. Membership is tested against a set so the pass stays
# linear; the earlier `x not in out_idx_list` scanned the list for every item.
_out_idx_set = set(out_idx_list)
test_paths = [x for x in test_paths if x not in _out_idx_set]


# Custom Dataset

In [None]:
class CustomDataset(Dataset):
    """Dataset yielding ``(data, label)`` pairs for a list of record indices.

    Each entry of ``data_paths`` is used as an index into the module-level
    ``label_data``. The record's ``'data_path'`` file is read with ``np.load``
    and its ``'bmi'`` value is returned as a float label.

    Note: ``transform`` is stored on the instance but ``__getitem__`` never
    applies it.
    """

    def __init__(self, data_paths, transform=None):
        self.transform = transform
        self.data_paths = data_paths

    def __len__(self):
        """Return the number of records in this dataset."""
        return len(self.data_paths)

    def __getitem__(self, idx):
        """Load the array and BMI label of the ``idx``-th record."""
        record = label_data[self.data_paths[idx]]

        # get data
        signal = np.load(file=record['data_path'])

        # get label
        bmi = float(record['bmi'])

        return signal, bmi

In [None]:
# Wrap each index list in a CustomDataset. A ToTensor pipeline is handed over
# as `transform`; CustomDataset stores it without applying it.
train_dataset = CustomDataset(
    train_paths, transform=transforms.Compose([transforms.ToTensor()]))
validation_dataset = CustomDataset(
    validation_paths, transform=transforms.Compose([transforms.ToTensor()]))
test_dataset = CustomDataset(
    test_paths, transform=transforms.Compose([transforms.ToTensor()]))

# Dataset sizes: train, validation, test.
print(len(train_dataset),len(validation_dataset), len(test_dataset))

# Data Loader

In [None]:
# Train loader
train_loader = DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=batch_size,
    **kwargs
)

# Validation loader (same batch size as training, also shuffled)
validation_loader = DataLoader(
    validation_dataset,
    shuffle=True,
    batch_size=batch_size,
    **kwargs
)

# Test loader (uses test_batch_size, also shuffled)
test_loader = DataLoader(
    test_dataset,
    shuffle=True,
    batch_size=test_batch_size,
    **kwargs
)

In [None]:
# len() of a DataLoader is its number of batches, not its number of samples.
print("Length of the train_loader:", len(train_loader))
print("Length of the val_loader:", len(validation_loader))
print("Length of the test_loader:", len(test_loader))

# Optimizer

In [None]:
# Build the network with its default single output and move it to `device`.
model = ECG_CNN().to(device)

# Adam over all model parameters; the `momentum` hyperparameter is not used here.
optimizer = optim.Adam(model.parameters(), lr=lr)

# Train

## init

In [None]:
"""Train"""
train_losses        = []
avg_train_losses    = []
Train_baths_ACC     = [] 
Train_ACC           = [] 


"""Validaion"""
valid_losses        = []
avg_valid_losses    = []
Validation_ACC      = []
Valid_ACC_per_Class = []


In [None]:
# Best-model bookkeeping.
# Start from +inf so the first epoch always writes a checkpoint. With the
# earlier sentinel of 100, a run whose validation loss never dropped below 100
# saved nothing and the later "load best model" cell failed.
best_loss = float("inf")
best_model_save_path = 'path'
best_model_save_path = best_model_save_path + str(experiment_num)

In [None]:
# One output value per sample. Note that ECG_CNN() is built with its own
# default elsewhere; this variable is not passed to it in the visible code.
num_classes = 1
# L1 loss with the default reduction, i.e. mean absolute error per batch.
criterion = nn.L1Loss() #MAE

In [None]:
# Train for `epochs` epochs. After each epoch the model is evaluated on the
# validation set and saved whenever the validation loss improves.
#
# Epoch losses are sample-weighted: `criterion` returns the mean over a batch
# (assumes the default reduction='mean' of nn.L1Loss), so it is multiplied by
# the batch size and the sum is divided by the number of samples seen. The
# earlier mean-of-batch-means over-weighted a smaller final batch.
for epoch in range(1, epochs + 1):

    # ---- Train ----
    model.train()

    train_loss = 0.0   # running sum of per-sample losses
    train_seen = 0     # number of samples processed this epoch

    for batch_idx, (data, target) in enumerate(train_loader):

        data, target = data.to(device, dtype=torch.float), target.to(device, dtype=torch.float)
        optimizer.zero_grad()

        output = model(data)

        # target is reshaped to (batch, 1) to match the model output
        loss = criterion(output, target.view(-1,1))
        train_loss += loss.item() * len(data)
        train_seen += len(data)

        loss.backward()
        optimizer.step()

        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

    # ---- Validation ----
    model.eval()

    valid_loss = 0.0
    valid_seen = 0

    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(validation_loader):
            data, target = data.to(device, dtype=torch.float), target.to(device, dtype=torch.float)

            output = model(data)

            loss = criterion(output, target.view(-1,1))
            valid_loss += loss.item() * len(data)
            valid_seen += len(data)

    # ---- Epoch averages ----
    train_loss /= train_seen
    valid_loss /= valid_seen
    avg_train_losses.append(train_loss)
    avg_valid_losses.append(valid_loss)

    print('------------------------------------------')
    print('Valid set: Average loss: {:.4f}'.format(valid_loss))
    print('-------------------------------------------')

    # ---- Save best model ----
    # The whole module object is pickled (not just its state_dict); the
    # "load best model" cell relies on that format.
    if valid_loss < best_loss:
        torch.save(model, best_model_save_path)
        print("model saved.")
        print('-------------------------------------------')
        best_loss = valid_loss
    

# Model save and load

In [None]:
# Show the working directory contents before choosing where to save the model.
os.listdir()

In [None]:
# Base path for the final (last-epoch) model; the experiment number is
# appended in the next cell.
save_path = 'path'

In [None]:
# Append the experiment number. NOTE: re-running this cell appends the suffix
# again because it rebinds `save_path` in terms of itself.
save_path = save_path + str(experiment_num)

In [None]:
# Save the model as it stands after the last epoch. The whole module object is
# pickled, not only its state_dict.
torch.save(model, save_path)

# Load best model

In [None]:
# Restore the checkpoint with the lowest validation loss.
#
# map_location=device lets a checkpoint written on a GPU machine be opened on
# a CPU-only machine (and the other way round); without it torch.load tries to
# restore tensors onto the device they were saved from.
#
# NOTE(review): the file is a fully pickled nn.Module (written with
# torch.save(model, ...)), so loading it executes pickle — only load
# checkpoints you created yourself. Recent PyTorch releases default
# torch.load to weights_only=True and need weights_only=False for this kind
# of file. Saving and loading model.state_dict() would avoid both issues.
model = torch.load(best_model_save_path, map_location=device)
model.to(device)

# Fresh optimizer bound to the restored model's parameters.
optimizer = optim.Adam(model.parameters(), lr=lr)
model.eval()

# Test

In [None]:
# Evaluate the loaded model on the test set and collect the true and predicted
# values as flat float64 arrays (`true_labels`, `pred_labels`).
true_chunks = []   # per-batch target arrays
pred_chunks = []   # per-batch prediction arrays

# Test
model.eval()

test_loss = 0.0    # running sum of per-sample losses
test_seen = 0      # number of samples processed
total = len(test_loader.dataset)

with torch.no_grad():
    for batch_idx, (data, target) in enumerate(test_loader):
        data, target = data.to(device, dtype=torch.float), target.to(device, dtype=torch.float)
        output = model(data)

        pred   = output

        # MAE loss. `criterion` returns a batch mean (assumes the default
        # reduction='mean'), so weight it by the batch size to get an exact
        # dataset-level average even when the last batch is smaller.
        loss = criterion(output, target.view(-1,1))
        test_loss += loss.item() * len(data)
        test_seen += len(data)

        # Keep the batches in lists and concatenate once after the loop;
        # np.append inside the loop copied the whole array on every batch.
        true_chunks.append(target.cpu().numpy().ravel())
        pred_chunks.append(pred.cpu().numpy().ravel())

        # Progress is reported against the test loader (this previously used
        # len(validation_loader) by mistake).
        print('test: [{}/{}] '.format(batch_idx,len(test_loader)-1))

# The leading empty float64 array keeps the result float64 and makes an empty
# loader yield empty arrays, matching the earlier np.append behaviour.
true_labels = np.concatenate([np.array([])] + true_chunks)
pred_labels = np.concatenate([np.array([])] + pred_chunks)

test_loss /= test_seen