# Setup

In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [2]:
from IPython.display import clear_output
import os, sys, shutil
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from tqdm import tqdm, trange
from glob import glob
import random

import cv2 as cv

In [3]:
print(os.getcwd())

D:\GitCloneProject\HeartResearch\Experiment\Approach\EfficientB0


In [4]:
for i in range(3):
    os.chdir("..")

print(os.getcwd())

D:\GitCloneProject\HeartResearch


In [5]:
# D:\GitCloneProject\HeartResearch\Data set\v4_data\med_scaleogram_h256_w512_seglen1600_scl500
main_data_dir = os.getcwd() + "\\Data set"

label_csv_path = main_data_dir + "\\Label.csv"

img_data_dir = main_data_dir + "\\v4_data\\med_scaleogram_h256_w512_seglen1600_scl500"
print(len(os.listdir(img_data_dir)))

425820


In [6]:
label_df = pd.read_csv(label_csv_path)
label_df.head()

Unnamed: 0,Recording,First_label,Second_label,Third_label
0,A0001,5,,
1,A0002,1,,
2,A0003,2,,
3,A0004,2,,
4,A0005,7,,


In [7]:
label_df["First_label"].value_counts()

5    1695
2    1098
1     918
8     826
3     704
7     653
6     574
4     207
9     202
Name: First_label, dtype: int64

 # Data Loader

In [8]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from torch.nn.functional import one_hot

In [9]:
class HeartDisease(Dataset):
    def __init__(self, label_df: pd.DataFrame = label_df, 
                 root_dir: str = img_data_dir, 
                 ratio: tuple = (0.8, 0.1, 0.1),
                 subset = "training", # validating, testing
                 resize: tuple = None, #  (256, 512)
                 seed: int = 777
                ): 
        
        self.label_df = label_df
        self.label_col = torch.tensor(self.label_df["First_label"].values - np.ones_like(self.label_df["First_label"].values.shape))
        self.onehot_label = one_hot(self.label_col)
        
        self.label_dict = {
            name : vector for name, vector in zip(self.label_df["Recording"], self.onehot_label)
        }
        
        self.root_dir = root_dir
        self.ratio = ratio
        self.subset = subset
        self.resize = resize
        self.fullpaths = glob(self.root_dir + "\\*")    
        random.Random(seed).shuffle(self.fullpaths)
        self.start = int(len(self.fullpaths) * self.ratio[0])
        self.mid = int(len(self.fullpaths) * self.ratio[1])
        self.stop = int(len(self.fullpaths) * self.ratio[2])
        if self.subset == "training":
            self.imgpaths = self.fullpaths[ : self.start]
        elif self.subset == "validating":
            self.imgpaths = self.fullpaths[self.start : self.start + self.mid]
        elif self.subset == "testing":
            self.imgpaths = self.fullpaths[self.start + self.mid : self.start + self.mid + self.stop]
        else:
            raise Exception(f'Invalid subset. Subset Arg should be training, validating, and testing but "{self.subset}" found')

    def __len__(self):
        return len(self.imgpaths)

    def __getitem__(self, idx:list = None):
        if idx == None:
            raise Exception('idx arg cannot be None')
            
        imgs = []
        labels= []
        
        for i in idx:
            temp_path = self.imgpaths[i]
            
            filename = temp_path.split("\\")[-1].split("_")[0]
            labels.append(self.label_dict[filename])
            
            
            temp_img = cv.imread(temp_path)
            temp_img = torch.tensor(temp_img).permute(-1, 0, 1)
            imgs.append(temp_img)
        
        batch_imgs = torch.stack(imgs)
        batch_labels = torch.stack(labels)

        return batch_imgs, batch_labels

# Model 

In [10]:
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights
from torch import nn

class HeartModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.ori_model = efficientnet_b0(weights = EfficientNet_B0_Weights.IMAGENET1K_V1)
        self.ori_model.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(1280, 9),
            nn.Softmax(dim = 1)
        )
        
    def forward(self, x):        
        x = self.ori_model(x)
        return x

model = HeartModel().to("cuda")
x = torch.randn((1, 3, 256, 512)).to("cuda")
out = model(x)
print(out.shape)

torch.Size([1, 9])


In [11]:
model

HeartModel(
  (ori_model): EfficientNet(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): SiLU(inplace=True)
      )
      (1): Sequential(
        (0): MBConv(
          (block): Sequential(
            (0): Conv2dNormActivation(
              (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
              (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): SiLU(inplace=True)
            )
            (1): SqueezeExcitation(
              (avgpool): AdaptiveAvgPool2d(output_size=1)
              (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
              (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
              (activation): SiLU(inplace=True)
              (scale_

# Training

In [26]:
from datetime import datetime

opt_mapping = {
    "Adam" : torch.optim.Adam
}

loss_mapping = {
    "CCE" : nn.CrossEntropyLoss()
}

class Training:
    def __init__(self, 
                 device: str = "cuda",
                 learning_rate = 0.0001,
                 optimizer = "Adam",
                 loss = "CCE",
                 model = model,
                 batchsize = 32,
                 epochs = 1,
                 label_df: pd.DataFrame = label_df, 
                 root_dir: str = img_data_dir, 
                 ratio: tuple = (0.8, 0.1, 0.1),
                 mode = "development",
                 resize: tuple = None, #  (256, 512)
                 seed: int = 777,
                ):
        
        # Setup
        self.current_time = datetime.now().strftime("%m-%d-%Y - %H-%M-%S")
        self.model = model
        self.lr = learning_rate
        self.optimizer = opt_mapping[optimizer](self.model.parameters(), lr=self.lr)
        self.loss_fn = loss_mapping[loss]
        self.bs = batchsize
        self.ep = epochs
        self.default_ratio = (0.001, 0.001, 0.001)
        
        # Data
        self.label_df = label_df
        self.root_dir = root_dir
        if mode == "experiment":
            self.ratio = ratio
        elif mode == "development":
            self.ratio = self.default_ratio
        self.resize = resize
        self.seed = seed
        
        self.train_data = HeartDisease(label_df = self.label_df, 
                 root_dir = self.root_dir, 
                 ratio = self.ratio,
                 subset = "training",
                 resize = self.resize,
                 seed = self.seed)
        
        self.valid_data = HeartDisease(label_df = self.label_df, 
                 root_dir = self.root_dir, 
                 ratio = self.ratio,
                 subset = "validating",
                 resize = self.resize,
                 seed = self.seed)
        
        self.test_data = HeartDisease(label_df = self.label_df, 
                 root_dir = self.root_dir, 
                 ratio = self.ratio,
                 subset = "testing",
                 resize = self.resize,
                 seed = self.seed)
    
    def get_sample_count(self):
        print(f"Training: {len(self.train_data)}")
        print(f"Validating: {len(self.valid_data)}")
        print(f"Testing: {len(self.test_data)}")
    
    def logging(self, train_loss:float , train_acc: float, valid_loss:float, valid_acc:float):
        pass
    
    def export_log(self, rdir:str, extenstion:str = "parquet"):
        pass
    
    def clear_buffer(self):
        del self.train_data
        del self.test_data
        del self.valid_data
        del self.model
    
    def update(self):
        self.model.train()
        for e in trange(self.ep):
            for batch in range(0, len(self.train_data), self.bs):
                if len(self.train_data) - batch > self.bs:
                    batch_indices = [x for x in range(batch, batch + self.bs)]
                else:
                    batch_indices = [x for x in range(batch, len(self.train_data))]
                X, y = self.train_data[batch_indices]

                pred = self.model((X/255).to(self.device))
                loss = self.loss_fn(pred, y.to(self.device, dtype = torch.float))

                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                print(f"Loss: {loss.item()}")

In [27]:
# Metadata

# Training Setup
training_mode = "development"

# Data

monitor = Training(mode = training_mode)

In [28]:
# Unitest

monitor.get_sample_count()

Training: 425
Validating: 425
Testing: 425


In [29]:
monitor.update()

  0%|                                                                                            | 0/1 [00:00<?, ?it/s]


AttributeError: 'Training' object has no attribute 'device'

# Evaluation