In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import pydicom
import copy
import time

import torch
from torch import nn, optim
from torchvision import transforms, io
from torchvision.transforms import functional as F
from torch.utils.data import DataLoader

In [2]:
import random
np.random.seed(2022)
random.seed(2022)
torch.manual_seed(2022)

<torch._C.Generator at 0x7f82847c2710>

In [3]:
def display(tr: torch.Tensor):
    infos = {
        'min': torch.amin(tr),
        'max': torch.amax(tr),
        'dtype': tr.dtype,
        'size': tr.size()
    }

    return infos

# Parameters

In [4]:
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# device = "cpu"
print(f"Using {device} device")

Using cuda:0 device


In [5]:
batch_size = 16
split = .8
shuffle_dataset = True
random_seed= 2022
num_epochs = 10
conv_threshold = 30

lr = 1e-4

# Data

In [6]:
class SPECTDataset(torch.utils.data.Dataset):
    '''
    - split data into train, val (frac, 1-frac)
    - random_state set 2022 (fix random result)
    '''
    def __init__(self, root, train, frac, transform):
        self.root = Path(root)
        self.transform = transform
        df = pd.read_csv( str(self.root/ "DICOM/train.csv") )

        # Train / Validation data
        train_df = df.sample(frac=frac, random_state=2022, ignore_index=True)
        if train: self.list = train_df
        else: self.list = pd.concat( [df, train_df] ).drop_duplicates(keep=False, ignore_index=True)

        # edit file path
        self.list.FilePath = self.list.FilePath.apply(lambda _: self.root / _[1:])

    def __len__(self):
        return len(self.list)

    def __getitem__(self, idx):
        dcm = pydicom.read_file( str(self.list.FilePath[idx]) )

        # age, gender
        age = self.list.loc[idx, 'Age']
        gender = self.list.loc[idx, 'Gender']
        
        # label (1,2,3 -> 0.,1,2)
        label = int(self.list.Stage[idx]) - 1

        # Preprocessed Pixels: totensor, 3 channel
        pixel = dcm.pixel_array[ self.list.loc[idx, 'index'] ] # 用 index 當 column name 真的是天才
        # low, high = self.get_low_high(dcm)
        # pixeled = self.getWindow(pixel, low, high)
        # img = (pixeled - np.min(pixeled)) / (np.max(pixeled) - np.min(pixeled))
        img = torch.tensor(pixel.astype(np.float32))
        img = torch.stack([img, img, img], dim=0)

        seed = np.random.randint(1e9)
        random.seed(seed)
        torch.manual_seed(seed)

        img = self.transform(img)

        return img, age, gender, label


In [7]:
preprocess = transforms.Compose([
    transforms.CenterCrop(50), 
    # transforms.Normalize((62.2852, 62.2852, 62.2852), (76.8448, 76.8448, 76.8448)), # 跑 normalize 反而下降準確度
    transforms.Resize(224),
])

In [8]:
training_data = SPECTDataset(root="./data", train=True, frac=split, transform=preprocess)
validation_data = SPECTDataset(root="./data", train=False, frac=split, transform=preprocess)

In [9]:
print("訓練資料集數量：", len(training_data))
print("測試資料集數量：", len(validation_data))

訓練資料集數量： 129
測試資料集數量： 32


In [10]:
dataloaders = {
    'train': DataLoader(training_data, batch_size=batch_size, shuffle=shuffle_dataset),
    'val': DataLoader(validation_data, batch_size=batch_size, shuffle=shuffle_dataset)
}

In [11]:
for X, age, gender, y in dataloaders['val']:
    print("Shape of X [N, C, H, W]: ", X.shape)
    print("Shape of y: ", y.shape, y.dtype)
    print("Age: ", age)
    print("Gender: ", gender)

    break

Shape of X [N, C, H, W]:  torch.Size([16, 3, 224, 224])
Shape of y:  torch.Size([16]) torch.int64
Age:  tensor([25, 64, 74, 48, 81, 75, 54, 77, 75, 64, 50, 80, 67, 85, 72, 66])
Gender:  tensor([1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1])


# Model

In [12]:
model = torch.hub.load('pytorch/vision:v0.10.0', 'vgg16', pretrained=True)

Using cache found in /home/azetry/.cache/torch/hub/pytorch_vision_v0.10.0


In [13]:
print(model)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [14]:
class SPECT_VGG16(nn.Module):
    def __init__(self):
        super(SPECT_VGG16, self).__init__()

        # 載入 VGG16 類神經網路結構
        self.model = torch.hub.load('pytorch/vision:v0.10.0', 'vgg16', pretrained=True)

        # 鎖定 VGG16 預訓練模型參數
        for param in self.model.parameters():
           param.requires_grad = False

        # 修改輸出層輸出數量
        self.model.classifier.add_module("7", nn.Linear(in_features=1000, out_features=20))

    def forward(self, x, age, gender):
        logits_ = self.model(x)

        # Add Age and Gender
        age.unsqueeze_(1)
        logits_ = torch.cat((logits_, age), dim=1)

        gender.unsqueeze_(1)
        logits_ = torch.cat((logits_, gender), dim=1)

        # Final Classifier (這樣寫不太好，但就先可以run)
        logits = nn.Linear(22, 3).to(device)(logits_)

        return logits

In [15]:
model = SPECT_VGG16().to(device)
print(model)

Using cache found in /home/azetry/.cache/torch/hub/pytorch_vision_v0.10.0


SPECT_VGG16(
  (model): VGG(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
      (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): ReLU(inplace=True)
      (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (6): ReLU(inplace=True)
      (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (8): ReLU(inplace=True)
      (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (11): ReLU(inplace=True)
      (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (13): ReLU(inplace=True)
      (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (15): ReLU(inplace=True)
      (1

# Loss Function and Optimizer

In [16]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

# Training

In [17]:
def train_model(dataloader, model, loss_fn, optimizer, num_epochs):
    since = time.time()

    # 儲存最佳參數
    prev_acc = 0.0
    best_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())

    # 計算是否收斂和提前結束
    count_cont = 0
    finish = False

    # Level: Epoch
    for epoch in range(num_epochs):
        print(f"Epoch {epoch}/{num_epochs-1}:")
        print("-"*8)

        # 每次 epoch 都要跑一次 training 和 validation
        # Level: Phase (train, val)
        for phase in ['train', 'val']:
            if phase == 'train': model.train()
            else: model.eval()

            running_loss = 0.0
            running_corrects = 0

            # 批次讀取資料進行訓練
            # Level: Batch Data
            for batch, (X, age, gender, y) in enumerate(dataloader[phase]):
                # 將資料放置於 GPU 或 CPU
                X, age, gender, y = X.to(device), age.to(device), gender.to(device), y.to(device)

                optimizer.zero_grad() # 重設參數梯度（gradient）

                # forward
                # 只有在訓練階段才要計算梯度
                with torch.set_grad_enabled(phase == 'train'): # phase = True or False
                    outputs = model(X, age, gender)                  # 計算預測值
                    _, preds = torch.max(outputs, 1)    # 計算預測結果
                    loss = loss_fn(outputs, y)          # 計算損失值（loss）

                    # 只有在訓練階段才要優化
                    if phase == 'train':
                        loss.backward()                 # 反向傳播（backpropagation）
                        optimizer.step()                # 更新參數

                # 統計
                running_loss += loss.item() * X.size(0) # Batch size
                running_corrects += torch.sum(preds == y.data)
            # End of Level: Batch Data

            epoch_loss = running_loss / len(dataloader[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloader[phase].dataset)

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            if phase == 'train':
                if epoch_acc == prev_acc: count_cont += 1
                else: count_cont = 0
                prev_acc = epoch_acc

                if count_cont > conv_threshold: 
                    print("Convergence. End training early.")
                    finish = True
                    break

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
        # End of Level: Phase (train, val)

        print("-"*8)
        if finish: break
    # End of Level: Epoch

    time_elapsed = time.time() - since

    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    print(f'Best val Acc: {best_acc:4f}')

    # 載入模型最佳參數
    model.load_state_dict(best_model_wts)

    return model



In [18]:
model = train_model(dataloaders, model, criterion, optimizer, num_epochs)

Epoch 0/9:
--------
train Loss: 15.4169 Acc: 0.2868
val Loss: 8.9965 Acc: 0.5000
--------
Epoch 1/9:
--------
train Loss: 28.0438 Acc: 0.3333
val Loss: 21.4161 Acc: 0.2812
--------
Epoch 2/9:
--------
train Loss: 28.1416 Acc: 0.3411
val Loss: 34.0151 Acc: 0.3438
--------
Epoch 3/9:
--------
train Loss: 21.0863 Acc: 0.3488
val Loss: 23.2798 Acc: 0.2500
--------
Epoch 4/9:
--------
train Loss: 18.4104 Acc: 0.3411
val Loss: 39.2855 Acc: 0.2812
--------
Epoch 5/9:
--------
train Loss: 33.2450 Acc: 0.3023
val Loss: 44.3625 Acc: 0.3750
--------
Epoch 6/9:
--------
train Loss: 34.3497 Acc: 0.3256
val Loss: 25.0042 Acc: 0.3125
--------
Epoch 7/9:
--------
train Loss: 42.8089 Acc: 0.3876
val Loss: 18.7734 Acc: 0.3125
--------
Epoch 8/9:
--------
train Loss: 31.5940 Acc: 0.3256
val Loss: 19.8369 Acc: 0.2812
--------
Epoch 9/9:
--------
train Loss: 21.6501 Acc: 0.3023
val Loss: 62.9105 Acc: 0.1875
--------
Training complete in 0m 10s
Best val Acc: 0.500000


In [19]:
torch.save(model, "20221031001_vgg.pth")