In [2]:
import os
import gc
from glob import glob
import warnings
import random
import easydict
import copy
from collections import defaultdict
import math
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import font_manager, rc
import itertools
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import _LRScheduler
from torchvision.transforms.functional import to_pil_image
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2
import timm
from tqdm.auto import tqdm
import torchstain

In [3]:
def seed_everything(random_seed: int):
    torch.manual_seed(random_seed)
    torch.cuda.manual_seed(random_seed)
    torch.cuda.manual_seed_all(random_seed) # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(random_seed)
    random.seed(random_seed)
    os.environ['PYTHONHASHSEED'] = str(random_seed)
    print('seed setting complete')

In [4]:
import pickle

def load_dataset_from_pickle(file_name):
    """
    .pkl 파일로부터 데이터셋을 로드합니다.
    
    :param file_name: 로드할 .pkl 파일의 이름
    :return: 파일에서 로드된 데이터셋
    """
    with open(file_name, 'rb') as file:
        dataset = pickle.load(file)
    return dataset

In [5]:
class MILDataset(Dataset): # 참고 https://github.com/MSKCC-Computational-Pathology/MIL-nature-medicine-2019/blob/master/MIL_train.py    
    def set_mode(self, mode):
        self.mode = mode

    def __getitem__(self, idx):
        slide_idx = self.slide_idx[idx]
        img = self.patch_list[idx]
        
        transform = A.Compose([
            A.Resize(224, 224),
            A.Rotate(),
            A.HorizontalFlip(),
            A.VerticalFlip(),
            # A.ColorJitter(),
            # A.CLAHE(clip_limit=1.0, tile_grid_size=(8,8)),
            A.CLAHE(p=0.3),
            A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ToTensorV2()
        ])
        # Apply the transformations
        img = transform(image=img)["image"]
        
        label = self.label_list[slide_idx]
        return img, label
        
    
    def __len__(self):
        return len(self.patch_list)

In [6]:
# class MILDataset(Dataset): # 참고 https://github.com/MSKCC-Computational-Pathology/MIL-nature-medicine-2019/blob/master/MIL_train.py    
#     def set_mode(self, mode):
#         self.mode = mode

#     def __getitem__(self, idx):
#         slide_idx = self.slide_idx[idx]
#         img = self.patch_list[idx]
        
#         transform = A.Compose([
#             A.Resize(224, 224),
#             A.Rotate(),
#             A.HorizontalFlip(),
#             A.VerticalFlip(),
#             # A.ColorJitter(),
#             # A.CLAHE(clip_limit=1.0, tile_grid_size=(8,8)),
#             A.CLAHE(p=0.3),
#             # A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
#             ToTensorV2()
#         ])
#         # Apply the transformations
#         img = transform(image=img)["image"]
#         stain_normalizer = torchstain.normalizers.MacenkoNormalizer(backend='torch')
#         stain_normalizer.fit(img)
#         normed_img = stain_normalizer.normalize(I=img, stains=False)
#         normed_img = normed_img[0]/255.0
        
#         label = self.label_list[slide_idx]
#         return normed_img, label
        
    
#     def __len__(self):
#         return len(self.patch_list)
    

In [10]:
#train_df = pd.read_csv('../total_split_train_val/fold_4_train.csv')
train_df = pd.read_csv('../dataset/train_dataset.csv')
train_pkl_list = train_df['Slide_name'].tolist()
len(train_pkl_list)

30

In [12]:
train_pkl_list[0]

'resize_normal_001'

In [11]:
int(train_pkl_list[0].split('_')[1])

ValueError: invalid literal for int() with base 10: 'normal'

In [31]:
train_wsi = load_dataset_from_pickle(train_pkl_list)

TypeError: unhashable type: 'list'

In [30]:
train_wsi_data_loader = DataLoader(train_wsi, batch_size=batch_size, shuffle=True, drop_last=True)

NameError: name 'train_wsi' is not defined

In [6]:
train_df.columns

Index(['index', 'Slide_name', 'Patient_ID', 'Recurrence', 'Location',
       'Diagnosis', 'Growth phase', 'Size of tumor', 'Depth of invasion',
       'Level of invasion', 'Mitosis', 'Histologic subtype', 'Tumor cell type',
       'Surgical margin', 'Lymph node', 'Breslow thickness',
       'Precursor lesion', 'Date_of_diagnosis', 'Date_of_recurrence',
       'recurrence_period', 'psudo_label'],
      dtype='object')

In [8]:
recurrence_counts = train_df['Recurrence'].value_counts()

In [9]:
recurrence_counts

Recurrence
0    552
1    169
Name: count, dtype: int64

In [14]:
torch.tensor([len(train_df)/recurrence_counts.get(1,0)])

tensor(4.2663, dtype=torch.float64)

In [40]:
k = [1,2,3,4,5,6]

In [41]:
random.shuffle(k)

In [42]:
k

[5, 1, 3, 4, 6, 2]

In [47]:
valid_df = pd.read_csv('../total_split_train_val/fold_4_val.csv')
valid_pkl_name_list = valid_df['Slide_name'].tolist()
valid_pkl_list=[]
for name in valid_pkl_name_list :
    valid_pkl_list.append(f'../re_pickle_train_patch/{name}_patch.pkl')
len(valid_pkl_list)

173

In [76]:
valid_wsi = load_dataset_from_pickle(valid_pkl_list[0])

In [77]:
valid_loader=DataLoader(valid_wsi,batch_size=1, shuffle=True, drop_last=False)

In [82]:
for patch, label in valid_loader : # transposed_tensor = torch.transpose(tensor, 0, 2).contiguous()
    print(patch.shape)
    patch = torch.transpose(patch, 1, 3).contiguous()
    print(patch.shape)
    break

torch.Size([1, 224, 224, 3])
torch.Size([1, 3, 224, 224])


In [79]:
!nvidia-smi

Sat Jan 20 15:31:12 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.65.01    Driver Version: 515.65.01    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA RTX A6000    On   | 00000000:48:00.0 Off |                  Off |
| 30%   36C    P8    29W / 300W |    786MiB / 49140MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [33]:
SEED = 42
PATCH_SIZE = (224, 224) # width, height
OVERLAP_RATIO = 0.5
TISSUE_AREA_RATIO = 0.5
DROP_RATE = 0.5
TOPK = 5 # MIL top K
EPOCH = 20
LEARNING_RATE = 1e-5
WEIGHT_DECAY = 3e-5
device = 'cuda' if torch.cuda.is_available() else 'cpu'
GPU_IDX = 'cuda'
DATE = '240102'
seed_everything(SEED)

seed setting complete


In [22]:
from backbone import BackboneResNet
from dsmil import IClassifier, BClassifier, MILNet
from sklearn.metrics import roc_curve, roc_auc_score, precision_recall_fscore_support, accuracy_score
from sklearn.utils import shuffle

In [23]:
feature_extractor = BackboneResNet('resnet18')

Feature extractor: resnet18


In [24]:
instance_classifier = IClassifier(backbone=feature_extractor, freeze=True, out_dim=1)
bag_classifier = BClassifier(input_size=512, output_class=1, nonlinear=True)

In [25]:
milnet = MILNet(instance_classifier, bag_classifier).to(device)
pos_weight = torch.tensor([4.0]).to(device)
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
optimizer = torch.optim.Adam(milnet.parameters(), lr=LEARNING_RATE, betas=(0.5, 0.9), weight_decay=WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, EPOCH, 0)

In [26]:
train_pkl_list_test = ['../pickle_train_patch/dataset_train_894_patch.pkl']

In [36]:
batch_size=800

In [19]:
gpu_properties = torch.cuda.get_device_properties(0)
print(gpu_properties)

_CudaDeviceProperties(name='NVIDIA RTX A6000', major=8, minor=6, total_memory=48685MB, multi_processor_count=84)


In [39]:
def collate_fn(batch):
    # 각 배치의 patch 개수에 따라 동적으로 배치 크기를 조정
    max_patch_count = max(len(item[1]) for item in batch)
    batch_size = min(800, max_patch_count)
    
    # 나머지 collate_fn 로직 추가

    return batch

In [37]:
milnet.train()
for epoch in tqdm(range(EPOCH)) :
    print(f'-------######{epoch} start######------')
    epoch_loss=0
    for train_wsi_path in train_pkl_list_test:
        train_wsi = load_dataset_from_pickle(train_wsi_path)
        train_wsi_data_loader = DataLoader(train_wsi, batch_size=batch_size, shuffle=True, drop_last=True)
        if len(train_wsi_data_loader)==0:
            break
        else :
            batch_loss=0
            for wsi_patch, wsi_label in train_wsi_data_loader :
                label = torch.tensor([[wsi_label[0]]])
                wsi_patch, label = wsi_patch.to(device).float(), label.to(device).float()
                optimizer.zero_grad()
                instance_prediction, bag_prediction, _, _ = milnet(wsi_patch)
                max_prediction, index = torch.max(instance_prediction, 0)
                loss_bag = criterion(bag_prediction.view(1,-1), label)
                loss_instance = criterion(max_prediction.view(1,-1), label)
                loss_total = 0.5*loss_bag+0.5*loss_instance
                batch_loss += loss_total.item()
                loss_total.backward()
                optimizer.step()
            epoch_loss += batch_loss/(len(train_wsi_data_loader))
    print('epoch_loss :' ,epoch_loss/len(train_pkl_list_test))
    print(torch.cuda.memory_allocated() / 1e6)
    break
    if True  :
        bag_labels = []
        bag_predictions = []
        milnet.eval()
        with torch.no_grad() :
            for valid_wsi_path in valid_pkl_list[-10:] :
                valid_wsi = load_dataset_from_pickle(valid_wsi_path)
                valid_loader=DataLoader(valid_wsi,batch_size=batch_size, shuffle=True, drop_last=False)
                print(valid_wsi_path, len(valid_loader), sep='\n')
                batch_result = []
                batch_label = []
                for data, label in valid_loader:
                    data = data.to(device).float()
                    batch_label.append(int(label[0]))
                    instance_prediction, bag_prediction, _, _ = milnet(data)
                    bag_prediction = F.softmax(bag_prediction, dim=1)
                    bag_prediction = bag_prediction.item()
                    batch_result.append(int(bag_prediction))
                bag_predictions.append(int(any(batch_result)))
                bag_labels.append(int(any(batch_label)))
                # print(bag_labels, bag_predictions, sep='\n')
    #         if all(value == 1 for value in bag_predictions) or all(value == 0 for value in bag_predictions):
    #             print("모두 1 또는 모두 0입니다.")
    #         else :
    #             try :
    #                 precision, recall, fscore, _ = precision_recall_fscore_support(bag_labels, bag_predictions)
    #                 auc_value = roc_auc_score(bag_labels, bag_predictions)
    #                 accuracy = accuracy_score(bag_labels, bag_predictions)
    #                 print('accuracy : ', accuracy, 'precision : ', precision, 'auc_value : ',auc_value)
    #             except :
    #                 print(f'bag_label : {bag_labels}, bag_prediction : {bag_predictions}')
        # milnet.train()
    scheduler.step()
    random.shuffle(train_pkl_list)
    if ((epoch+1)%5)==0 :
        torch.save(milnet, f'./dsmilnet{epoch+1}.pt')
        

  0%|          | 0/20 [00:00<?, ?it/s]

-------######0 start######------
epoch_loss : 2.5949981212615967
551.133696


In [35]:
for vaild_wsi_path in valid_pkl_list :
    valid_wsi = load_dataset_from_pickle(vaild_wsi_path)
    valid_loader=DataLoader(valid_wsi,batch_size=batch_size, shuffle=True, drop_last=True)
    for data, label in valid_loader:
        data.to(device).float()
        print(data.shape, label)
        break
    break

torch.Size([250, 3, 512, 512]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0])


In [12]:
aa = load_dataset_from_pickle('../pickle_train_patch/dataset_train_430_patch.pkl')

In [13]:
len(aa)

765

In [18]:
aa_loader=DataLoader(aa, batch_size=800, shuffle=True, drop_last=True)

In [19]:
len(aa_loader)

0

In [17]:
for batch in aa_loader : 
    batch_count += 1
    print(batch[0].shape,batch[1])
    try :
        data, label = batch
    except :
        print(f'Error: too many values to unpack (expected 2) in {valid_wsi_path}')
        break
    print(hi)

torch.Size([250, 3, 224, 224]) tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0])


NameError: name 'hi' is not defined

TypeError: 'DataLoader' object is not subscriptable

In [74]:
!nvidia -smi

/bin/bash: nvidia: command not found
