In [1]:
from datasets.dataset import *
from models.meta import Meta
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
import torch
import timm
from tensorboardX import SummaryWriter
import numpy as np
import pandas as pd
import os
import sys
import shutil
from PIL import Image
from models.basenet import *
from utils import *
from configs.config_setting_baseline import setting_config
from copy import deepcopy
import sklearn.metrics as metrics
from torch.cuda.amp import autocast, GradScaler
import torch.nn.init as init
import segmentation_models_pytorch as smp

import warnings
# Suppress every warning category for the remainder of the session.
# NOTE(review): this also hides deprecation and numerical warnings raised by
# the libraries imported above - confirm that is intended.
warnings.filterwarnings("ignore")

# Alias for the imported baseline settings object; the cells below read
# num_classes, categories, seed, batch_size and num_workers from it.
config = setting_config

In [2]:
def preprocess_batch(batch):
    """Unpack one batch into its four tensors with the leading axis squeezed.

    Args:
        batch: mapping with the keys 'support_images', 'support_masks',
            'query_images' and 'query_masks'; each value must provide
            ``.squeeze(0)``.

    Returns:
        Tuple ``(support_images, support_masks, query_images, query_masks)``
        where every element is the corresponding ``batch`` entry after
        ``.squeeze(0)``.
    """
    ordered_keys = ('support_images', 'support_masks', 'query_images', 'query_masks')
    return tuple(batch[key].squeeze(0) for key in ordered_keys)

def copy_file_to_folder(source_file, dest_folder):
    """Copy ``source_file`` into ``dest_folder``, keeping its base name.

    Args:
        source_file: path of the file to copy.
        dest_folder: destination directory; it is created (including missing
            parents) when it does not exist yet.

    Returns:
        None.
    """
    # exist_ok=True replaces the separate exists()/makedirs() pair, so the
    # call cannot fail if the folder appears between the check and the create.
    os.makedirs(dest_folder, exist_ok=True)

    dest_path = os.path.join(dest_folder, os.path.basename(source_file))
    shutil.copy(source_file, dest_path)

def evaluation_api(predicted_list,groudtruth_list):
    """Return the F1 score (used here as Dice) over all flattened labels.

    Both arguments are sequences of sequences. Each one is flattened by one
    level, converted to a NumPy array and reshaped to 1-D before being passed
    to ``sklearn.metrics.f1_score`` with its default arguments.

    Args:
        predicted_list: nested predicted labels.
        groudtruth_list: nested ground-truth labels, aligned with
            ``predicted_list``.

    Returns:
        The value returned by ``metrics.f1_score(ground_truth, predictions)``.
    """
    def _flatten(nested):
        # Concatenate the sub-sequences, then collapse to a 1-D array.
        collected = []
        for group in nested:
            collected.extend(group)
        return np.array(collected).reshape(-1)

    flat_predictions = _flatten(predicted_list)
    flat_targets = _flatten(groudtruth_list)
    return metrics.f1_score(flat_targets, flat_predictions)

def evaluation_epoch(predicted_list,groundtruth_list,num_classes=None):
    """Compute per-class and pooled Dice scores over a list of label maps.

    Every entry of ``predicted_list`` is flattened and compared element-wise
    with the matching entry of ``groundtruth_list``. A position where both
    labels agree counts as a true positive for that class; a disagreement
    counts as a false positive for the predicted class and a false negative
    for the ground-truth class.

    The counts are accumulated with ``np.bincount`` instead of a Python loop
    over individual pixels, which gives the same totals much faster.

    Args:
        predicted_list: sequence of integer label arrays (any shape).
        groundtruth_list: sequence of integer label arrays; entry ``i`` must
            hold as many elements as ``predicted_list[i]``.
        num_classes: number of classes; defaults to ``config.num_classes``
            when omitted, which matches the previous behaviour.

    Returns:
        ``(dice, mdice)`` where ``dice`` is a list of ``num_classes`` floats
        computed as ``2*TP / (FP + FN + 2*TP + 1)`` and ``mdice`` is the same
        formula applied to the counts summed over all classes. The ``+1`` in
        the denominator keeps the ratio defined when a class never occurs.

    Raises:
        ValueError: if the two lists, or a pair of entries, differ in size.
        TypeError: if a label array does not have an integer dtype.
        IndexError: if a label lies outside ``[0, num_classes)``.
    """
    if num_classes is None:
        num_classes = config.num_classes

    if len(predicted_list) != len(groundtruth_list):
        raise ValueError("predicted_list and groundtruth_list must have the same length")

    tp = np.zeros(num_classes, dtype=np.int64)
    fp = np.zeros(num_classes, dtype=np.int64)
    fn = np.zeros(num_classes, dtype=np.int64)

    for item_preds, item_gts in zip(predicted_list, groundtruth_list):
        preds = np.asarray(item_preds).reshape(-1)
        gts = np.asarray(item_gts).reshape(-1)
        if preds.size != gts.size:
            raise ValueError("prediction and ground truth must contain the same number of elements")
        if preds.size == 0:
            continue

        for labels in (preds, gts):
            if not np.issubdtype(labels.dtype, np.integer):
                raise TypeError("labels must have an integer dtype")
            # Reject negatives explicitly: they would otherwise be counted
            # against the wrong class (list indexing wraps) or break bincount.
            if labels.min() < 0 or labels.max() >= num_classes:
                raise IndexError("label outside the range [0, num_classes)")

        preds = preds.astype(np.intp, copy=False)
        gts = gts.astype(np.intp, copy=False)

        agree = preds == gts
        differ = ~agree
        tp += np.bincount(gts[agree], minlength=num_classes)
        fp += np.bincount(preds[differ], minlength=num_classes)
        fn += np.bincount(gts[differ], minlength=num_classes)

    dice = (2 * tp / (fp + fn + 2 * tp + 1)).tolist()
    mdice = (2 * tp.sum()) / (fp.sum() + fn.sum() + 2 * tp.sum() + 1)
    return dice,mdice

def evaluation_basenet(base_net,query_images,query_masks,criterion):
    """Run ``base_net`` on the query images and score it against the masks.

    Args:
        base_net: callable model; its output is reduced with ``argmax`` over
            dim 1 (presumably the class axis - confirm against the model).
        query_images: input passed to ``base_net`` unchanged.
        query_masks: target tensor handed to ``criterion`` and, cast to long,
            used as ground truth for the metrics.
        criterion: callable ``criterion(output, query_masks)`` returning the
            loss.

    Returns:
        ``(accuracy, f1_score, loss)``: the sklearn accuracy over all
        flattened elements, the per-class F1 array (``average=None``) and the
        loss exactly as returned by ``criterion``.

    NOTE(review): the forward pass is not wrapped in ``torch.no_grad()``, so
    the returned loss follows the caller's grad mode.
    """
    network_output = base_net(query_images)
    loss = criterion(network_output, query_masks)

    # Highest-scoring index along dim 1 becomes the predicted label.
    predicted_labels = torch.argmax(network_output, dim=1).long()
    flat_predictions = predicted_labels.detach().cpu().numpy().reshape(-1)
    flat_targets = query_masks.long().detach().cpu().numpy().reshape(-1)

    accuracy = metrics.accuracy_score(flat_targets, flat_predictions)
    per_class_f1 = metrics.f1_score(flat_targets, flat_predictions, average=None)
    return accuracy, per_class_f1, loss

def initialize_weights_he(model):
    """Re-initialise a model's weight tensors with He (Kaiming) uniform values.

    ``kaiming_uniform_`` derives its bound from the tensor's fan-in, which is
    only defined for tensors with at least two dimensions. Parameters with
    fewer dimensions (biases, normalisation scales/shifts) are therefore left
    untouched instead of raising ``ValueError``.

    Args:
        model: object exposing ``parameters()`` (e.g. a ``torch.nn.Module``);
            eligible parameters are modified in place.
    """
    for param in model.parameters():
        if param.dim() < 2:
            # Fan-in cannot be computed for 0-D / 1-D tensors.
            continue
        init.kaiming_uniform_(param, mode='fan_in', nonlinearity='relu')

def initialize_weights_xavier(model):
    """Re-initialise a model's weight tensors with Xavier (Glorot) uniform values.

    ``xavier_uniform_`` needs both fan-in and fan-out, which are only defined
    for tensors with at least two dimensions. Parameters with fewer
    dimensions (biases, normalisation scales/shifts) are therefore left
    untouched instead of raising ``ValueError``.

    Args:
        model: object exposing ``parameters()`` (e.g. a ``torch.nn.Module``);
            eligible parameters are modified in place.
    """
    for param in model.parameters():
        if param.dim() < 2:
            # Fan-in / fan-out cannot be computed for 0-D / 1-D tensors.
            continue
        init.xavier_uniform_(param)

def initialize_weights_normal(model):
    """Overwrite every parameter of ``model`` with samples from N(0, 1).

    Args:
        model: object exposing ``parameters()``; each returned tensor is
            filled in place via ``init.normal_`` with mean 0 and std 1.
    """
    standard_normal = dict(mean=0, std=1)
    for weight in model.parameters():
        init.normal_(weight, **standard_normal)

def remove_exsits_folder(folderpath):
    """Delete ``folderpath`` and everything below it when the path exists.

    Args:
        folderpath: path of the directory tree to remove; a missing path is
            ignored.
    """
    if not os.path.exists(folderpath):
        return
    shutil.rmtree(folderpath)


In [3]:
print('#----------Generating data----------#')
# Source folders with the original images and their segmentation masks.
images_resources_path = "./data/HAM10000/origin/images/"
masks_resources_path = "./data/HAM10000/origin/masks/"
# Split fractions applied to every shuffled category: the first ratio[0] of
# the samples is the training pool, the next ratio[1] is validation and the
# remainder is test.
ratio = [0.6,0.2]
# Maximum number of training samples copied per category; the rest of the
# training pool is skipped.
limit_num = 300
categories = config.categories
categories_dictionary = {}      # category name -> 1-based category id
category_id = 1
# Ground-truth table; the 'dx' column holds the category and 'image_id' the
# file stem of each sample.
origin_groundtruth_csv = "./data/HAM10000/origin/groundtruth/HAM10000_groundtruth.csv"
origin_groundtruth = pd.read_csv(origin_groundtruth_csv)

# Root folders of the generated splits; one sub-folder per category is
# created below.
trainset_images_path = "./data/HAM10000/train/images/"
trainset_masks_path = "./data/HAM10000/train/masks/"
valset_images_path = "./data/HAM10000/val/images/"
valset_masks_path = "./data/HAM10000/val/masks/"
testset_images_path = "./data/HAM10000/test/images/"
testset_masks_path = "./data/HAM10000/test/masks/"

for category in categories:
    # (images folder, masks folder) of this category for every split.
    split_folders = {
        "train": (os.path.join(trainset_images_path, category),
                  os.path.join(trainset_masks_path, category)),
        "val": (os.path.join(valset_images_path, category),
                os.path.join(valset_masks_path, category)),
        "test": (os.path.join(testset_images_path, category),
                 os.path.join(testset_masks_path, category)),
    }
    # Start from empty folders so files of a previous run cannot leak in.
    for images_folder, masks_folder in split_folders.values():
        for folder in (images_folder, masks_folder):
            remove_exsits_folder(folder)
            os.makedirs(folder, exist_ok=True)

    # Select the rows of this category and shuffle them reproducibly.
    data_categories = origin_groundtruth[origin_groundtruth['dx'] == category]
    data_categories = data_categories.sample(frac=1,random_state=config.seed)
    length_categories = len(data_categories)

    # Split boundaries as positions in the shuffled order. int() truncates
    # these non-negative products exactly like math.floor, without relying on
    # a `math` name that this notebook never imports explicitly.
    val_start = int(length_categories * ratio[0])
    test_start = int(length_categories * (ratio[0]+ratio[1]))
    # The training cap can never reach into the validation range; this keeps
    # the three splits disjoint and ordered even for small categories.
    train_end = min(int(limit_num), val_start)

    for position, image_name in enumerate(data_categories['image_id']):
        # Derive the split from the position itself rather than switching
        # folders on an exact-equality trigger, which misroutes samples when
        # limit_num is larger than a boundary.
        if position < train_end:
            split = "train"
        elif position < val_start:
            continue        # training pool beyond the cap is not used
        elif position < test_start:
            split = "val"
        else:
            split = "test"
        dest_folder_images, dest_folder_masks = split_folders[split]

        images_file = image_name+".jpg"
        masks_file = image_name+"_segmentation.png"
        source_image = images_resources_path+images_file
        source_mask = masks_resources_path+masks_file
        copy_file_to_folder(source_image,dest_folder_images)

        # Re-encode the mask so pixels stored as 255 become label 1. The
        # context manager closes the source file once the pixels are read.
        with Image.open(source_mask) as mask_image:
            mask_array = np.array(mask_image)
        mask_array[mask_array == 255] = 1
        Image.fromarray(mask_array).save(os.path.join(dest_folder_masks, masks_file))

    categories_dictionary[category] = category_id
    category_id += 1

#----------Generating data----------#


In [4]:
# Build the train / validation / test datasets and wrap each one in a
# DataLoader that shares the batch size and worker count from config.
# NOTE(review): no loader passes shuffle=True, so the training loader iterates
# in dataset order - confirm this is intended.
batch_size = config.batch_size
loader_options = {"batch_size": batch_size, "num_workers": config.num_workers}

train_dataset = HAMALL_datasets(config, train=True)
train_loader = DataLoader(train_dataset, **loader_options)

val_dataset = HAMALL_datasets(config, train=False,val=True)
val_loader = DataLoader(val_dataset, **loader_options)

test_dataset = HAMALL_datasets(config, train=False)
test_loader = DataLoader(test_dataset, **loader_options)

# Report the number of samples per split.
print("trian_dataset length:",len(train_dataset))
print("val_dataset length:",len(val_dataset))
print("test_dataset length:",len(test_dataset))

trian_dataset length: 900
val_dataset length: 546
test_dataset length: 546
