In [1]:
import numpy as np
import matplotlib.pyplot as plt
from glob import glob
import SimpleITK as sitk
import tensorflow as tf
from tensorflow import keras
from keras.layers import AveragePooling3D
import time
import copy
from sklearn.cluster import KMeans
import pandas as pd
import gc
from sklearn.linear_model import LinearRegression
import math
import tensorflow as tf
from multiprocessing import Process, Manager
import datetime
import sys
import csv
import os
from tensorflow.keras.utils import to_categorical

sys.path.append('../')
sys.path.append('../Models/')
sys.path.append('../KEM_simulation/')
sys.path.append('../KEM_experiment/')

from utils import *
from GMM import *
from Kmeans import *
from experiment_auxiliary import *
from data_generate import *  # parameter-generation helpers
from KEM_SIMU import *  # KEM classes
from scipy.ndimage import zoom

In [2]:
# num_gpu: how many GPUs the scan indices are split across;
# gpu_name: id of the GPU this notebook instance works on.
num_gpu, gpu_name = 1, 0
# Restrict the devices visible to CUDA to the selected GPU id.
os.environ["CUDA_VISIBLE_DEVICES"] = f"{gpu_name}"

In [3]:
K = 0  # number of classes (the experiment function reads K from stats_dict instead)
index = 0  # position of the CT scan to look at within lung_image_file_list
depth = 99

lung_image_path = "/database/datasets/Classics/LUNA2016/IMAGES/"  # folder holding the CT data
lung_mask_path = "/database/datasets/Classics/LUNA16-Mask/"  # folder holding the lung masks
# Absolute paths of every .mhd CT file found in the image folder.
# NOTE(review): positions in this list are used as scan ids further down, but
# glob() does not promise a stable ordering — confirm this is acceptable.
lung_image_file_list = glob(os.path.join(lung_image_path, "*.mhd"))

In [23]:
to_csv_path = "./experiment_SPE-2023-11-08.csv"
# to_csv_path = "./experiment_SPE-2023-11-10.csv"

# Create the results file with its header row only when it does not exist yet:
# a fresh run then has a readable CSV for the later pd.read_csv call, while
# re-running this cell never wipes results that were already collected.
if not os.path.exists(to_csv_path):
    with open(to_csv_path, 'w', newline='', encoding='utf-8') as f:
        csv_write = csv.writer(f)
        csv_write.writerow(["index", "path", "concentrate_lung_time",
                            "kem_spe", "kmeans_spe", "gmm_spe"])

In [24]:
# A single dict bundling everything an experiment run needs; it is handed to the
# worker function, which is mainly launched through multiprocessing.Process.
stats_dict = dict(
    lung_image_path=lung_image_path,
    lung_mask_path=lung_mask_path,
    lung_image_file_list=lung_image_file_list,
    lung_threshold=-200,
    to_csv_path=to_csv_path,
    K=3,
    training_ratio=0.8,
    Ch=0.2217,
)

In [25]:
def experiment_compare_SPE_LUNA(stats_dict, status=None):
    """Fit k-means, GMM and KEM on one CT scan and append their prediction errors to the results CSV.

    The scan is selected by ``stats_dict['index']``, which is also used to seed
    NumPy and TensorFlow. The CT volume is passed through ``lung_concentration``,
    min-max rescaled, and every voxel position is randomly assigned to the
    training set (probability ``training_ratio``) or to the testing set. The
    three models are built from that same split; the value returned by each
    model's ``compute_prediction_error()`` is printed and written as one CSV row
    ``[index, path, lung-concentration seconds, kem_spe, kmeans_spe, gmm_spe]``.

    Args:
        stats_dict: dict providing the keys ``index``, ``K``, ``Ch``,
            ``lung_threshold``, ``training_ratio``, ``lung_image_file_list``,
            ``lung_image_path``, ``lung_mask_path`` and ``to_csv_path``.
        status: currently unused; kept so existing callers keep working.

    Returns:
        None. Results are printed and appended to the CSV file.
    """
    # information
    index = stats_dict['index']
    np.random.seed(index)
    tf.random.set_seed(index)
    K = stats_dict["K"]
    Ch = stats_dict["Ch"]
    lung_threshold = stats_dict["lung_threshold"]
    training_ratio = stats_dict["training_ratio"]  # ratio of the voxel positions used for training
    # Prefer the path carried by stats_dict so the worker does not depend on
    # whatever the notebook-level global happens to be; fall back to the global
    # only for callers that do not provide the key.
    csv_path = stats_dict["to_csv_path"] if "to_csv_path" in stats_dict else to_csv_path
    # Fraction of data handed to the k-means initialisation of every model.
    kmeans_sample_ratio = 1 / 100 / training_ratio

    sample_CT_path = stats_dict['lung_image_file_list'][index]
    # The loader also returns a lung mask, which this experiment does not use.
    sample_CT_array, _ = get_original_data_newversion(stats_dict['lung_image_path'],
                                                      stats_dict['lung_mask_path'], sample_CT_path)
#     if status is not None and status == "Large":
#         sample_CT_array = zoom(sample_CT_array, zoom=(0.5, 1, 1), order=0)
#         lung_mask_array = zoom(lung_mask_array, zoom=(0.5, 1, 1), order=0)
    print(f"---------------WE ARE LOADING {index}th PATIENT's CT with shape {sample_CT_array.shape}---------------")

    # Lung Concentration (timed; the duration is stored in the CSV)
    t1 = time.time()
    concentrated_data = lung_concentration(sample_CT_array, lung_threshold, 1, 0)
    t2 = time.time()
    print(f"lung concentration: {t2 - t1:.6f}")
    # rescale the data with a min-max normalisation
    CT_min = concentrated_data.min()
    CT_max = concentrated_data.max()
    concentrated_data = (concentrated_data - CT_min) / (CT_max - CT_min)
    # convert to a float32 tensor and add a leading and a trailing axis of size 1
    shape = concentrated_data.shape
    experiment_data = tf.cast(tf.convert_to_tensor(concentrated_data), tf.float32)
    experiment_data = tf.reshape(experiment_data, (1,) + experiment_data.shape + (1,))

    # generate a mask: positions with 1 are kept as training data
    position_mask = np.random.binomial(n=1, p=training_ratio, size=experiment_data.shape)
    position_mask = tf.convert_to_tensor(position_mask, dtype=tf.float32)
    training_data = position_mask * experiment_data
    # positions with 0 are kept as testing data
    testing_data = (1 - position_mask) * experiment_data

    # kmeans
    kmeans_model = Kmeans(K=K,
                          shape=shape,
                          training_data=training_data,
                          position_mask=position_mask,
                          kmeans_sample_ratio=kmeans_sample_ratio,
                          testing_data=testing_data)
    kmeans_model.kmeans_algorithm(max_steps=10)
    kmeans_spe = kmeans_model.compute_prediction_error()
    # GMM
    gmm_model = GMM(K=K,
                    shape=shape,
                    training_data=training_data,
                    position_mask=position_mask,
                    kmeans_sample_ratio=kmeans_sample_ratio,
                    testing_data=testing_data)
    gmm_model.gmm_algorithm(max_steps=10, epsilon=5e-3, smooth_parameter=1e-20)
    gmm_spe = gmm_model.compute_prediction_error()
    # KEM (uses the configured K, like the two baselines above)
    bandwidth, kernel_shape = bandwidth_preparation_small(position_mask, Ch)
    kem_model = KEM_SIMU_complex(K=K,
                                 shape=shape,
                                 training_data=training_data,
                                 position_mask=position_mask,
                                 kernel_shape=kernel_shape,
                                 bandwidth=bandwidth,
                                 kmeans_sample_ratio=kmeans_sample_ratio,
                                 testing_data=testing_data)
    kem_model.kem_algorithm(max_steps=10, epsilon=5e-3, smooth_parameter=1e-20)
    kem_spe = kem_model.compute_prediction_error()
    print(f"[{index}'s SPE]\n\tkmeans:{kmeans_spe:.4f};\n\tGMM:{gmm_spe:.4f};\n\tKEM:{kem_spe:.4f}")
    # Append one result row; the column order matches the CSV header.
    with open(csv_path, 'a', newline='', encoding='utf-8') as f:
        csv_write = csv.writer(f)
        csv_write.writerow([index, sample_CT_path, t2 - t1, kem_spe, kmeans_spe, gmm_spe])

In [26]:
np.random.seed(0)
# One integer id per CT file: 0, 1, ..., (number of files - 1).
index_list = np.arange(len(lung_image_file_list))
index_list.size

882

In [18]:
# Split index_list into num_gpu contiguous chunks and keep the chunk of this GPU.
i = gpu_name
idx_increase = len(index_list) // num_gpu
idx_start = idx_increase * i
# The last GPU additionally takes whatever the integer division left over.
idx_end = len(index_list) if i == num_gpu - 1 else idx_increase * (i + 1)
print(f"GPU{gpu_name} with index list: [{idx_start}:{idx_end}]")
gpu_index_list = index_list[idx_start:idx_end]

GPU0 with index list: [0:882]


In [19]:
# Load the results gathered so far, report their size and preview the first rows.
exist_csv = pd.read_csv(to_csv_path)
print(exist_csv.shape)
exist_csv.head(n=5)

(881, 6)


Unnamed: 0,index,path,concentrate_lung_time,kem_spe,kmeans_spe,gmm_spe
0,0,/database/datasets/Classics/LUNA2016/IMAGES/1....,21.203661,0.002503,0.048607,0.067517
1,1,/database/datasets/Classics/LUNA2016/IMAGES/1....,34.086537,0.000255,0.006496,0.008329
2,2,/database/datasets/Classics/LUNA2016/IMAGES/1....,13.628275,0.000337,0.006762,0.009945
3,3,/database/datasets/Classics/LUNA2016/IMAGES/1....,13.041714,0.002107,0.044628,0.064694
4,4,/database/datasets/Classics/LUNA2016/IMAGES/1....,32.506401,0.000686,0.011194,0.016357


In [20]:
# (rows, columns) of the results loaded from the CSV; one row per finished scan.
exist_csv.shape

(881, 6)

In [21]:
# Scan ids that already have a row in the results CSV.
exist_list = list(exist_csv['index'])
# A set gives constant-time membership tests instead of scanning the list once per id.
exist_set = set(exist_list)

# Ids still missing from the CSV, in their original order.
# NOTE(review): this walks the full index_list rather than gpu_index_list, so with
# num_gpu > 1 every notebook instance would pick up the same ids — confirm intent.
rerun_list = [i for i in index_list if i not in exist_set]

In [22]:
# Process the missing scans one at a time, each in its own child process
# (presumably so that memory is released when the child exits — confirm).
for index in rerun_list:
    np.random.seed(index)
    tf.random.set_seed(index)
    stats_dict['index'] = index
    process_eval = Process(target=experiment_compare_SPE_LUNA, args=(stats_dict,))
    process_eval.start()
    process_eval.join()
    # A crashed or killed child would otherwise go unnoticed and simply leave
    # its row missing from the CSV, so report any non-zero exit code.
    if process_eval.exitcode != 0:
        print(f"[WARNING] index {index}: worker process exited with code "
              f"{process_eval.exitcode}; its result row may be missing from the CSV.")

---------------WE ARE LOADING 871th PATIENT's CT with shape (145, 512, 512)---------------
lung concentration: 13.575292


2023-11-11 03:41:28.786616: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-11-11 03:41:29.358969: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 46725 MB memory:  -> device: 0, name: NVIDIA A40, pci bus id: 0000:9e:00.0, compute capability: 8.6


From function(__init__): Randomly pick 0.0125 data for kmeans.


  super()._check_params(X)


From function(__init__): Initialize mu via kmeans(with K=3)
From function(__init__): Randomly pick 0.01251 data for kmeans.
From function(__init__): KMeans(with K=3) success, with time: 0.8822 seconds
	centers: [0.8474829  0.37916645 0.17306656]
From function(__init__): Initialize parameters successfully.
	pik_estimate:(3, 145, 512, 512, 1)
	pi_estimate: (3, 1, 1, 1, 1)
	mu_estimate: (3, 1, 1, 1, 1)
	sigma_estimate: (3, 1, 1, 1, 1)
From function(gmm_algorithm): Receive max_steps: 10.
########################## STEP 0 ##########################
	 Current pik difference: 0.221314
From function(gmm_algorithm): E step success.
pi_estimator: [0.7012315  0.0092172  0.08950388]

mu_estimate: [0.8474829  0.37916645 0.17306656]
sigma_estimator: [0.0061678  0.09284314 0.03777297]
	 Current pi difference: 0.099951
	 Current mu difference: 3.12388e-05
	 Current sigma difference: 0.0013021
From function(gmm_algorithm): M step success.
From function(gmm_algorithm): difference: 0.101284.
---This iter

2023-11-11 03:41:38.357416: I tensorflow/stream_executor/cuda/cuda_dnn.cc:368] Loaded cuDNN version 8101


########################## STEP 0 ##########################
	 Current pik difference: 0.221441
From function(kem_algorithm): E step success.
+++ From m_step: add smooth_parameter to pi_estimate
From m_step: add smooth_parameter to sigma_estimate
	 Current pi difference: 0.210157
	 Current mu difference: 1.2896e+24
	 Current sigma difference: nan
From function(kem_algorithm): M step success.
From function(kem_algorithm): difference: nan.
---This iteration step costs 14.5 seconds.---
[871's SPE]
	kmeans:0.0474;
	GMM:0.0707;
	KEM:0.0023


548-764