# 整体流程

流程：导入包 -> 读取数据 -> 查看数据是否准确 -> 分离训练集和测试集 -> 构建网络  
                     -> 编译 -> 训练 -> 可视化 -> 测试 -> 查看结果

# 导入包
系统文件操作函数：
a, b, c = next(os.walk(filepath))    
# 第一个值为当前目录路径，第二个值为当前目录下的子目录名称列表，第三个值为当前目录下的文件名称列表

In [None]:
import os
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
plt.style.use("ggplot")
%matplotlib inline

from tqdm import tqdm, tnrange
from itertools import chain
from skimage.io import imread, imshow, concatenate_images
from skimage.transform import resize
from skimage.morphology import label
from sklearn.model_selection import train_test_split

from keras.models import Model, load_model
from keras.layers import Input, BatchNormalization, Activation, Dense, Dropout
from keras.layers.core import Lambda, RepeatVector, Reshape
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D, GlobalMaxPool2D
from keras.layers.merge import concatenate, add
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
 
# Root folders of the data set. The loader below joins them as
# <root> + <case sub-folder> + phase, e.g. ".../1001/arterial phase/".
path_mask = "C:/Users/liu/Desktop/U-net网络分割大肠癌/数据集1/"    # root used for the masks (y_train)
path_img = "C:/Users/liu/Desktop/U-net网络分割大肠癌/all_data/"    # root used for the input images (x_train)
phase = "/arterial phase/"    # sub-path appended to every case folder
# dir_a, dir_b, temp = next(os.walk(path_img))   # get the directory listing
# filename1 = next(os.walk(path_img+dir_b[1]+phase))[2]

# 定义读取数据的函数
读取图像的函数是
from keras.preprocessing.image import load_img
img = load_img(filename, color_mode='grayscale')
img = np.array(img)

# 如果c中包含多个类型的数据怎么办？
流程：构建路径 -> 读取路径内的文件名称 -> 查看名称 -> 利用列表推导式筛选出特定类型的文件名称

path = "C:/Users/liu/Desktop/U-net网络分割大肠癌/数据集1/1001/arterial phase/"
filename_list = next(os.walk(path))[2]

测试过程：
temp = filename_list[0].lower()
print(temp)
b = ".dcm" in temp 
print(b)

可行方案：
mask_name = [x for x in filename_list if '.png' in x.lower()]  #lower() 将大写转小写

In [None]:
def get_data(path, phase):
    """Load every PNG under ``path + <case> + phase`` into one float32 array.

    Each image is read as grayscale, divided by 255, cropped to rows ``256:``
    and columns ``128:384``, and resized to 128x128x1.

    Case folders and file names are visited in sorted order, so calling this
    function on two parallel directory trees (images and masks) yields the
    samples in the same order regardless of the file-system listing order.

    Args:
        path: Root directory, ending with a path separator; its immediate
            sub-directories are treated as cases.
        phase: Sub-path appended to every case directory; it must start and
            end with a path separator because paths are built by plain
            string concatenation.

    Returns:
        Array of shape ``(n_images, 128, 128, 1)`` and dtype float32, where
        ``n_images`` is the number of PNG files actually found (the array is
        no longer padded or limited to a fixed sample count).
    """
    images = []
    case_dirs = sorted(next(os.walk(path))[1])    # immediate sub-directories
    for case in tqdm(case_dirs):
        case_path = path + case + phase
        all_files = next(os.walk(case_path))[2]
        # Match on the extension only, case-insensitively.
        png_names = sorted(name for name in all_files if name.lower().endswith('.png'))
        for name in png_names:
            img = load_img(case_path + name, color_mode='grayscale')
            x_img = np.array(img) / 255
            x_img = resize(x_img[256:, 128:384], (128, 128, 1), mode='constant', preserve_range=True)
            # Store as float32 right away to halve the memory of the buffer list.
            images.append(x_img.astype('float32'))
    if not images:
        # Keep the 4-D shape contract even when nothing was found.
        return np.zeros((0, 128, 128, 1), dtype='float32')
    return np.stack(images)
# Load the input images and the masks with the same loader and the same phase sub-path.
x_train = get_data(path_img, phase)
y_train = get_data(path_mask, phase)

In [None]:
# Save the arrays: images and masks are written together into one .npy file.
np.save('data_small.npy', [x_train, y_train])

In [None]:
# Scratch cell: create a 100x100 array of zeros and print its shape.
a = np.zeros([100,100])
print(a.shape)

In [None]:
# Scratch cell: show the same blank image in two side-by-side subplots.
import matplotlib.pyplot as plt
import numpy as np

a = np.zeros([100, 100])
fig, ax = plt.subplots(1, 2, figsize=(20,10))    # one row, two columns
ax[0].imshow(a, cmap='gray')
ax[1].imshow(a, cmap='gray')


# 查看数据是否准确
随机读取一张图片 -> 子图像显示 -> 绘制边界 -> 显示

In [None]:
# Pick a random sample. randrange excludes len(x_train), so idx is always a
# valid index (randint would include the upper bound and could overflow).
idx = random.randrange(len(x_train))
print(idx)
img = x_train[idx].squeeze()    # image with the channel axis removed
mask = y_train[idx].squeeze()   # mask of the same sample
has_mask = y_train[idx].max()>0  # True when the mask has any non-zero pixel

fig, ax = plt.subplots(1,2,figsize=(20,10))

ax[0].imshow(img, cmap = 'gray')
if has_mask:
    ax[0].contour(mask, colors='r', levels=[0.5])   # outline the mask on top of the image
ax[0].set_title('Original')

ax[1].imshow(mask, cmap = 'gray')    # show the mask itself
ax[1].set_title('Mask')


# 分离训练集和测试集
x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=0.15)

In [None]:
# Hold out 15% of the samples for validation; no random_state is passed.
# NOTE(review): x_train / y_train are rebound to the training subset, so
# re-running this cell splits the already reduced arrays again.
x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=0.15)

# 构建网络
定义conv2d_block函数  
卷积层 -> 归一化(再激活函数) -> 卷积层 -> 归一化(再激活函数)  
定义get_unet函数  
conv2d_block -> MaxPooling2D -> Dropout -> ...（共 4 次 MaxPooling2D）... -> conv2d_block -> Conv2DTranspose -> concatenate -> Dropout  
-> conv2d_block -> ...（共 4 次 Conv2DTranspose）... -> 1×1 卷积（1 个 filter，sigmoid）输出 output 


In [None]:
def conv2d_block(input_tensor, n_filters=16, kernel_size=3, batchnorm=True):
    """Apply two consecutive Conv2D -> (BatchNormalization) -> ReLU stages.

    Args:
        input_tensor: Tensor fed into the first convolution.
        n_filters: Number of filters of both convolutions.
        kernel_size: Kernel size of both convolutions.
        batchnorm: When true, a BatchNormalization layer is inserted between
            each convolution and its activation.

    Returns:
        The output tensor of the second ReLU activation.
    """
    features = input_tensor
    for _ in range(2):    # two identical stages
        features = Conv2D(n_filters, kernel_size, padding='same')(features)
        if batchnorm:
            features = BatchNormalization()(features)
        features = Activation('relu')(features)
    return features

In [None]:
def get_unet(input_img, n_filters=16, dropout=0.5, batchnorm=True):
    """Build a U-Net with four down-sampling and four up-sampling levels.

    Args:
        input_img: Input tensor of the model.
        n_filters: Filter count of the first level; deeper levels use
            2x, 4x, 8x and (at the bottleneck) 16x this value.
        dropout: Dropout rate; the first encoder level uses half of it.
        batchnorm: Forwarded to every ``conv2d_block``.

    Returns:
        A ``Model`` mapping ``input_img`` to a single-channel sigmoid output.
    """
    # contracting path
    skips = []    # encoder outputs kept for the skip connections
    tensor = input_img
    for level, width in enumerate((1, 2, 4, 8)):
        conv = conv2d_block(tensor, n_filters=n_filters * width, kernel_size=3, batchnorm=batchnorm)
        skips.append(conv)
        tensor = MaxPooling2D((2, 2))(conv)
        # only the first level uses the halved dropout rate
        rate = dropout * 0.5 if level == 0 else dropout
        tensor = Dropout(rate)(tensor)

    # bottleneck
    tensor = conv2d_block(tensor, n_filters=n_filters * 16, kernel_size=3, batchnorm=batchnorm)

    # expansive path
    for width, skip in zip((8, 4, 2, 1), reversed(skips)):
        tensor = Conv2DTranspose(n_filters * width, (3, 3), strides=(2, 2), padding='same')(tensor)
        tensor = concatenate([tensor, skip])
        tensor = Dropout(dropout)(tensor)
        tensor = conv2d_block(tensor, n_filters=n_filters * width, kernel_size=3, batchnorm=batchnorm)

    outputs = Conv2D(1, (1, 1), activation='sigmoid')(tensor)
    return Model(inputs=[input_img], outputs=[outputs])

# 编译
输入一张测试图片，编译网络，查看网络结构

In [None]:
# Build the model for 128x128 single-channel inputs.
input_img = Input((128, 128,1), name='img')
model = get_unet(input_img, n_filters=16, dropout=0.05, batchnorm=True)

# Compile with the Adam optimizer (default settings), binary cross-entropy loss
# and accuracy as the reported metric.
model.compile(optimizer=Adam(), loss="binary_crossentropy", metrics=['accuracy'])
#model.summary()

In [None]:
# Scratch cell: creates (and displays) an input tensor with the same shape and
# name as the one used to build the model; the result is not assigned.
Input((128, 128,1), name='img')

# 训练网络
设置训练的模式 -> 设置训练参数 

In [None]:
# Training callbacks: stop early, lower the learning rate on a plateau, and
# checkpoint only the best model to 'model-tgs-salt.h5'.
callbacks = [
    EarlyStopping(verbose=1, patience=10),
    ReduceLROnPlateau(verbose=1, patience=3, factor=0.1, min_lr=1e-5),
    ModelCheckpoint('model-tgs-salt.h5', save_best_only=True, verbose=1),
]

In [None]:
# Print the shape of the validation masks.
print(y_valid.shape)

In [None]:
# Train for at most 100 epochs with batches of 32, validating on the held-out
# split; the callbacks defined earlier are applied during training.
results = model.fit(
    x_train,
    y_train,
    validation_data=(x_valid, y_valid),
    epochs=100,
    batch_size=32,
    callbacks=callbacks,
)

# 训练过程的可视化
显示loss数据的变化曲线  
从results.history['loss']获取loss的数据

In [None]:
# Plot training and validation loss per epoch.
plt.figure(figsize=(8, 8))
plt.title("Learning curve")
plt.plot(results.history["loss"], label='loss')
plt.plot(results.history["val_loss"], label='val_loss')
# Mark the epoch with the lowest validation loss. A single point is only
# visible with a marker; a bare line style draws nothing for one point.
best_epoch = np.argmin(results.history["val_loss"])
plt.plot(best_epoch, results.history["val_loss"][best_epoch],
         marker='x', color='r', linestyle='none', label='best model')
plt.xlabel("Epochs")
plt.ylabel("log_loss")
plt.legend()

# 测试集的评估

In [None]:
# Load the trained model:
# restore the weights written to 'model-tgs-salt.h5' (the checkpoint file name
# used by the ModelCheckpoint callback).
model.load_weights('model-tgs-salt.h5')

# Evaluate on the validation set (this should equal the best validation log loss).
model.evaluate(x_valid, y_valid, verbose=1)

# 查看效果

In [None]:
# Run the network:
# predict on the training and validation sets.
preds_train = model.predict(x_train,verbose=1)
preds_val = model.predict(x_valid, verbose=1)

# Threshold at 0.3 to obtain binary uint8 arrays.
# NOTE(review): y_train is overwritten with its binarized version here.
y_train = (y_train>0.3).astype(np.uint8)
preds_train_t = (preds_train>0.3).astype(np.uint8)
preds_val_t = (preds_val>0.3).astype(np.uint8)

In [None]:
# Binarize the validation masks with the same 0.3 threshold (overwrites y_valid).
y_valid = (y_valid>0.3).astype(np.uint8)

In [None]:
# 查看分割效果
def plot_sample(X, y, preds, binary_preds, ix=None):
    """Plot one sample in four panels: image, mask, raw and binary prediction.

    Args:
        X: Images, indexed as ``X[ix, ..., 0]``.
        y: Ground-truth masks, indexed by sample.
        preds: Raw predictions, indexed as ``preds[ix, ..., 0]``.
        binary_preds: Thresholded predictions, indexed by sample.
        ix: Index of the sample to show. When ``None``, a random sample whose
            mask is non-empty is chosen; if no sample has a mask, any random
            sample is used instead of searching forever.
    """
    if ix is None:
        # Choose directly among the samples that have a mask. This gives the
        # same uniform choice as rejection sampling, but it cannot index past
        # the end (randint includes its upper bound) and it always terminates.
        with_mask = [i for i in range(len(X)) if y[i].max() > 0]
        ix = random.choice(with_mask) if with_mask else random.randrange(len(X))
    has_mask = y[ix].max() > 0

    fig, ax = plt.subplots(1, 4, figsize=(20, 10))
    ax[0].imshow(X[ix, ..., 0], cmap='gray')
    if has_mask:
        ax[0].contour(y[ix].squeeze(), colors='r')    # ground-truth outline
    ax[0].set_title('Ground Truth')

    ax[1].imshow(y[ix].squeeze())
    ax[1].set_title('Mask of doctor')

    ax[2].imshow(preds[ix, ..., 0], vmin=0, vmax=1)
    if has_mask:
        ax[2].contour(binary_preds[ix].squeeze(), colors='r')    # predicted outline
    ax[2].set_title('Prediction')

    ax[3].imshow(binary_preds[ix].squeeze())
    ax[3].set_title('mask of prediction')

# Segmentation result on one random training sample.
plot_sample(x_train, y_train, preds_train, preds_train_t)

# Segmentation results on three random held-out samples (x_valid).
for i in range(3):
    plot_sample(x_valid, y_valid, preds_val, preds_val_t)

# 统计图片个数
def get_data(path_img=path_img, path_mask=path_mask, phase=phase):
    """Count the files stored under ``path_img + <case> + phase`` for all cases.

    Every file is counted, whatever its extension. ``path_mask`` is accepted
    but not used. Note that this definition reuses the name ``get_data`` and
    therefore replaces the loader of the same name once this cell has run.

    Returns:
        Total number of files over all case folders.
    """
    case_dirs = next(os.walk(path_img))[1]    # immediate sub-directories
    files_per_case = [
        len(next(os.walk(path_img + case + phase))[2])
        for case in case_dirs
    ]
    return sum(files_per_case)
# Count the files with the default paths and print the total.
x = get_data()    
print(x)

# Preview the crop window used by the loader (rows 256:, columns 128:384).
# NOTE(review): `img` is whatever an earlier cell left behind; if it is a
# 128x128 sample taken from x_train, this slice is empty - confirm which image is meant.
a = np.array(img)
b = a[256:,128:384]
plt.imshow(b)

In [None]:
# Fragment of alternative image-reading code: collect the full paths of PNG files.
# NOTE(review): `ids`, `path` and `filename_list` are not defined in this cell;
# they presumably come from earlier notebook state - confirm before running.
for filename in ids:
    if ".png" in filename.lower():  # check whether the file is a PNG
        filename_list.append(os.path.join(path,filename))