In [1]:
import os
import numpy as np
from PIL import Image
import torch
import torchvision
import torchvision.transforms as transforms
import tensorflow as tf
import matplotlib.pyplot as plt

In [2]:
train_image_dir = './data/train/image/'
train_mask_dir = './data/train/mask/'
test_image_dir = './data/test/image/'
test_mask_dir = './data/test/mask/'

In [3]:
#Load the data
NumOfImages = 20
channels = 3
height = 592
width = 576
imgs_array = np.empty((NumOfImages,height,width,channels))
masks_array = np.empty(((NumOfImages,height,width,channels)))
for path, subdirs, files in os.walk(train_image_dir): #list all files, directories in the path
    for i in range(len(files)):
        img = Image.open(train_image_dir+files[i])
        imgs_array[i] = np.asarray(img)
for path, subdir, files in os.walk(train_mask_dir):
    for i in range(len(files)):
        mask = Image.open(train_mask_dir+files[i])
        masks_array[i] = np.asarray(mask)

In [4]:
#Augmentation_1 : Cut images
temp = []
for i in range(imgs_array.shape[0]):
    for x in range(imgs_array.shape[1] // 48):
        for y in range(imgs_array.shape[2] // 48):
            temp.append(imgs_array[i,y*48:(y+1)*48,x*48:(x+1)*48])
imgs_array = np.array(temp)

In [5]:
#Normalize the data
imgs_normalized = np.empty((imgs_array.shape[0],channels,48,48))
for i in range(len(imgs_array)):
    imgs_array[i] = imgs_array[i]/255
    m = np.mean(imgs_array[i])
    s = np.std(imgs_array[i])
    transformFunc = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(m,m,m),std=(s,s,s))
    ])
    imgs_normalized[i] = transformFunc(imgs_array[i])

In [6]:
#Define the neural network
from keras.models import Model
from keras.layers import Input, concatenate, Conv2D, MaxPooling2D, UpSampling2D, Reshape, core, Dropout #core内部定义了一系列常用的网络层，包括全连接、激活层等
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler

def get_unet(n_ch,patch_height,patch_width):
    inputs = Input(shape=(n_ch,patch_height,patch_width))
    #data_format：字符串，“channels_first”或“channels_last”之一，代表图像的通道维的位置。
    #以128x128的RGB图像为例，“channels_first”应将数据组织为（3,128,128），而“channels_last”应将数据组织为（128,128,3）。该参数的默认值是~/.keras/keras.json中设置的值，若从未设置过，则为“channels_last”。
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same',data_format='channels_first')(inputs)
    conv1 = Dropout(0.2)(conv1)
    conv1 = Conv2D(32, (3, 3), activation='relu', padding='same',data_format='channels_first')(conv1)
    pool1 = MaxPooling2D((2, 2))(conv1)
    #
    conv2 = Conv2D(64, (3, 3), activation='relu', padding='same',data_format='channels_first')(pool1)
    conv2 = Dropout(0.2)(conv2)
    conv2 = Conv2D(64, (3, 3), activation='relu', padding='same',data_format='channels_first')(conv2)
    pool2 = MaxPooling2D((2, 2))(conv2)
    #
    conv3 = Conv2D(128, (3, 3), activation='relu', padding='same',data_format='channels_first')(pool2)
    conv3 = Dropout(0.2)(conv3)
    conv3 = Conv2D(128, (3, 3), activation='relu', padding='same',data_format='channels_first')(conv3)

    up1 = UpSampling2D(size=(2, 2))(conv3)
    up1 = concatenate([conv2,up1],axis=1)
    conv4 = Conv2D(64, (3, 3), activation='relu', padding='same',data_format='channels_first')(up1)
    conv4 = Dropout(0.2)(conv4)
    conv4 = Conv2D(64, (3, 3), activation='relu', padding='same',data_format='channels_first')(conv4)
    #
    up2 = UpSampling2D(size=(2, 2))(conv4)
    up2 = concatenate([conv1,up2], axis=1)
    conv5 = Conv2D(32, (3, 3), activation='relu', padding='same',data_format='channels_first')(up2)
    conv5 = Dropout(0.2)(conv5)
    conv5 = Conv2D(32, (3, 3), activation='relu', padding='same',data_format='channels_first')(conv5)
    #
    #1×1的卷积的作用
	#大概有两个方面的作用：1. 实现跨通道的交互和信息整合2. 进行卷积核通道数的降维和升维。
    conv6 = Conv2D(2, (1, 1), activation='relu',padding='same',data_format='channels_first')(conv5)
    conv6 = core.Reshape((2,patch_height*patch_width))(conv6) #此时output的shape是(batchsize,2,patch_height*patch_width)
    conv6 = core.Permute((2,1))(conv6)    #此时output的shape是(Npatch,patch_height*patch_width,2)即输出维度是(Npatch,2304,2)
    ############
    conv7 = core.Activation('softmax')(conv6)
    model = Model(inputs=inputs, outputs=conv7)
    # sgd = SGD(lr=0.01, decay=1e-6, momentum=0.3, nesterov=False)
    model.compile(optimizer=Adam(lr=0.001), loss='categorical_crossentropy',metrics=['accuracy'])
    return model

In [7]:
#Train
imgs_array = imgs_array[:,np.newaxis, ...]
model = get_unet(imgs_array.shape[1],imgs_array.shape[2],imgs_array.shape[3])

In [8]:
model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 1, 48, 48)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 32, 48, 48)   320         input_1[0][0]                    
__________________________________________________________________________________________________
dropout (Dropout)               (None, 32, 48, 48)   0           conv2d[0][0]                     
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 32, 48, 48)   9248        dropout[0][0]                    
_______________________________________________________________________________________

In [9]:
checkpointer = ModelCheckpoint(filepath = 'best_weights.h5', verbose = 1, monitor = 'val_acc', mode='auto', save_best_only=True)
model.compile(optimizer=Adam(lr=0.001),loss='categorical_crossentropy',metrics=['accuracy'])

In [10]:
masks_array.shape

(20, 592, 576, 3)