#### 遥感图像通常很大，需要先把图像切割成小块再保存，可以直接保存为numpy数组。对于mask图像，通常要把它转换为one_hot形式；本次实验的数据是23分类，但若将mask保存为(None,224,224,23)的形式会太占内存，所以保存mask时采用(None,224,224)的形式，其中像素值为0~22，每个位置的像素值代表该像素的类别。

In [1]:

import numpy as np
import os
from libtiff import TIFF
from skimage import io

unit_width = 224  # side length (width and height) of each square tile
file_dir = '/input0/yuanyang_land/data/'  # prefix that readTif prepends to every file name it is given

def readTif(file_name, isMask=False):
    """Load one TIFF located under ``file_dir`` and describe it as a dict.

    Args:
        file_name: Path of the file, appended verbatim to ``file_dir``.
        isMask: If true, the loaded array is returned untouched and the band
            count is reported as 1. Otherwise the array is treated as
            (height, width, bands) and reordered to (bands, height, width).

    Returns:
        A dict with the keys ``'width'``, ``'height'``, ``'band_num'`` and
        ``'data'`` (the possibly reordered array).
    """
    raster = io.imread(file_dir + file_name)
    n_rows, n_cols = raster.shape[0], raster.shape[1]

    if isMask:
        n_bands = 1
    else:
        n_bands = raster.shape[2]
        # Bring the band axis to the front: (H, W, C) -> (C, H, W).
        raster = np.moveaxis(raster, 2, 0)

    return {'width': n_cols, 'height': n_rows, 'band_num': n_bands, 'data': raster}


def divide(img, mask, size, stride):
    """Cut a large image and its mask into aligned ``size`` x ``size`` tiles.

    The window origin moves over rows and columns in steps of ``stride``.
    With ``stride < size`` neighbouring tiles overlap; with ``stride > size``
    some pixels between tiles are never covered. Windows that would extend
    past the bottom or right edge are dropped.

    Args:
        img: Dict with ``'band_num'``, ``'height'``, ``'width'`` and
            ``'data'``; ``'data'`` is indexed as [band, row, col].
        mask: Dict whose ``'data'`` is indexed as [row, col].
        size: Side length of every tile.
        stride: Step between consecutive window origins.

    Returns:
        ``(imgs, labels)``: ``imgs`` is ``np.array`` of the per-tile band
        stacks (one entry per tile, each holding ``band_num`` crops) and
        ``labels`` is a plain list with the matching mask crop of each tile,
        in row-major window order.
    """
    n_bands, n_rows, n_cols = img['band_num'], img['height'], img['width']

    # Keep only origins whose full window still fits inside the image.
    tops = [top for top in range(0, n_rows, stride) if top + size <= n_rows]

    tiles, labels = [], []
    for top in tops:
        lefts = [left for left in range(0, n_cols, stride) if left + size <= n_cols]
        for left in lefts:
            # One (size, size) crop per band, collected in band order.
            tiles.append([
                img['data'][band, top:top + size, left:left + size]
                for band in range(n_bands)
            ])
            labels.append(mask['data'][top:top + size, left:left + size])

    return np.array(tiles), labels


def get_data():
    """Read every image/mask pair, tile them, and stack all tiles.

    File names are generated for indices 1..69 as
    ``images/NN_4bands.tif`` and ``labels/NN_4bands_mask.tif``; the list of
    image names is printed. Each pair is cut into 224 x 224 tiles with a
    stride of 100.

    Returns:
        ``(xs, ys)``: arrays of image tiles, (None, bands, 224, 224), and of
        the matching mask tiles, (None, 224, 224).
    """
    img_names = ['images/%02d_4bands.tif' % idx for idx in range(1, 70)]
    mask_names = ['labels/%02d_4bands_mask.tif' % idx for idx in range(1, 70)]
    print(img_names)

    xs, ys = [], []
    for img_name, mask_name in zip(img_names, mask_names):
        image = readTif(img_name)
        annotation = readTif(mask_name, True)
        # Last two arguments: tile size and stride.
        tiles, tile_labels = divide(image, annotation, 224, 100)
        xs.extend(tiles)
        ys.extend(tile_labels)

    return np.array(xs), np.array(ys)


xs, ys = get_data()

orishape = ys.shape

# Remap the raw mask values to consecutive class indices 0..K-1.
# np.unique returns the sorted distinct values, so the position of a value in
# that array is its class index (e.g. with 23 classes the mask ends up holding
# only 0..22). np.searchsorted looks that position up for every pixel at once,
# replacing a Python-level loop over each individual pixel.
class_values = np.unique(ys)
# Kept as a module-level name: raw mask value -> class index.
unique_num = {v: i for i, v in enumerate(class_values)}
# Every pixel value is present in class_values, so the left insertion point is
# exactly its index. Cast back so the saved mask keeps its original dtype.
ys = np.searchsorted(class_values, ys).astype(ys.dtype).reshape(orishape)

# pi is a random ordering of the sample indices; indexing xs and ys with the
# same permutation shuffles them while keeping image/mask pairs aligned.
pi = np.random.permutation(xs.shape[0])
xs, ys = xs[pi], ys[pi]
bound = int(len(ys) * 0.9)  # first 90% -> training, remainder -> validation

# np.save does not create missing directories, so make sure the target exists.
os.makedirs('./data', exist_ok=True)
np.save('./data/train_image.npy', xs[:bound])
np.save('./data/train_annotation.npy', ys[:bound])
np.save('./data/validation_image.npy', xs[bound:])
np.save('./data/validation_annotation.npy', ys[bound:])

# Read one of the saved files back as a quick sanity check of its shape.
train_mask = np.load('./data/train_annotation.npy')
print('train_mask.shape', train_mask.shape)

['images/01_4bands.tif', 'images/02_4bands.tif', 'images/03_4bands.tif', 'images/04_4bands.tif', 'images/05_4bands.tif', 'images/06_4bands.tif', 'images/07_4bands.tif', 'images/08_4bands.tif', 'images/09_4bands.tif', 'images/10_4bands.tif', 'images/11_4bands.tif', 'images/12_4bands.tif', 'images/13_4bands.tif', 'images/14_4bands.tif', 'images/15_4bands.tif', 'images/16_4bands.tif', 'images/17_4bands.tif', 'images/18_4bands.tif', 'images/19_4bands.tif', 'images/20_4bands.tif', 'images/21_4bands.tif', 'images/22_4bands.tif', 'images/23_4bands.tif', 'images/24_4bands.tif', 'images/25_4bands.tif', 'images/26_4bands.tif', 'images/27_4bands.tif', 'images/28_4bands.tif', 'images/29_4bands.tif', 'images/30_4bands.tif', 'images/31_4bands.tif', 'images/32_4bands.tif', 'images/33_4bands.tif', 'images/34_4bands.tif', 'images/35_4bands.tif', 'images/36_4bands.tif', 'images/37_4bands.tif', 'images/38_4bands.tif', 'images/39_4bands.tif', 'images/40_4bands.tif', 'images/41_4bands.tif', 'images/42_4ban

In [4]:
import numpy as np
# Load the saved validation annotations and print their shape.
# NOTE: despite the variable name and the printed label ("train_mask"), the
# file loaded here is the validation annotation set.
train_mask=np.load('./data/validation_annotation.npy')
print('train_mask.shape',train_mask.shape)

train_mask.shape (835, 224, 224)


In [5]:
# Load the saved training annotations and print their shape.
train_mask=np.load('./data/train_annotation.npy')
print('train_mask.shape',train_mask.shape)

train_mask.shape (7514, 224, 224)


In [6]:
# Load the saved training images and print their shape.
# NOTE: despite the variable name and the printed label ("train_mask"), the
# file loaded here holds the training image tiles, not masks.
train_mask=np.load('./data/train_image.npy')
print('train_mask.shape',train_mask.shape)

train_mask.shape (7514, 4, 224, 224)
