In [1]:
# DEPAI project
# Organise your patches in two folders: one for training and one for testing.
# The script generates the arrays of the trainset and the testset.
# Each image array has dimension (N x L x W x B) -- N is the number of patches, L x W are the length and width of the patches, B is the number of bands.
# Each label array has dimension (N) -- one label per patch.

In [2]:
from __future__ import division, print_function
import os
import math
import h5py
from PIL import Image
import subprocess, glob
import numpy as np
import time
from osgeo import gdal
%matplotlib inline

In [3]:
# normalize the top array
def norm_rgbn(data,gmax,gmin):
    """
    Rescale the multispectral data linearly so that gmin maps to 0 and gmax maps to 1.

    data -- the multispectral array (any array-like; it is converted to a float array)
    gmax -- value that is mapped to 1
    gmin -- value that is mapped to 0

    Returns a float array computed as (data - gmin) / (gmax - gmin).
    Values outside [gmin, gmax] are not clipped, so they end up outside [0, 1].
    gmax must differ from gmin, otherwise the division produces inf/nan.
    """
    # np.asarray also accepts plain lists/tuples, which have no .astype method
    data = np.asarray(data, dtype=float)
    return (data - gmin) / (gmax - gmin)

In [4]:
#functions for loading the images and converting them to arrays
def img_to_array(*images):
    """Convert one or more image files to numpy arrays.

    Positional arguments:
    *images -- filenames of the images to convert, each passed as its own argument

    Returns a list with one array per filename, in the order given
    (an empty list when called without arguments).
    """
    return [gtiff_to_array(img) for img in images]

# convert the geotiff to a numpy array
def gtiff_to_array(imgfname):
    """Transform a geotiff to numpy array.

    Keyword arguments:
    imgfname -- filename of image to convert

    Returns a 3-D array in which the raster bands are stacked, in file order,
    along the last axis (axis 2).

    Raises:
    IOError -- if GDAL could not open the file
    ValueError -- if the dataset contains no raster bands
    """
    ds = gdal.Open(imgfname)
    # gdal.Open signals failure by returning None (unless GDAL exceptions are enabled)
    if ds is None:
        raise IOError("GDAL could not open image: {}".format(imgfname))
    if ds.RasterCount < 1:
        raise ValueError("Image has no raster bands: {}".format(imgfname))

    # GDAL band indices start at 1
    bands = [np.array(ds.GetRasterBand(idx).ReadAsArray())
             for idx in range(1, ds.RasterCount + 1)]
    # stack once along a new last axis instead of re-concatenating for every band
    return np.stack(bands, axis=2)

In [5]:
def gen_arrays(files_path):
    '''
    generate image and label arrays from image tiles

    files_path -- iterable of tile filenames. The label of a tile is the second
                  '_'-separated token of its file name (extension removed),
                  e.g. 'tile_3.tif' -> 3. Both '\\' and '/' path separators
                  are understood.

    Returns (images, labels): images holds the tiles stacked along a new first
    axis, labels is an integer array with one entry per tile, in the same order.

    Raises ValueError if files_path is empty, or if a file name does not
    contain an integer label token.
    '''
    files_path = list(files_path)
    if not files_path:
        raise ValueError("gen_arrays needs at least one image file")

    tiles = []
    labels = []
    for raw in files_path:
        tiles.append(img_to_array(raw)[0])

        # labels: take the file name from the LAST path component; normalising
        # the separators first makes this work for Windows and POSIX style paths
        fname = os.path.basename(raw.replace('\\', '/'))
        tokens = os.path.splitext(fname)[0].split('_')
        if len(tokens) < 2:
            raise ValueError("No '_<label>' token in file name: {}".format(raw))
        labels.append(int(tokens[1]))

    #final arrays
    images = np.stack(tiles, axis=0)
    label_array = np.asarray(labels)

    return images, label_array


In [6]:
# project folder that holds the clipped tiles and receives the output arrays
root_path = 'E:/DEPAI_project'
# glob returns files in arbitrary order; sort so the sample order is reproducible
raw_train_files = sorted(glob.glob(root_path+"/clipped_tiles_train/*.tif"))
raw_test_files = sorted(glob.glob(root_path+"/clipped_tiles_test/*.tif"))

out_train_arr = root_path + "/trial_arrays/train/city_samples.hdf5"
out_test_arr = root_path +"/trial_arrays/test/city_samples.hdf5"

# generate the trainset arrays
images_list,label_list_array = gen_arrays(raw_train_files)

# make sure the output folder exists: h5py does not create missing folders
train_out_dir = os.path.dirname(out_train_arr)
if not os.path.isdir(train_out_dir):
    os.makedirs(train_out_dir)

# save the train arrays
with h5py.File(out_train_arr, mode = "w") as f:
    f["X_train"] = images_list
    f["Y_train"] = label_list_array

In [29]:
#generate the testset arrays
image_list,label_list_array = gen_arrays(raw_test_files)

# make sure the output folder exists: h5py does not create missing folders
test_out_dir = os.path.dirname(out_test_arr)
if not os.path.isdir(test_out_dir):
    os.makedirs(test_out_dir)

# save the test arrays
with h5py.File(out_test_arr, mode = "w") as f:
    # write the test images just generated (image_list), not the train images
    # that are still held in images_list from the previous cell
    f["X_test"] = image_list
    f["Y_test"] = label_list_array