# Read all the files in the class subfolder ("ice" or "water") and create a huge 4D numpy array to test the CNN model

In [1]:
from os import listdir

from os.path import join

#Initiating the input directory (built with join so the path is also valid outside Windows)
indir = join('Data', 'water')

#Finding all the processed geotiffs in the input directory
#The list is sorted because listdir returns entries in arbitrary order; sorting keeps the
#processing order (and therefore the sample order of the assembled array) reproducible
files = sorted(f for f in listdir(indir) if f.startswith('S0_RS2') and f.endswith('.tif'))

#Printing the number of files to filter
print('There are ' + str(len(files)) + ' files to filter...')

There are 537 files to filter...


In [2]:
import numpy as np

#Function to resample the different bands into N x N subsamples
def resamp(arr, N):
    """Cut an array into non-overlapping N x N tiles along its first two axes.

    Tiles are returned in row-major order: all tiles of the first N rows from
    left to right, then the tiles of the next N rows, and so on. Rows/columns
    that do not fill a complete tile at the bottom/right edge are dropped.

    Parameters
    ----------
    arr : array_like
        Input with at least two dimensions. Any axes after the first two are
        carried along unchanged in every tile.
    N : int
        Tile edge length in pixels; must be positive.

    Returns
    -------
    numpy.ndarray
        New array of shape (n_tiles, N, N) + arr.shape[2:], where
        n_tiles = (arr.shape[0] // N) * (arr.shape[1] // N). It never shares
        memory with `arr`. n_tiles is 0 when `arr` is smaller than one tile.

    Raises
    ------
    ValueError
        If N is not positive or `arr` has fewer than two dimensions.
    """
    arr = np.asarray(arr)
    if N <= 0:
        raise ValueError('N must be a positive integer')
    if arr.ndim < 2:
        raise ValueError('arr must have at least 2 dimensions')

    rows = arr.shape[0] // N
    cols = arr.shape[1] // N
    tail = arr.shape[2:]

    #Dropping the incomplete tiles at the bottom/right edge
    cropped = arr[:rows * N, :cols * N]

    #Axes after this reshape are (tile row, pixel row, tile column, pixel column, ...)
    blocks = cropped.reshape((rows, N, cols, N) + tail)

    #Bringing both tile axes to the front, then flattening them into one tile index;
    #the explicit copy guarantees the result is independent of the input array
    return blocks.swapaxes(1, 2).copy().reshape((rows * cols, N, N) + tail)

In [3]:
import gdal
from os.path import join
from datetime import datetime

#Initiating the output data directory (built with join so the path is also valid outside Windows)
outdir = join('Data', 'water')

#Initiating which class to assemble (NOTE: This step is necessary due to the large amount of data the code has to hold in
#memory)
#Possible inputs: ['ice', 'water']
cnn_class = 'water'

#Specifying the sample size, e.g. number of pixels
numpix = 20

#Specifying the number of bands
numbands = 4

#Only images whose file-name date is on or after this day are added to the array
start_date = datetime(2019, 8, 1)

#Collecting the valid subsamples of each image in a list; they are concatenated once after the
#loop, because growing the array with np.append re-copies all previously stored data per image
chunks = []

#Start filtering all images
for image in files:
    #The 7th underscore-separated token of the file name starts with the date as YYYYMMDD
    image_date = datetime.strptime(image.split('_')[6][0:8], '%Y%m%d')
    if image_date < start_date:
        continue

    print('Adding ' + image + ' to the ' + cnn_class + ' test data array')

    #Reading the processed image (gdal.Open returns None instead of raising when it fails)
    ras = gdal.Open(join(indir, image))
    if ras is None:
        raise IOError('GDAL could not open ' + join(indir, image))

    #Number of complete subsamples that fit along each image axis
    ncols = ras.RasterXSize // numpix
    nrows = ras.RasterYSize // numpix

    #An image smaller than one subsample contributes nothing
    if ncols == 0 or nrows == 0:
        print('Skipping ' + image + ': image is smaller than one subsample')
        ras = None
        continue

    #Creating a temporary array holding every subsample of this image
    tiles = np.empty([nrows * ncols, numpix, numpix, numbands])

    #Subsampling the image by band (GDAL band numbers start at 1)
    for k in range(numbands):
        #Reading only the part of the band that is covered by complete subsamples
        band = ras.GetRasterBand(k + 1).ReadAsArray(0, 0, ncols * numpix, nrows * numpix)

        #Resampling the band into numpix x numpix subsamples
        tiles[:, :, :, k] = resamp(band, numpix)
        del band

    #Releasing the GDAL dataset
    ras = None

    #Finding the NoData subsamples: any NaN or any -90 value in the first two bands
    first_two = tiles[:, :, :, 0:2]
    nodata = np.isnan(first_two).any(axis=(1, 2, 3)) | (first_two == -90).any(axis=(1, 2, 3))

    #Keeping only the valid subsamples of this image
    chunks.append(tiles[~nodata])
    del tiles, first_two, nodata

#Assembling the final array (empty with the expected trailing shape if no image qualified)
if chunks:
    trarray = np.concatenate(chunks, axis=0)
else:
    trarray = np.empty([0, numpix, numpix, numbands])
del chunks

#Saving the test data
np.save(join(outdir, 'TestDataS0_' + cnn_class + '.npy'), trarray)
        

Adding S0_RS2_OK111661_PK976652_DK914274_SCWA_20190812_001947_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK111662_PK976675_DK914314_SCWA_20190812_135722_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK112067_PK980258_DK917672_SCWA_20190813_132916_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK112068_PK980289_DK917703_SCWA_20190814_143907_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK112069_PK980530_DK917895_SCWA_20190815_140958_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK112070_PK980588_DK917991_SCWA_20190816_134054_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK112071_PK980756_DK918210_SCWA_20190818_004446_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK112074_PK985382_DK923250_SCWA_20190819_135323_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK112317_PK982907_DK920602_SCWA_20190820_132413_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK112317_PK986061_DK923

Adding S0_RS2_OK120535_PK1060135_DK1001428_SCWA_20190905_002011_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK120535_PK1060141_DK1001431_SCWA_20190912_001600_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK120535_PK1060143_DK1001432_SCWA_20190915_002834_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK120535_PK1060147_DK1001434_SCWA_20190919_001149_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK120535_PK1060151_DK1001436_SCWA_20190922_002420_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK120535_PK1060153_DK1001437_SCWA_20190923_133244_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK120535_PK1060158_DK1001439_SCWA_20190926_134515_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK120535_PK1060162_DK1001441_SCWA_20190929_002010_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK120535_PK1060164_DK1001442_SCWA_20190930_132834_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK120

In [4]:
#Writing the assembled array to disk under the 'TestData_' prefix
#NOTE(review): the same array is also saved as 'TestDataS0_<class>.npy' by the previous cell - confirm both copies are needed
test_path = join(outdir, 'TestData_' + cnn_class + '.npy')
np.save(test_path, trarray)

#Displaying the dimensions of the assembled array
trarray.shape

(671304, 20, 20, 4)