# Read all the files in the "ice" subfolder and create a large 4D NumPy array to test the CNN model

In [2]:
from os import listdir

#Initiating the input directory
indir = r'Data\ice'

#Finding all the processed geotiffs in the input directory.
#listdir returns the names in arbitrary order, so the list is sorted to keep the
#order in which the images are processed the same on every run and platform.
files = sorted(f for f in listdir(indir) if f.startswith('S0_RS2') and f.endswith('.tif'))

#Prints out the number of files to filter
print('There are {} files to filter...'.format(len(files)))

There are 586 files to filter...


In [3]:
import numpy as np

#Function to resample the different bands into N x N subsamples
def resamp(arr, N):
    """Cut a 2-D array into non-overlapping N x N tiles.

    Parameters
    ----------
    arr : 2-D array of shape (rows, cols)
        The band to subsample.
    N : int
        Edge length of each square tile, in pixels.

    Returns
    -------
    numpy.ndarray
        A new array of shape ((rows // N) * (cols // N), N, N) with the same
        dtype as ``arr``. Tiles are ordered left to right within a row of
        tiles, then top to bottom. Rows/columns that do not fill a complete
        tile at the bottom/right edge are dropped, and an input smaller than
        one tile yields an empty (0, N, N) array.
    """
    arr = np.asarray(arr)
    tile_rows = arr.shape[0] // N
    tile_cols = arr.shape[1] // N

    #Crop to a whole number of tiles so partially covered edges cannot raise
    cropped = arr[:tile_rows * N, :tile_cols * N]

    #Split both axes into (tile index, offset within tile) and bring the two
    #tile indices to the front; writing into a fresh buffer guarantees the
    #result never aliases the input
    tiles = np.empty((tile_rows, tile_cols, N, N), dtype=arr.dtype)
    tiles[...] = cropped.reshape(tile_rows, N, tile_cols, N).swapaxes(1, 2)
    return tiles.reshape(tile_rows * tile_cols, N, N)

In [5]:
import gdal
from os.path import join
from datetime import datetime

#Initiating the output data directory
outdir = r'Data\ice'

#Initiating which class to assemble (NOTE: This step is necessary due to the large amount of data the code has to hold in
#memory)
#Possible inputs: ['ice', 'water']
cnn_class = 'ice'

#Specifying the sample size, e.g. number of pixels
numpix = 20

#Specifying the number of bands
numbands = 4

#Value marking NoData pixels in the first two bands (NaN is treated as NoData as well)
nodata_value = -90

#Only images acquired on or after this date are added to the array
start_date = datetime(2019, 8, 1)

#Per-image subsample arrays are collected here and joined once after the loop.
#Calling np.append inside the loop would re-copy the whole accumulated array for every image.
chunks = []

#Start filtering all images
for image in files:
    #The acquisition date (YYYYMMDD) is the 7th underscore-separated field of the file name
    acquired = datetime.strptime(image.split('_')[6][0:8], '%Y%m%d')
    if acquired < start_date:
        continue

    print('Adding ' + image + ' to the ice training data array')

    #Reading the processed image
    ras = gdal.Open(join(indir, image))
    if ras is None:
        #gdal.Open signals failure by returning None unless GDAL exceptions are enabled
        raise RuntimeError('GDAL could not open ' + join(indir, image))

    #Number of complete numpix x numpix subsamples along each axis
    ncols = ras.RasterXSize // numpix
    nrows = ras.RasterYSize // numpix
    if ncols == 0 or nrows == 0:
        print('Skipping ' + image + ': raster is smaller than one subsample')
        del ras
        continue

    #Creating a temporary array
    temp_array = np.empty([nrows * ncols, numpix, numpix, numbands])

    #Subsampling the image by band
    for k in range(numbands):

        #Getting the band from the image (GDAL bands are 1-based), cropped to whole subsamples
        band = ras.GetRasterBand(k + 1).ReadAsArray(0, 0, ncols * numpix, nrows * numpix)

        #Resampling the band into numpix x numpix subsamples
        temp_array[:, :, :, k] = resamp(band, numpix)

        del band

    #Finding the NoData subsamples: any NaN or NoData pixel in the first two bands
    checked = temp_array[:, :, :, :2]
    has_nodata = (np.isnan(checked) | (checked == nodata_value)).any(axis=(1, 2, 3))

    #Keeping only the valid subsamples of this image (boolean indexing makes a copy)
    chunks.append(temp_array[~has_nodata])
    del temp_array, checked, has_nodata, ras

#Joining the per-image arrays into the final array; stays empty if no image qualified
if chunks:
    trarray = np.concatenate(chunks, axis=0)
else:
    trarray = np.empty([0, numpix, numpix, numbands])
del chunks

#Saving the assembled data
np.save(join(outdir, 'TestDataS0_' + cnn_class + '.npy'), trarray)
        

Adding S0_RS2_OK115894_PK1014272_DK953063_SCWA_20191230_003617_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK116124_PK1016138_DK954759_SCWA_20200105_143853_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK116124_PK1016157_DK954778_SCWA_20200106_003208_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK116125_PK1016179_DK954800_SCWA_20200106_141000_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK116360_PK1017804_DK956329_SCWA_20200112_143454_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK116360_PK1017846_DK956371_SCWA_20200113_002757_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK116669_PK1021564_DK960805_SCWA_20200119_143035_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK116669_PK1021583_DK960821_SCWA_20200120_002347_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK116670_PK1021621_DK960871_SCWA_20200120_140138_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK116779_PK102

In [6]:
#Dimensions of the assembled array: (subsamples, rows, columns, bands)
trarray.shape

(488169, 20, 20, 4)