# Reads all the processed GeoTIFFs in the selected class subfolder ("ice" or "water"; currently "water") and assembles them into one large 4D numpy array used to train the CNN model

In [1]:
from os import listdir

#Initiating the input directory
indir = r'Data\water'

#Finding all the processed geotiffs in the input directory
#(sorted by name: listdir returns the entries in arbitrary order, and the order
#of the files decides the order of the samples in the training data array)
files = sorted(f for f in listdir(indir)
               if f.startswith('S0_RS2') and f.endswith('.tif'))

#Prints out the number of files to filter
print('There are ' + str(len(files)) + ' files to filter...')

There are 537 files to filter...


In [2]:
import numpy as np

#Function to resample the different bands into N x N subsamples
def resamp(arr, N):
    """Cut an array into non-overlapping N x N subsamples (tiles).

    The first two axes of ``arr`` are tiled; any further axes are carried
    along unchanged. Tiles are returned in row-major order: left to right
    along the second axis, then top to bottom along the first axis.

    Trailing rows/columns that do not fill a complete tile are dropped, so
    the input does not have to be an exact multiple of ``N`` in size.

    Parameters
    ----------
    arr : array_like
        Input with at least two dimensions.
    N : int
        Edge length of each tile.

    Returns
    -------
    numpy.ndarray
        A new array of shape ``(n_tiles, N, N, ...)`` that does not share
        memory with ``arr``. ``n_tiles`` is 0 when ``arr`` is smaller than
        one tile along either of its first two axes.

    Raises
    ------
    ValueError
        If ``arr`` has fewer than two dimensions.
    """
    arr = np.asarray(arr)
    if arr.ndim < 2:
        raise ValueError('resamp only works on arrays of 2 or more dimensions')

    n_rows = arr.shape[0] // N
    n_cols = arr.shape[1] // N
    extra = arr.shape[2:]

    #Dropping the incomplete tiles at the bottom/right edges
    cropped = arr[:n_rows * N, :n_cols * N]

    #Splitting both axes into (tile index, offset inside tile) and bringing the
    #two tile indices next to each other: (tile row, tile col, N, N, ...)
    blocks = cropped.reshape((n_rows, N, n_cols, N) + extra).swapaxes(1, 2)

    #The C-ordered copy guarantees that the result never aliases the input and
    #that the final reshape is a plain view on the fresh copy
    return np.array(blocks, order='C').reshape((n_rows * n_cols, N, N) + extra)

In [3]:
import gdal
from os.path import join
from datetime import datetime

#Initiating the output training data directory
outdir = r'Data\water'

#Initiating which class to assemble (NOTE: This step is necessary due to the large amount of data the code has to hold in
#memory)
#Possible inputs: ['ice', 'water']
cnn_class = 'water'

#Specifying the sample size, e.g. number of pixels
numpix = 20

#Specifying the number of bands
numbands = 4

#Only images acquired before this date are added to the training data
cutoff_date = datetime(2019, 8, 1)

#Pixel value that marks NoData in the processed images (in addition to NaN)
nodata_value = -90

#Collecting the valid subsamples of every image in a list and concatenating
#them once at the end; appending to one growing array inside the loop would
#copy all previously collected samples again for every image.
#The list starts with an empty array so that the result has the right shape
#even when no image is selected.
subsample_blocks = [np.empty([0, numpix, numpix, numbands])]

#Start filtering all images
for image in files:

    #The acquisition date (YYYYMMDD) is the 7th underscore-separated field of
    #the file name
    datestr = image.split('_')[6]
    acquired = datetime(int(datestr[0:4]), int(datestr[4:6]), int(datestr[6:8]))

    if acquired >= cutoff_date:
        continue

    print('Adding ' + image + ' to the ' + cnn_class + ' training data array')

    #Reading the processed image
    ras = gdal.Open(join(indir, image))
    if ras is None:
        raise RuntimeError('GDAL could not open ' + join(indir, image))

    #Number of complete subsamples that fit along each image axis
    ncols = ras.RasterXSize // numpix
    nrows = ras.RasterYSize // numpix

    if ncols == 0 or nrows == 0:
        print('Skipping ' + image + ': smaller than one subsample')
        del ras
        continue

    #Creating a temporary array
    temp_array = np.empty([ncols * nrows, numpix, numpix, numbands])

    #Subsampling the image by band
    for k in range(numbands):

        #Getting the band from the image, cropped to a whole number of
        #subsamples in both directions (GDAL band numbers start at 1)
        band = ras.GetRasterBand(k + 1).ReadAsArray(0, 0,
                                                    ncols * numpix,
                                                    nrows * numpix)

        #Resampling the band into numpix x numpix subsamples
        temp_array[:, :, :, k] = resamp(band, numpix)

        del band

    del ras

    #Finding the NoData subsamples: a subsample is rejected when either of
    #the first two bands contains a NaN or the NoData value
    first_two = temp_array[:, :, :, 0:2]
    has_nodata = (np.isnan(first_two) | (first_two == nodata_value)).any(axis=(1, 2, 3))

    #Keeping only the valid subsamples of this image
    subsample_blocks.append(temp_array[~has_nodata])
    del temp_array, first_two, has_nodata

#Assembling the final training data array for the selected class
trarray = np.concatenate(subsample_blocks, axis=0)
del subsample_blocks

#Saving the Training Data
np.save(join(outdir, 'TrainingDataS0_' + cnn_class + '.npy'), trarray)
        

Adding S0_RS2_OK100050_PK871438_DK804345_SCWA_20180813_003647_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK100051_PK871472_DK804379_SCWA_20180814_000729_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK100282_PK872914_DK805544_SCWA_20180814_134507_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK100286_PK873035_DK805666_SCWA_20180818_132805_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK100286_PK873036_DK805667_SCWA_20180818_132920_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK100286_PK873042_DK805673_SCWA_20180819_010157_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK100287_PK873077_DK805708_SCWA_20180820_003228_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK100606_PK874758_DK808115_SCWA_20180824_001619_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK100608_PK874801_DK808158_SCWA_20180825_132501_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK100

Adding S0_RS2_OK13156_PK148075_DK142207_SCWA_20100727_143708_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK14352_PK156321_DK148795_SCWA_20100827_005501_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK14352_PK158330_DK150413_SCWA_20100828_002529_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK14355_PK164152_DK154972_SCWA_20100918_135034_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK14569_PK160003_DK151886_SCWA_20100915_133805_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK14569_PK162357_DK153639_SCWA_20100925_134626_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK14569_PK164182_DK151887_SCWA_20100918_135036_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK15178_PK163009_DK154412_SCWA_20101001_141209_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK15181_PK163851_DK154707_SCWA_20101004_142427_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK15182_PK1638

Adding S0_RS2_OK23855_PK251539_DK232302_SCWA_20111023_142457_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK31367_PK309002_DK275817_SCWA_20120723_143220_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK31607_PK309396_DK276221_SCWA_20120724_140327_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK31612_PK309487_DK276309_SCWA_20120727_141541_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK31614_PK314863_DK276335_SCWA_20120728_134612_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK31614_PK314864_DK276336_SCWA_20120728_134727_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK31616_PK309605_DK276415_SCWA_20120730_142905_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK31826_PK312385_DK278522_SCWA_20120731_135918_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK31831_PK317994_DK278807_SCWA_20120804_134312_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK31832_PK3126

Adding S0_RS2_OK44816_PK431795_DK381841_SCWA_20131012_142407_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK44818_PK431841_DK381884_SCWA_20131013_135457_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK45163_PK434789_DK384782_SCWA_20131016_140734_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK45165_PK434909_DK384812_SCWA_20131017_133834_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK45173_PK434991_DK384886_SCWA_20131019_141952_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK45175_PK435028_DK384925_SCWA_20131020_135045_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK45175_PK435031_DK384928_SCWA_20131021_012431_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK45366_PK437862_DK388476_SCWA_20131022_143227_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK45368_PK437893_DK388523_SCWA_20131023_140316_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK45374_PK4380

Adding S0_RS2_OK67765_PK618648_DK549608_SCWA_20150921_010720_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK68026_PK620534_DK551340_SCWA_20150928_010257_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK68196_PK621895_DK552232_SCWA_20151002_004613_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK68199_PK621918_DK552252_SCWA_20151002_142344_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK68201_PK621951_DK552283_SCWA_20151003_135524_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK68203_PK622027_DK552356_SCWA_20151005_005843_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK68206_PK622067_DK552395_SCWA_20151006_002928_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK68330_PK622791_DK553141_SCWA_20151006_140701_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK68337_PK622865_DK553212_SCWA_20151009_141932_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK68339_PK6228

Adding S0_RS2_OK80440_PK709261_DK637241_SCWA_20161017_003323_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK80761_PK710525_DK638784_SCWA_20161018_134204_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK80761_PK710707_DK638966_SCWA_20161019_011529_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK80764_PK710747_DK639002_SCWA_20161020_004554_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK80767_PK710577_DK638836_SCWA_20161020_142323_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK80769_PK710602_DK638861_SCWA_20161021_135420_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK80771_PK710638_DK638897_SCWA_20161023_005830_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK80774_PK710654_DK638913_SCWA_20161023_143554_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK8120_PK96963_DK95599_SCWA_20091014_133746_HH_HV_SGF.tif to the water training data array
Adding S0_RS2_OK8122_PK96990_DK

In [4]:
#Dimensions of the assembled training data array
trarray.shape

(5534646, 20, 20, 4)