# Read all the files in the "ice" subfolder and create a large 4D NumPy array to train the CNN model

In [4]:
from os import listdir
from os.path import join

#Initiating the input directory (built with join so the separator matches the host OS;
#on Windows this is the same string as r'Data\ice')
indir = join('Data', 'ice')

#Finding all the processed geotiffs in the input directory
#NOTE: listdir returns the names in arbitrary order, so the list is sorted to keep the order of
#the samples in the training array reproducible between runs and platforms
files = sorted(f for f in listdir(indir) if f.startswith('S0_RS2') and f.endswith('.tif'))

#Prints out the number of files to filter
print('There are ' + str(len(files)) + ' files to filter...')

There are 586 files to filter...


In [5]:
import numpy as np

#Function to resample the different bands into N x N subsamples
def resamp(arr, N):
    """Cut an array into non-overlapping N x N tiles along its first two axes.

    Tiles are ordered row-major over the tile grid: all tiles of the first
    strip of N rows (left to right), then the next strip, and so on.
    Rows and columns beyond the largest multiple of N are dropped, so the
    input does not have to be cropped by the caller.

    Parameters
    ----------
    arr : numpy.ndarray
        Array with at least two dimensions. The first two axes are tiled;
        any further axes are carried through unchanged.
    N : int
        Edge length of a tile.

    Returns
    -------
    numpy.ndarray
        Newly allocated array of shape
        ``((arr.shape[0] // N) * (arr.shape[1] // N), N, N) + arr.shape[2:]``
        with the dtype of ``arr``. It has zero tiles when ``arr`` is smaller
        than N along either of the first two axes.

    Raises
    ------
    ValueError
        If ``arr`` has fewer than two dimensions.
    """
    if arr.ndim < 2:
        raise ValueError('resamp needs an array with at least 2 dimensions')

    nrows = arr.shape[0] // N
    ncols = arr.shape[1] // N
    extra = arr.shape[2:]

    #Crop to a whole number of tiles, then split each tiled axis into
    #(tile index, offset inside the tile)
    grid = arr[:nrows * N, :ncols * N].reshape((nrows, N, ncols, N) + extra)

    #Writing into a fresh buffer guarantees the result never aliases the input
    tiles = np.empty((nrows * ncols, N, N) + extra, dtype=arr.dtype)
    tiles.reshape((nrows, ncols, N, N) + extra)[...] = grid.swapaxes(1, 2)
    return tiles

In [6]:
import gdal
from os.path import join
from datetime import datetime

#Initiating the output training data directory
#NOTE(review): the directory name is hard-coded to 'ice' and does not follow cnn_class below;
#confirm which directory the 'water' run is expected to use
outdir = join('Data', 'ice')

#Initiating which class to assemble (NOTE: This step is necessary due to the large amount of data the code has to hold in
#memory)
#Possible inputs: ['ice', 'water']
cnn_class = 'ice'

#Specifying the sample size, e.g. number of pixels
numpix = 20

#Specifying the number of bands
numbands = 4

#Pixel value that flags a subsample as NoData when found in the first two bands
nodata_value = -90

#Images from this date and beyond are kept for testing the CNN model
test_start = datetime(2019,8,1)

#Collecting one array of valid subsamples per image. They are joined once after the loop,
#because np.append inside the loop would copy the whole accumulated array for every image.
#The empty seed array keeps the final shape and dtype well defined when no image qualifies.
chunks = [np.empty([0,numpix,numpix,numbands])]

#Start filtering all images
for image in files:
    #The acquisition date (YYYYMMDD) is the 7th underscore-separated token of the file name
    acquired = datetime.strptime(image.split('_')[6][0:8], '%Y%m%d')

    #Skipping the images reserved for testing
    if acquired >= test_start:
        continue

    print('Adding ' + image + ' to the ' + cnn_class + ' training data array')

    #Reading the processed image
    ras = gdal.Open(join(indir, image))
    if ras is None:
        #gdal.Open can return None instead of raising when the file cannot be read
        raise IOError('Could not open ' + join(indir, image))

    #Number of whole numpix x numpix subsamples that fit along each image axis
    ncols = ras.RasterXSize // numpix
    nrows = ras.RasterYSize // numpix

    #Creating a temporary array
    temp_array = np.empty([nrows * ncols, numpix, numpix, numbands])

    #Subsampling the image by band
    for k in range(numbands):

        #Getting the band from the image, cropped to a whole number of subsamples
        band = ras.GetRasterBand(k + 1).ReadAsArray(0, 0, ncols * numpix, nrows * numpix)

        #Resampling the band into numpix x numpix subsamples
        temp_array[:,:,:,k] = resamp(band, numpix)

        del band

    #Finding the NoData subsamples: any NaN or nodata_value pixel in the first two bands
    checked = temp_array[:,:,:,0:2]
    has_nodata = (np.isnan(checked) | (checked == nodata_value)).any(axis=(1,2,3))

    #Keeping only the valid subsamples of this image
    chunks.append(temp_array[~has_nodata])
    del temp_array, checked, has_nodata, ras

#Assembling the final training data array in a single copy
trarray = np.concatenate(chunks, axis=0)
del chunks

#Saving the Training Data
np.save(join(outdir, 'TrainingDataS0_' + cnn_class + '.npy'), trarray)
        

Adding S0_RS2_OK103852_PK901611_DK835303_SCWA_20181210_010600_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK104325_PK904711_DK837884_SCWA_20181222_001546_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK104327_PK904780_DK837969_SCWA_20181224_005723_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK104497_PK906903_DK840008_SCWA_20181229_001132_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK104498_PK906930_DK840035_SCWA_20181230_012309_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK104499_PK906943_DK840048_SCWA_20181230_132046_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK104499_PK906978_DK840083_SCWA_20181231_005320_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK104864_PK909364_DK842420_SCWA_20190107_004913_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK105064_PK911462_DK843838_SCWA_20190114_004503_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK10515_PK118372_DK1145

Adding S0_RS2_OK11494_PK129295_DK125167_SCWA_20100510_003346_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK11501_PK131013_DK126468_SCWA_20100517_002932_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK11507_PK133379_DK128846_SCWA_20100523_005434_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK11508_PK133431_DK128898_SCWA_20100524_002523_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK11514_PK134279_DK129875_SCWA_20100530_005028_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK11809_PK135297_DK130434_SCWA_20100606_004611_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK11816_PK136654_DK131792_SCWA_20100613_004206_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK120533_PK1060070_DK1001352_SCWA_20181229_001205_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK120533_PK1060073_DK1001355_SCWA_20190114_004529_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK120533_PK1060074_DK10013

Adding S0_RS2_OK25732_PK271283_DK247418_SCWA_20120116_010754_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK25738_PK269825_DK246331_SCWA_20120121_135902_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK25739_PK269888_DK246389_SCWA_20120123_010313_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK26860_PK272466_DK248420_SCWA_20120204_135119_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK26860_PK272488_DK248440_SCWA_20120205_012426_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK26861_PK272556_DK248518_SCWA_20120206_005448_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK26867_PK273306_DK249216_SCWA_20120211_134628_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK26867_PK273325_DK249235_SCWA_20120212_012011_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK26868_PK273381_DK249571_SCWA_20120213_005037_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK26874_PK275003_DK250312_SCWA_2

Adding S0_RS2_OK38261_PK376279_DK331615_SCWA_20130414_002540_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK38262_PK376300_DK331634_SCWA_20130414_140330_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK38264_PK378766_DK332852_SCWA_20130417_003808_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK38266_PK378826_DK333321_SCWA_20130418_134632_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK38269_PK378949_DK333239_SCWA_20130420_142813_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK38270_PK378979_DK333269_SCWA_20130421_135920_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK38272_PK380819_DK334848_SCWA_20130424_003358_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK38274_PK380842_DK334859_SCWA_20130425_134212_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK38276_PK380922_DK334922_SCWA_20130427_142406_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK38277_PK380978_DK334953_SCWA_2

Adding S0_RS2_OK59050_PK552753_DK487969_SCWA_20141203_012332_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK59059_PK553533_DK488794_SCWA_20141207_010729_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK59060_PK550016_DK486274_SCWA_20141208_003801_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK5923_PK76319_DK74165_SCWA_20090628_005013_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK59297_PK552117_DK487380_SCWA_20141214_010301_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK59299_PK552158_DK487417_SCWA_20141215_003344_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK59427_PK553403_DK488684_SCWA_20141221_005855_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK59428_PK553424_DK488705_SCWA_20141221_143625_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK59428_PK553446_DK488725_SCWA_20141222_002925_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK59633_PK555228_DK489896_SCWA_2014

Adding S0_RS2_OK72747_PK653286_DK583123_SCWA_20160214_134614_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK72747_PK656229_DK583145_SCWA_20160215_011949_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK72889_PK654456_DK583684_SCWA_20160220_141115_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK72890_PK654496_DK583724_SCWA_20160221_134147_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK72890_PK654511_DK583739_SCWA_20160222_011532_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK73242_PK655580_DK584722_SCWA_20160227_140701_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK73243_PK655614_DK584756_SCWA_20160228_133746_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK73243_PK655635_DK584777_SCWA_20160229_011121_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK73604_PK658770_DK587577_SCWA_20160312_135834_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK73605_PK658831_DK587636_SCWA_2

Adding S0_RS2_OK92890_PK820733_DK749024_SCWA_20171210_134943_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK92890_PK820761_DK749052_SCWA_20171211_012330_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK93079_PK822755_DK751333_SCWA_20171217_134526_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK93079_PK822786_DK751361_SCWA_20171218_011905_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK93288_PK824107_DK753741_SCWA_20171224_134119_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK93288_PK824138_DK753771_SCWA_20171225_011448_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK93444_PK825129_DK754765_SCWA_20171231_133659_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK93444_PK825164_DK754800_SCWA_20180101_011031_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK9356_PK106966_DK104014_SCWA_20100130_142827_HH_HV_SGF.tif to the ice training data array
Adding S0_RS2_OK9357_PK109064_DK105726_SCWA_201

In [7]:
#Dimensions of the assembled training data array
trarray.shape

(7215051, 20, 20, 4)