Script to extract small test samples that can be used to test the Dataset classes. Tests should then be written that check the metadata values, file structure, normalisation, splitting locations, etc. of each dataset.

In [1]:
import numpy as np
import random
import os
import tifffile as tif
import glymur
import cv2
import spectral as spy
import skimage.io

import pprint

# Fix the seeds of both random number generators so that the scene choices
# and crop locations drawn further down are repeatable on a fresh kernel.
NUMPY_SEED = 1123
PYTHON_SEED = 259

random.seed(PYTHON_SEED)
np.random.seed(NUMPY_SEED)

In [2]:
# Set paths for root directories of datasets.
#
# Each path defaults to the original local location but can be overridden by
# an environment variable (L7IRISH206_DIR, L8BIOME96_DIR, L8SPARCS80_DIR,
# S2CESBIO38_DIR), so the notebook also runs on machines with a different
# drive layout. The values stay plain strings because later cells slice and
# join them with os.path.

L7Irish206_dir = os.environ.get('L7IRISH206_DIR', 'E:/datasets/L7Irish206')
L8Biome96_dir = os.environ.get('L8BIOME96_DIR', 'E:/datasets/L8Biome96')
L8SPARCS80_dir = os.environ.get('L8SPARCS80_DIR', 'D:/datasets/clouds/L8SPARCS80')
S2CESBIO38_dir = os.environ.get('S2CESBIO38_DIR', 'E:/datasets/S2CESBIO38')


In [3]:
# Randomly select a scene for each dataset.
#
# Candidate lists are de-duplicated and sorted before drawing: os.walk and
# os.listdir return entries in arbitrary order, and a seeded random.choice is
# only repeatable when the candidate order is fixed. De-duplication also stops
# a directory with several matching files from being favoured in the draw.


def _dirs_with_file(root_dir, marker):
    """Return the sorted, unique directories below root_dir that contain a
    file whose name includes marker."""
    return sorted({
        root
        for root, _dirs, filenames in os.walk(root_dir)
        if any(marker in filename for filename in filenames)
    })


def _choose_scene(candidates, dataset_name):
    """Draw one entry from candidates with the global `random` generator.

    Raises FileNotFoundError (instead of an opaque IndexError from
    random.choice) when no candidate was found.
    """
    if not candidates:
        raise FileNotFoundError(
            'No scenes found for {}; check its root directory.'.format(dataset_name)
        )
    return random.choice(candidates)


## L7Irish206
scene_dirs = _dirs_with_file(L7Irish206_dir, 'mask2019')
L7Irish206_sceneid = _choose_scene(scene_dirs, 'L7Irish206')
print('L7Irish206 SCENE: ',L7Irish206_sceneid)


## L8Biome96
scene_dirs = _dirs_with_file(L8Biome96_dir, 'fixedmask.hdr')
L8Biome96_sceneid = _choose_scene(scene_dirs, 'L8Biome96')
print('L8Biome96 SCENE: ',L8Biome96_sceneid)


## L8SPARCS80
# All files sit directly in the dataset root. [:-12] drops the last 12
# characters of each mask path so only the scene prefix remains.
# NOTE(review): presumably the removed tail is '_<2-digit patch>_mask.png' -- confirm.
scenes = sorted({
    os.path.join(L8SPARCS80_dir,path)[:-12]
    for path in os.listdir(L8SPARCS80_dir)
    if path.endswith('_mask.png')
})
L8SPARCS80_sceneid = _choose_scene(scenes, 'L8SPARCS80')
print('L8SPARCS80 SCENE: ',L8SPARCS80_sceneid)


## S2CESBIO38
scene_dirs = _dirs_with_file(S2CESBIO38_dir, 'classification_map')
S2CESBIO38_sceneid = _choose_scene(scene_dirs, 'S2CESBIO38')
print('S2CESBIO38 SCENE: ',S2CESBIO38_sceneid)


L7Irish206 SCENE:  E:/datasets/L7Irish206\p108_r18
L8Biome96 SCENE:  E:/datasets/L8Biome96\Urban\LC81940222013245LGN00
L8SPARCS80 SCENE:  D:/datasets/clouds/L8SPARCS80\LC80340412013132LGN01
S2CESBIO38 SCENE:  E:/datasets/S2CESBIO38\Reference_dataset\scene_20\Classification


In [4]:
# Extract an image/mask for L7Irish206.
#
# Steps: load every band of the selected scene, resample all bands onto the
# grid of the first band, cut one random patch out of bands and mask, then
# resample each cropped band back to its original relative resolution.


L7Irish206_band_ids = ['B10.TIF',
                       'B20.TIF',
                       'B30.TIF',
                       'B40.TIF',
                       'B50.TIF',
                       'B61.TIF',
                       'B62.TIF',
                       'B70.TIF',
                       'B80.TIF']

# Walk the scene directory once and reuse the listing for bands and mask.
L7Irish206_files = [(name, os.path.join(root, name))
                    for root, _dirs, names in os.walk(L7Irish206_sceneid)
                    for name in names]

# Bands are loaded in the order of L7Irish206_band_ids.
L7Irish206_bands = [tif.imread(file_path).astype('float64')
                    for band_id in L7Irish206_band_ids
                    for file_name, file_path in L7Irish206_files
                    if band_id in file_name]
if not L7Irish206_bands:
    raise FileNotFoundError('No band files found in {}'.format(L7Irish206_sceneid))

# Reset the mask first so a missing file cannot silently reuse a mask left
# over from an earlier run of this cell. The last matching file wins.
L7Irish206_mask = None
for file_name, file_path in L7Irish206_files:
    if 'mask2019' in file_name:
        L7Irish206_mask = tif.imread(file_path)
if L7Irish206_mask is None:
    raise FileNotFoundError('No mask2019 file found in {}'.format(L7Irish206_sceneid))

# Height of each band relative to the first band.
L7Irish206_scale_factors = [band.shape[0]/L7Irish206_bands[0].shape[0] for band in L7Irish206_bands]
print(L7Irish206_scale_factors)

# cv2.resize expects dsize as (width, height), hence the reversed shape.
# The flag must be passed by keyword: the third positional parameter of
# cv2.resize is `dst`, so a positional flag never selects the interpolation.
reference_size = L7Irish206_bands[0].shape[::-1]
L7Irish206_bands = np.stack([cv2.resize(band,reference_size,interpolation=cv2.INTER_NEAREST) for band in L7Irish206_bands], axis=-1)

# GET REGION FOR CROP
# The top-left corner is drawn so that the patch always covers the image centre.
centre = [int(L7Irish206_bands.shape[0]/2),int(L7Irish206_bands.shape[1]/2)]
patch_size = [random.randint(700,1000),random.randint(700,1000)]
top_left = [random.randint(centre[0]-patch_size[0],centre[0]),random.randint(centre[1]-patch_size[1],centre[1])]
crop = slice(top_left[0],top_left[0]+patch_size[0]),slice(top_left[1],top_left[1]+patch_size[1])

L7Irish206_bands = L7Irish206_bands[crop]
L7Irish206_mask = L7Irish206_mask[crop]

print(L7Irish206_bands.shape)
print(L7Irish206_mask.shape)

# Split the cropped stack into single bands and resample each one by its
# scale factor, so every band regains its original relative resolution.

L7Irish206_bands_list = [L7Irish206_bands[...,i] for i in range(L7Irish206_bands.shape[-1])]
L7Irish206_input_bands=[]
for band,s_f in zip(L7Irish206_bands_list,L7Irish206_scale_factors):
    target_size = tuple(int(d*s_f) for d in band.shape[::-1])
    L7Irish206_input_bands.append(cv2.resize(band,target_size,interpolation=cv2.INTER_NEAREST))

[1.0, 1.0, 1.0, 1.0, 1.0, 0.5000655222120298, 0.5000655222120298, 1.0, 1.9998689555759404]
(952, 820, 9)
(952, 820)


In [164]:
# Extract an image/mask for L8Biome96.
#
# Steps: load every band of the selected scene, resample all bands onto the
# grid of the first band, cut one random patch out of bands and mask, then
# resample each cropped band back to its original relative resolution.


L8Biome96_band_ids = ['B1.TIF','B2.TIF','B3.TIF','B4.TIF','B5.TIF','B6.TIF','B7.TIF','B8.TIF','B9.TIF','B10.TIF','B11.TIF']

# Walk the scene directory once and reuse the listing for bands and mask.
L8Biome96_files = [(name, os.path.join(root, name))
                   for root, _dirs, names in os.walk(L8Biome96_sceneid)
                   for name in names]

# Bands are loaded in the order of L8Biome96_band_ids.
L8Biome96_bands = [tif.imread(file_path).astype('float64')
                   for band_id in L8Biome96_band_ids
                   for file_name, file_path in L8Biome96_files
                   if band_id in file_name]
if not L8Biome96_bands:
    raise FileNotFoundError('No band files found in {}'.format(L8Biome96_sceneid))

# Reset the mask first so a missing file cannot silently reuse a mask left
# over from an earlier run of this cell. The last matching file wins.
L8Biome96_mask = None
for file_name, file_path in L8Biome96_files:
    if '_fixedmask.hdr' in file_name:
        # The mask is opened through its header file; squeeze drops singleton axes.
        L8Biome96_mask = np.squeeze(spy.open_image(file_path).load())
if L8Biome96_mask is None:
    raise FileNotFoundError('No _fixedmask.hdr file found in {}'.format(L8Biome96_sceneid))

# Height of each band relative to the first band.
L8Biome96_scale_factors = [band.shape[0]/L8Biome96_bands[0].shape[0] for band in L8Biome96_bands]
print(L8Biome96_scale_factors)

# cv2.resize expects dsize as (width, height), hence the reversed shape.
# The flag must be passed by keyword: the third positional parameter of
# cv2.resize is `dst`, so a positional flag never selects the interpolation.
reference_size = L8Biome96_bands[0].shape[::-1]
L8Biome96_bands = np.stack([cv2.resize(band,reference_size,interpolation=cv2.INTER_NEAREST) for band in L8Biome96_bands], axis=-1)

# GET REGION FOR CROP
# The top-left corner is drawn so that the patch always covers the image centre.
centre = [int(L8Biome96_bands.shape[0]/2),int(L8Biome96_bands.shape[1]/2)]
patch_size = [random.randint(700,1000),random.randint(700,1000)]
top_left = [random.randint(centre[0]-patch_size[0],centre[0]),random.randint(centre[1]-patch_size[1],centre[1])]
crop = slice(top_left[0],top_left[0]+patch_size[0]),slice(top_left[1],top_left[1]+patch_size[1])

L8Biome96_bands = L8Biome96_bands[crop]
L8Biome96_mask = L8Biome96_mask[crop]

print(L8Biome96_bands.shape)
print(L8Biome96_mask.shape)

# Split the cropped stack into single bands and resample each one by its
# scale factor, so every band regains its original relative resolution.

L8Biome96_bands_list = [L8Biome96_bands[...,i] for i in range(L8Biome96_bands.shape[-1])]
L8Biome96_input_bands=[]
for band,s_f in zip(L8Biome96_bands_list,L8Biome96_scale_factors):
    target_size = tuple(int(d*s_f) for d in band.shape[::-1])
    L8Biome96_input_bands.append(cv2.resize(band,target_size,interpolation=cv2.INTER_NEAREST))

  from ipykernel import kernelapp as app


[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.9998786555029728, 1.0, 1.0, 1.0]
(724, 964, 11)
(724, 964)


In [165]:
# Extract an image/mask for L8SPARCS80.
#
# The file of the selected scene whose name contains '_data' supplies the
# band stack, the one containing '_mask' supplies the annotation. One random
# patch is then cut out of both.


L8SPARCS80_band_ids = ['B1','B2','B3','B4','B5','B6','B7','B9','B10','B11']

# All files of the dataset share one directory; keep only those of our scene.
scene_name = os.path.basename(L8SPARCS80_sceneid)
for file_name in os.listdir(L8SPARCS80_dir):
    if scene_name not in file_name:
        continue
    file_path = os.path.join(L8SPARCS80_dir, file_name)
    if '_data' in file_name:
        L8SPARCS80_bands = tif.imread(file_path).astype('float64')
    elif '_mask' in file_name:
        L8SPARCS80_mask = skimage.io.imread(file_path)


# GET REGION FOR CROP
# The top-left corner is drawn so that the patch always covers the image centre.
n_rows, n_cols = L8SPARCS80_bands.shape[:2]
centre = [int(n_rows/2), int(n_cols/2)]
patch_size = [random.randint(300,460) for _ in range(2)]
top_left = [random.randint(mid - size, mid) for mid, size in zip(centre, patch_size)]
crop = tuple(slice(start, start + size) for start, size in zip(top_left, patch_size))

L8SPARCS80_bands = L8SPARCS80_bands[crop]
L8SPARCS80_mask = L8SPARCS80_mask[crop]

print(L8SPARCS80_bands.shape)
print(L8SPARCS80_mask.shape)

(321, 350, 10)
(321, 350, 3)


In [166]:
# Extract an image/mask for S2CESBIO.
#
# Steps: load every band of the selected scene, resample all bands onto the
# grid of the first band, cut one random patch out of bands and mask, then
# resample each cropped band back to its original relative resolution.
#
# NOTE(review): bands are searched beneath S2CESBIO38_sceneid, which the scene
# selection sets to the directory holding the classification map -- confirm
# that the .jp2 band files really live beneath that directory.


S2CESBIO38_band_ids = ['B01.jp2',
                       'B02.jp2',
                       'B03.jp2',
                       'B04.jp2',
                       'B05.jp2',
                       'B06.jp2',
                       'B07.jp2',
                       'B08.jp2',
                       'B8A.jp2',
                       'B09.jp2',
                       'B10.jp2',
                       'B11.jp2',
                       'B12.jp2'
                       ]

# Walk the scene directory once and reuse the listing for bands and mask.
S2CESBIO38_files = [(name, os.path.join(root, name))
                    for root, _dirs, names in os.walk(S2CESBIO38_sceneid)
                    for name in names]

# Bands are loaded in the order of S2CESBIO38_band_ids.
S2CESBIO38_bands = [glymur.Jp2k(file_path)[:].astype('float64')
                    for band_id in S2CESBIO38_band_ids
                    for file_name, file_path in S2CESBIO38_files
                    if band_id in file_name]
if not S2CESBIO38_bands:
    raise FileNotFoundError('No band files found in {}'.format(S2CESBIO38_sceneid))

# Reset the mask first so a missing file cannot silently reuse a mask left
# over from an earlier run of this cell. The last matching file wins.
S2CESBIO38_mask = None
for file_name, file_path in S2CESBIO38_files:
    if 'classification_map' in file_name:
        S2CESBIO38_mask = tif.imread(file_path)
if S2CESBIO38_mask is None:
    raise FileNotFoundError('No classification_map file found in {}'.format(S2CESBIO38_sceneid))

# Resize all bands to same resolution as first band and make into array
S2CESBIO38_scale_factors = [band.shape[0]/S2CESBIO38_bands[0].shape[0] for band in S2CESBIO38_bands]
print(S2CESBIO38_scale_factors)

# cv2.resize expects dsize as (width, height), hence the reversed shape.
# The flag must be passed by keyword: the third positional parameter of
# cv2.resize is `dst`, so a positional flag never selects the interpolation.
reference_size = S2CESBIO38_bands[0].shape[::-1]
S2CESBIO38_bands = np.stack([cv2.resize(band,reference_size,interpolation=cv2.INTER_NEAREST) for band in S2CESBIO38_bands], axis=-1)


# GET REGION FOR CROP
# The top-left corner is drawn so that the patch always covers the image centre.
centre = [int(S2CESBIO38_bands.shape[0]/2),int(S2CESBIO38_bands.shape[1]/2)]
patch_size = [random.randint(300,500),random.randint(300,500)]
top_left = [random.randint(centre[0]-patch_size[0],centre[0]),random.randint(centre[1]-patch_size[1],centre[1])]
crop = slice(top_left[0],top_left[0]+patch_size[0]),slice(top_left[1],top_left[1]+patch_size[1])

S2CESBIO38_bands = S2CESBIO38_bands[crop]
S2CESBIO38_mask = S2CESBIO38_mask[crop]

print(S2CESBIO38_bands.shape)
print(S2CESBIO38_mask.shape)

# Split the cropped stack into single bands and resample each one by its
# scale factor, so every band regains its original relative resolution.

S2CESBIO38_bands_list = [S2CESBIO38_bands[...,i] for i in range(S2CESBIO38_bands.shape[-1])]
S2CESBIO38_input_bands=[]
for band,s_f in zip(S2CESBIO38_bands_list,S2CESBIO38_scale_factors):
    target_size = tuple(int(d*s_f) for d in band.shape[::-1])
    S2CESBIO38_input_bands.append(cv2.resize(band,target_size,interpolation=cv2.INTER_NEAREST))


[1.0, 6.0, 6.0, 6.0, 3.0, 3.0, 3.0, 6.0, 3.0, 1.0, 1.0, 3.0, 3.0]
(335, 334, 13)
(335, 334)


(335, 334)
(2010, 2004)
(2010, 2004)
(2010, 2004)
(1005, 1002)
(1005, 1002)
(1005, 1002)
(2010, 2004)
(1005, 1002)
(335, 334)
(335, 334)
(1005, 1002)
(1005, 1002)


(200, 150)
