## QC of Ingestion

This notebook checks the INGESTED RASTER data (on s3) information against EDC data. A list of QC-tests is carried out for each tile and a QC-file is created:
-> [QC_environmental_zones_1km.txt]






In [1]:
# Configure plots for inline use in Jupyter Notebook
%matplotlib inline
import datetime as dt
# Utilities
import boto3
import dateutil
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import os
import rasterio
import rasterio.mask
from rasterio.plot import show_hist
from rasterio.plot import show
from rasterio.windows import Window
import random
import fiona
import numpy as np
from shapely.geometry import mapping, Polygon
from shapely import geometry
import logging    

# Sentinel Hub
from sentinelhub import (
    CRS,
    BBox,
    ByocCollection,
    ByocCollectionAdditionalData,
    ByocCollectionBand,
    ByocTile,
    DataCollection,
    DownloadFailedException,
    MimeType,
    SentinelHubBYOC,
    SentinelHubRequest,
    SHConfig,
    bbox_to_dimensions,
    os_utils,
)

config = SHConfig()
config.instance_id = os.environ.get("SH_INSTANCE_ID")
config.sh_client_id = os.environ.get("SH_CLIENT_ID")
config.sh_client_secret = os.environ.get("SH_CLIENT_SECRET")
config.aws_access_key_id = os.environ.get("username")
config.aws_secret_access_key = os.environ.get("password")
    
print ("reading of libaries done")
    

readin of libaries done


In [3]:
# Read raster files on S3 drive
#### CHECKING ENV ZONES:

##########################SET name and data folder: ################################################################################ S
name_of_ingestion = "environmental_zones_1km"  ## should be the collection name!!!"
collection_id ='5b45916e-6704-4581-824f-4d713198731b'  # collection ID 
input_folder ="./../../../../s3/data/d005_env_zones/eea_r_3035_1_km_envzones_p_2018_v01_r00"   ## conect to tile folder on S3
##################################################################################################################################### E

# assign s3 directory
directory = input_folder
print (directory)
print ("following raster is selected for QC:")
##################################### QC log file set up:#################################### S
#now we will Create and configure logger 
#https://www.geeksforgeeks.org/reading-writing-text-files-python/
qc_log = open("QC_"+name_of_ingestion+".txt","w")
qc_log.write("QC_"+name_of_ingestion+" \n")
qc_log.write("---------------------------------------------------------- \n")
############################################################################################# E



###############################################################open collection: ############ S
# Initialize SentinelHubBYOC class
byoc = SentinelHubBYOC(config=config)
#############################################################
name_of_your_collection = name_of_ingestion
collections_iterator = byoc.iter_collections(name_of_your_collection)
my_collection = list(collections_iterator)[0]
tiles = list(byoc.iter_tiles(my_collection))
############################################################################################# E


###  READING sub-tiles inside collection ##################### ############ S
for tile in tiles:
    #print(tile)
    tile_name = tile['path'].split("/")   ## get correct rasster-tile name from cube-tile:
    tile_name_2=(tile_name[3])
    tile_name_3 = tile_name_2.split("(")
    tile_name_4= tile_name_3[0]+"B01.tif"
    ###########################################################################################################################START QC of every single sub -.tile:

    ## reading the raster files on s3:
    #raster_list=[]
    
    for filename in os.scandir(directory):                                 ####LOOP first over cube tiles - then over raster tiles  (1)
        if filename.is_file():
            #print(filename.path)
            raster_with_full_filename = filename
            raster_name_from_s3_1=filename.path.split("/")
            raster_name_from_s3_2=raster_name_from_s3_1[-1]

            if tile_name_4 == raster_name_from_s3_2:                        ####LOOP first over cube tiles - then over raster tiles  (2)
                print("xxxxxxxxxxxxxxxxxxxxxxxxxxxxx")
                
                ## PART 1 select randomly one raster: ----------------------------------    
                raster_for_qc = raster_with_full_filename  
                print('ooooooooooooooooooooooooooooooooooooo')
                print ( "RASTER for QC:")
                print(raster_for_qc)
                print ( "CUBE tile for QC:")
                print(tile['id'])     
                print('ooooooooooooooooooooooooooooooooooooo')
                

                ### PART 1 b read raster: ----------------------------------  
                ##raster_for_qc=test_raster############################################################################################# S
                raster = rasterio.open(raster_for_qc)
                qc_log.write("(1) CHECK 1 - spatial check for raster files:   \n")
                qc_log.write("----------------------------------------------   \n")
                # read different raster information from s3 raster file (tiles):
                raster_name = raster.name
                raster_bands = raster.count
                raster_width= raster.width
                raster_height= raster.height
                raster_bounds= raster.bounds
                
                qc_log.write( raster_name + "   \n ")
                #
                left = raster_bounds[0]
                bottom = raster_bounds[1]
                right = raster_bounds[2]
                top = raster_bounds[3]

                raster_transform =raster.transform
                pixelSizeX = raster_transform[0]
                pixelSizeY =-raster_transform[4]
                raster_crs =raster.crs
                raster_dtype = raster.dtypes[0]
                raster_nodata = raster.nodata
                   
                with rasterio.open(raster_for_qc) as ds:
                    band_data = ds.read(1, window=Window(0, 0, 20, 20))  # pixel 2000/100 = 20   - use only a small window: to be able to compare the data with the aoi from the CUBE
                    r_min_raster = band_data.min() 
                    r_max_raster = band_data.max() 
                    r_mean_raster = band_data.mean() 
                     
                ## set the same bounding mox (20x20p pixel)  
                x1 =left
                y1 =top
                x2 =x1 + 2000
                y2 =y1 - 2000
                resolution = 100
                bbox_coords = x1, y1, x2, y2
                print (bbox_coords)
                lux_bbox=  BBox(bbox=bbox_coords, crs=CRS('3035').pyproj_crs())
                lux_size = bbox_to_dimensions(lux_bbox, resolution=resolution)
                print(f"Image shape at {resolution} m resolution: {lux_size} pixels")
                data_collection_aoi = DataCollection.define_byoc(collection_id, name=name_of_ingestion)

                # simple evalscript to read the for bounding box inside the cube-tile:
                evalscript_test = """

                //VERSION=3
                function setup() {
                  return {
                    input: ["B01"],
                    output: { 
                        bands: 1,
                        sampleType: "UINT16" // raster format will be UINT16
                        }

                  };
                }

                function evaluatePixel(sample) {
                  return [sample.B01];
                }
                """
                request = SentinelHubRequest(
                        evalscript=evalscript_test,
                        input_data=[
                            SentinelHubRequest.input_data(
                                data_collection=DataCollection.environmental_zones_1km,                
                            )
                        ],
                        responses=[
                            SentinelHubRequest.output_response('default', MimeType.PNG)
                        ],
                        bbox=lux_bbox,
                        size=bbox_to_dimensions(lux_bbox, 100),
                        config=config)

                data = request.get_data()[0]

                data_min_cube = np.min(data)
                data_max_cube = np.max(data)
                data_mean_cube= np.mean(data)
                ##cube tiles_for_qc=test_raster#############################################################################################  END
                
                qc_log.write ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n")  
                print ("check 1.1:.....") ########################################################################################check 1.1 CRS
                EPSG_code = tile['coverGeometry'][ 'crs']['properties'][ 'name'].split(':')[-1]
                #qc_log.write ("ESPG: "+str(EPSG_code)           + " \n")
                #print ("---------")
                cube_epsg_str =  ("EPSG:"+str(EPSG_code))
                
                qc_log.write ("check 1.1 (CRS) -START \n")   
                qc_log.write ("-RASTER:  \n")
                qc_log.write ("  EPSG code: " +str(raster_crs) +' \n')    
                qc_log.write ("-CUBE_TILE:  \n")
                qc_log.write ("  EPSG code: " +str(cube_epsg_str) +' \n')            
                raster_epsg_str =  (str(raster_crs))
                #print ("---------")
                
                if cube_epsg_str == raster_epsg_str:
                    qc_log.write ("check 1.1 - EPSG (crs): OK   \n")
                else:
                    qc_log.write ("check 1.1 - EPSG (crs): NOT-OK  \n")
                    
                qc_log.write ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n")   
                print ("check 1.2:.....") ########################################################################################check 1.2 cell size
                qc_log.write ("check 1.2 (cellsize)-START  \n")  
                qc_log.write ("-RASTER:  \n")
                qc_log.write ("  Pixel size x: " +str(pixelSizeX) +' \n')
                qc_log.write ("  Pixel siez y: " +str(pixelSizeY) +' \n')
                              
                qc_log.write ("-CUBE_TILE:  \n")
                
                qc_log.write ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n")  
                print ("check 1.3:.....") ########################################################################################check 1.3 tile size extend:
                qc_log.write ("check 1.3 (extend-START \n")  
                qc_log.write ("-RASTER:  \n")
                qc_log.write ("  wiht: "+str(raster_width)     + " \n")
                qc_log.write ("  height: "+str(raster_height)    + " \n")
                qc_log.write ("-CUBE_TILE:  \n")
                
                
                #pixelSizeX = raster_transform[0]   ## raster
                #pixelSizeY =-raster_transform[4]    ###raster
                qc_log.write ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n")  
                print ("check 1.4:.....")             
                qc_log.write ("check 1.4 (data type & statistics)-START  \n")  
                #https://sentinelhub-py.readthedocs.io/en/latest/examples/byoc_request.html
                ##print ("NEXT file:.....")
                
                qc_log.write ("-RASTER:  \n")
                qc_log.write ("  raster data type: "    + str(raster_dtype)     + " \n")
                qc_log.write ("  raster nodata value: " + str(raster_nodata)    + " \n")  
                qc_log.write ("  max raster value: "  +str(r_max_raster) +' \n')
                qc_log.write ("  min raster value: " +str(r_min_raster) +' \n')
                qc_log.write ("  avg raster value: " +str(r_mean_raster) +' \n')
                qc_log.write ("-CUBE_TILE:  \n")
                qc_log.write ("  max cube value: "  +str(data_max_cube) +' \n')
                qc_log.write ("  min cube value: " +str(data_min_cube) +' \n')
                qc_log.write ("  avg cube  value: " +str(data_mean_cube) +' \n')
                
                
                if str(r_max_raster) == str(data_max_cube)  and str(r_min_raster) == str(data_min_cube) :                     
                    qc_log.write ("check 1.4 - data : OK   \n")
                else:
                    qc_log.write ("check 1.4 - data :  NOT-OK  \n")
                                  
                                  
                qc_log.write ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n")  
                print ("check 1.5:.....")  
                qc_log.write ("check 1.5 (extend -START \n")  
                qc_log.write ("-RASTER:  \n")
                qc_log.write ("  left: "+str(left)+'        \n')
                qc_log.write ("  bottom: " +str(bottom)+'    \n')
                qc_log.write ("  right: "+str(right)+'       \n')
                qc_log.write ("  top: " +str(top)+'            \n')
                qc_log.write ("-CUBE_TILE:  \n")
                
                #print (t['tileGeometry'] ['coordinates']  )
                bbox = tile['tileGeometry'] ['coordinates']

                ## top left
                #print ("top left:")
                top_left_x = bbox[0][0][0] 
                #print (top_left_x)
                top_left_y = bbox[0][0][1] 
                #print (top_left_y)

                ## rop  right
                #print ("top right:")
                top_right_x = bbox[0][1][0] 
                #print (top_right_x)
                top_right_y = bbox[0][1][1] 
                #print (top_right_y)


                ## bottom  right
                #print ("bottom right:")
                bottom_right_x = bbox[0][2][0] 
                #print (bottom_right_x)
                bottom_right_y = bbox[0][2][1] 
                #print (bottom_right_y)

                ## bottom left 
                #print ("bottom left:")
                bottom_left_x = bbox[0][3][0] 
                #print (bottom_left_x)
                bottom_left_y = bbox[0][3][1] 
                #print (bottom_left_y)
                
                qc_log.write ("  left: "   +str(bottom_left_x)+'         \n')
                qc_log.write ("  bottom: " +str(bottom_left_y)+'         \n')
                qc_log.write ("  right: "  +str(top_right_x)  +'         \n')
                qc_log.write ("  top: "    +str(top_right_y)  +'         \n')
                
                if str(bottom_left_x) == str(left)  and str(bottom_left_y) == str(bottom) and str(top_right_x) == str(right)    and str(top_right_y) == str(top):                     
                    qc_log.write ("check 1.5 - extend : OK   \n")
                else:
                    qc_log.write ("check 1.5 - extend :  NOT-OK  \n")
                
#print("2-------------------")

###############################################################open collection:

qc_log.close()
print ("end")


./../../../../s3/data/d005_env_zones/eea_r_3035_1_km_envzones_p_2018_v01_r00
following raster is selected for QC:
xxxxxxxxxxxxxxxxxxxxxxxxxxxxx
ooooooooooooooooooooooooooooooooooooo
RASTER for QC:
<DirEntry 'env_zones_1km_3035_2_6_B01.tif'>
CUBE tile for QC:
00cec537-6717-4e3b-89ad-7bd3240af390
ooooooooooooooooooooooooooooooooooooo
(5900000.0, 4500000.0, 5902000.0, 4498000.0)
Image shape at 100 m resolution: (27, 10) pixels
check 1.1:.....
check 1.2:.....
check 1.3:.....
check 1.4:.....
check 1.5:.....
xxxxxxxxxxxxxxxxxxxxxxxxxxxxx
ooooooooooooooooooooooooooooooooooooo
RASTER for QC:
<DirEntry 'env_zones_1km_3035_3_3_B01.tif'>
CUBE tile for QC:
022c35df-0e5e-45ba-a013-06008c324cd5
ooooooooooooooooooooooooooooooooooooo
(2900000.0, 3500000.0, 2902000.0, 3498000.0)
Image shape at 100 m resolution: (14, 24) pixels
check 1.1:.....
check 1.2:.....
check 1.3:.....
check 1.4:.....
check 1.5:.....
xxxxxxxxxxxxxxxxxxxxxxxxxxxxx
ooooooooooooooooooooooooooooooooooooo
RASTER for QC:
<DirEntry 'env_