## QC of Ingestion 

In [1]:
# Configure plots for inline use in Jupyter Notebook
%matplotlib inline

import datetime as dt

# Utilities
import boto3
import dateutil
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import os
import rasterio
import rasterio.mask
from rasterio.plot import show_hist
from rasterio.plot import show
from rasterio.windows import Window
import random
import fiona
import numpy as np
from shapely.geometry import mapping, Polygon
from shapely import geometry
import logging    

# Sentinel Hub
from sentinelhub import (
    CRS,
    BBox,
    ByocCollection,
    ByocCollectionAdditionalData,
    ByocCollectionBand,
    ByocTile,
    DataCollection,
    DownloadFailedException,
    MimeType,
    SentinelHubBYOC,
    SentinelHubRequest,
    SHConfig,
    bbox_to_dimensions,
    os_utils,
)

config = SHConfig()
config.instance_id = os.environ.get("SH_INSTANCE_ID")
config.sh_client_id = os.environ.get("SH_CLIENT_ID")
config.sh_client_secret = os.environ.get("SH_CLIENT_SECRET")
config.aws_access_key_id = os.environ.get("username")
config.aws_secret_access_key = os.environ.get("password")
    
print ("done")
    

done


In [7]:
# Read raster files on S3 drive
#### CHECKING ENV ZONES:
#### https://github.com/FAIRiCUBE/uc1-urban-climate/blob/master/notebooks/dev/f01_ingestion/edc_ingestion/ingestion_08_env_zones.ipynb

#https://sentinelhub-py.readthedocs.io/en/latest/examples/byoc_request.html#Create-new-collection


##########################SET name and data folder: 
name_of_ingestion = "environmental_zones_1km"  ## should be the collection name!!!"
collection_id ='5b45916e-6704-4581-824f-4d713198731b'  # collection ID 
input_folder ="./../../../../s3/data/d005_env_zones/eea_r_3035_1_km_envzones_p_2018_v01_r00"   ## conect to tile folder on S3
###################################################




# assign directory
directory = input_folder
print (directory)
print ("following raster is selected for QC:")

##################################### QC log file set up:
#now we will Create and configure logger 
#https://www.geeksforgeeks.org/reading-writing-text-files-python/
qc_log = open("QC_"+name_of_ingestion+".txt","w")
qc_log.write("QC_"+name_of_ingestion+" \n")
qc_log.write("---------------------------------------------------------- \n")
#########################################################



###############################################################open collection:
# check ingestion status

# Initialize SentinelHubBYOC class
byoc = SentinelHubBYOC(config=config)

#good=' 5b45916e-6704-4581-824f-4d713198731b'
#############################################################
name_of_your_collection = name_of_ingestion

#############################################################
#collection = ByocCollection(name=name_of_your_collection, s3_bucket="hub-fairicube0")
#byoc.delete_collection(new_collection) # for the delte of the collection
#my_collection = byoc.get_collection(created_collection["id"])


collections_iterator = byoc.iter_collections(name_of_your_collection)

my_collection = list(collections_iterator)[0]



#print("Collection name:", my_collection["name"])
#print("Collection id: ", my_collection["id"])
tiles = list(byoc.iter_tiles(my_collection))

for tile in tiles:
    #print(tile)
    #print("Tile created: ", tile['path'])
    tile_name = tile['path'].split("/")
    tile_name_2=(tile_name[3])
    #print ( tile['path'])
    #print (tile_name_2)
    tile_name_3 = tile_name_2.split("(")
    tile_name_4= tile_name_3[0]+"B01.tif"
    #print (tile_name_4)

    
    ###########################################################################################################################START QC of every single tile:

    
    ## reading the raster files on s3:
    raster_list=[]
    # iterate over files in
    # that directory
    for filename in os.scandir(directory):                                 ####LOOP first over cube tiles - then over raster tiles  (1)
        if filename.is_file():
            #print(filename.path)
            raster_with_full_filename = filename
            raster_name_from_s3_1=filename.path.split("/")
            raster_name_from_s3_2=raster_name_from_s3_1[-1]
            
            #print (raster_name_from_s3_2)
            
            if tile_name_4 == raster_name_from_s3_2:                        ####LOOP first over cube tiles - then over raster tiles  (2)
                print("xxxxxxxxxxxxxxxxxxxxxxxxxxxxx")
                
                ## PART 1 select randomly one raster: ----------------------------------    
                raster_for_qc = raster_with_full_filename  
                print('ooooooooooooooooooooooooooooooooooooo')
                print ( "RASTER for QC:")
                print(raster_for_qc)
                print ( "CUBE tile for QC:")
                print(tile['id'])
                      
                print('ooooooooooooooooooooooooooooooooooooo')
                

                ### PART 1 b read raster: ----------------------------------  

                ##raster_for_qc=test_raster############################################################################################# 
                raster = rasterio.open(raster_for_qc)
                qc_log.write("(1) CHECK 1 - spatial check for raster files:   \n")
                qc_log.write("----------------------------------------------   \n")

                ##https://rasterio.readthedocs.io/en/stable/quickstart.html
                #qc_log.write("Reading spatial information from s3 tiles   \n")
                
                
                raster_name = raster.name
                raster_bands = raster.count
                raster_width= raster.width
                raster_height= raster.height
                raster_bounds= raster.bounds
                
                qc_log.write( raster_name + "   \n ")
                #
                left = raster_bounds[0]
                bottom = raster_bounds[1]
                right = raster_bounds[2]
                top = raster_bounds[3]

                raster_transform =raster.transform
                pixelSizeX = raster_transform[0]
                pixelSizeY =-raster_transform[4]
                raster_crs =raster.crs

                #qc_log.write ("raster information:  \n")
                #qc_log.write (raster_name           + " \n")
                #qc_log.write ('================  \n')
                #qc_log.write (str(raster_bands)     + " \n")
#
                #qc_log.write (str(raster_bounds)    + " \n")
                #qc_log.write (str(raster_transform) + " \n")
#
                #qc_log.write (str(raster_crs)        +" \n")
                #qc_log.write ('-----------------  \n')
                #qc_log.write (str(left)+'  left  \n')
                #qc_log.write (str(bottom)+'  bottom  \n')
                #qc_log.write (str(right)+'  right  \n')
                #qc_log.write (str(top)+'  top  \n')
                #qc_log.write ('-----------------  \n')
                #
                raster_dtype = raster.dtypes[0]
                raster_nodata = raster.nodata
                #qc_log.write ("raster data type: " + str(raster_dtype)     + " \n")
                #qc_log.write ("raster nodata value: " + str(raster_nodata) + " \n")  
                #
                
               #with rasterio.open(raster_for_qc) as ds:   ### checking band statistics: transfer data to array:
               #    band_data=ds.read()
               #    r_min = band_data.min() 
               #    r_max = band_data.max() 
               #    r_mean = band_data.mean() 
                    
                with rasterio.open(raster_for_qc) as ds:
                    band_data = ds.read(1, window=Window(0, 0, 20, 20))  # pixel 2000/100 = 20
                    r_min_raster = band_data.min() 
                    r_max_raster = band_data.max() 
                    r_mean_raster = band_data.mean() 
                     

                    #test= array.np.median()
                # print ("band statistics:")
                # print (r_min)
                # print (r_max)
                # print (r_mean)
                #print (raster_with_full_filename)
            
                ##cube tiles_for_qc=test_raster#############################################################################################  START
                
                #print(tile)
                #qc_log.write ("-------------------------------------------------------  \n")
                #qc_log.write ("cube tile  information:  \n")
                #qc_log.write ("======================== \n")
                #cube_tile_properties = tile['coverGeometry']
                #print(cube_tile_properties)
                
                ### getting data statistics from small AOI inside the cube-tile:  
                #x1 =bottom_left_x
                #y1 =bottom_left_y
                x1 =left
                y1 =top
                
                
                #x1 =4014674
                #y1= 2933830
                x2 =x1 + 2000
                y2 =y1 - 2000
                resolution = 100
                bbox_coords = x1, y1, x2, y2
                print (bbox_coords)
                lux_bbox=  BBox(bbox=bbox_coords, crs=CRS('3035').pyproj_crs())
                
                
                
                lux_size = bbox_to_dimensions(lux_bbox, resolution=resolution)
                print(f"Image shape at {resolution} m resolution: {lux_size} pixels")
                data_collection_aoi = DataCollection.define_byoc(collection_id, name=name_of_ingestion)

                # simple evalscript to check if single request works
                evalscript_test = """

                //VERSION=3
                function setup() {
                  return {
                    input: ["B01"],
                    output: { 
                        bands: 1,
                        sampleType: "UINT16" // raster format will be UINT16
                        }

                  };
                }

                function evaluatePixel(sample) {
                  return [sample.B01];
                }
                """
                request = SentinelHubRequest(
                        evalscript=evalscript_test,
                        input_data=[
                            SentinelHubRequest.input_data(
                                data_collection=DataCollection.environmental_zones_1km,
                                # time_interval=("2018-01-01", "2019-01-01")
                                # time_interval="2018"
                            )
                        ],
                        responses=[
                            SentinelHubRequest.output_response('default', MimeType.PNG)
                        ],
                        bbox=lux_bbox,
                        size=bbox_to_dimensions(lux_bbox, 100),
                        config=config)

                data = request.get_data()[0]

                data_min_cube = np.min(data)
                data_max_cube = np.max(data)
                data_mean_cube= np.mean(data)
                #print (data_min_cube)
                #print (data_max_cube)
                #print (data_mean_cube)
                ##cube tiles_for_qc=test_raster#############################################################################################  END
                
                qc_log.write ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n")  
                print ("check 1.1:.....") ########################################################################################check 1.1 CRS
                EPSG_code = tile['coverGeometry'][ 'crs']['properties'][ 'name'].split(':')[-1]
                #qc_log.write ("ESPG: "+str(EPSG_code)           + " \n")
                #print ("---------")
                cube_epsg_str =  ("EPSG:"+str(EPSG_code))
                
                qc_log.write ("check 1.1 (CRS) -START \n")   
                qc_log.write ("-RASTER:  \n")
                qc_log.write ("  EPSG code: " +str(raster_crs) +' \n')    
                qc_log.write ("-CUBE_TILE:  \n")
                qc_log.write ("  EPSG code: " +str(cube_epsg_str) +' \n')            
                raster_epsg_str =  (str(raster_crs))
                #print ("---------")
                
                if cube_epsg_str == raster_epsg_str:
                    qc_log.write ("check 1.1 - EPSG (crs): OK   \n")
                else:
                    qc_log.write ("check 1.1 - EPSG (crs): NOT-OK  \n")
                    
                qc_log.write ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n")   
                print ("check 1.2:.....") ########################################################################################check 1.2 cell size
                qc_log.write ("check 1.2 (cellsize)-START  \n")  
                qc_log.write ("-RASTER:  \n")
                qc_log.write ("  Pixel size x: " +str(pixelSizeX) +' \n')
                qc_log.write ("  Pixel siez y: " +str(pixelSizeY) +' \n')
                              
                qc_log.write ("-CUBE_TILE:  \n")
                
                qc_log.write ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n")  
                print ("check 1.3:.....") ########################################################################################check 1.3 tile size extend:
                qc_log.write ("check 1.3 (extend-START \n")  
                qc_log.write ("-RASTER:  \n")
                qc_log.write ("  wiht: "+str(raster_width)     + " \n")
                qc_log.write ("  height: "+str(raster_height)    + " \n")
                qc_log.write ("-CUBE_TILE:  \n")
                
                
                #pixelSizeX = raster_transform[0]   ## raster
                #pixelSizeY =-raster_transform[4]    ###raster
                qc_log.write ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n")  
                print ("check 1.4:.....")             
                qc_log.write ("check 1.4 (data type & statistics)-START  \n")  
                #https://sentinelhub-py.readthedocs.io/en/latest/examples/byoc_request.html
                ##print ("NEXT file:.....")
                
                qc_log.write ("-RASTER:  \n")
                qc_log.write ("  raster data type: "    + str(raster_dtype)     + " \n")
                qc_log.write ("  raster nodata value: " + str(raster_nodata)    + " \n")  
                qc_log.write ("  max raster value: "  +str(r_max_raster) +' \n')
                qc_log.write ("  min raster value: " +str(r_min_raster) +' \n')
                qc_log.write ("  avg raster value: " +str(r_mean_raster) +' \n')
                qc_log.write ("-CUBE_TILE:  \n")
                qc_log.write ("  max cube value: "  +str(data_max_cube) +' \n')
                qc_log.write ("  min cube value: " +str(data_min_cube) +' \n')
                qc_log.write ("  avg cube  value: " +str(data_mean_cube) +' \n')
                
                
                if str(r_max_raster) == str(data_max_cube)  and str(r_min_raster) == str(data_min_cube) :                     
                    qc_log.write ("check 1.4 - data : OK   \n")
                else:
                    qc_log.write ("check 1.4 - data :  NOT-OK  \n")
                                  
                                  
                qc_log.write ("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n")  
                print ("check 1.5:.....")  
                qc_log.write ("check 1.5 (extend -START \n")  
                qc_log.write ("-RASTER:  \n")
                qc_log.write ("  left: "+str(left)+'        \n')
                qc_log.write ("  bottom: " +str(bottom)+'    \n')
                qc_log.write ("  right: "+str(right)+'       \n')
                qc_log.write ("  top: " +str(top)+'            \n')
                qc_log.write ("-CUBE_TILE:  \n")
                
                #print (t['tileGeometry'] ['coordinates']  )
                bbox = tile['tileGeometry'] ['coordinates']

                ## top left
                #print ("top left:")
                top_left_x = bbox[0][0][0] 
                #print (top_left_x)
                top_left_y = bbox[0][0][1] 
                #print (top_left_y)

                ## rop  right
                #print ("top right:")
                top_right_x = bbox[0][1][0] 
                #print (top_right_x)
                top_right_y = bbox[0][1][1] 
                #print (top_right_y)


                ## bottom  right
                #print ("bottom right:")
                bottom_right_x = bbox[0][2][0] 
                #print (bottom_right_x)
                bottom_right_y = bbox[0][2][1] 
                #print (bottom_right_y)

                ## bottom left 
                #print ("bottom left:")
                bottom_left_x = bbox[0][3][0] 
                #print (bottom_left_x)
                bottom_left_y = bbox[0][3][1] 
                #print (bottom_left_y)
                
                qc_log.write ("  left: "   +str(bottom_left_x)+'         \n')
                qc_log.write ("  bottom: " +str(bottom_left_y)+'         \n')
                qc_log.write ("  right: "  +str(top_right_x)  +'         \n')
                qc_log.write ("  top: "    +str(top_right_y)  +'         \n')
                
                if str(bottom_left_x) == str(left)  and str(bottom_left_y) == str(bottom) and str(top_right_x) == str(right)    and str(top_right_y) == str(top):                     
                    qc_log.write ("check 1.5 - extend : OK   \n")
                else:
                    qc_log.write ("check 1.5 - extend :  NOT-OK  \n")
                
#print("2-------------------")

###############################################################open collection:

qc_log.close()
print ("end")


./../../../../s3/data/d005_env_zones/eea_r_3035_1_km_envzones_p_2018_v01_r00
following raster is selected for QC:
xxxxxxxxxxxxxxxxxxxxxxxxxxxxx
ooooooooooooooooooooooooooooooooooooo
RASTER for QC:
<DirEntry 'env_zones_1km_3035_2_6_B01.tif'>
CUBE tile for QC:
00cec537-6717-4e3b-89ad-7bd3240af390
ooooooooooooooooooooooooooooooooooooo
(5900000.0, 4500000.0, 5902000.0, 4498000.0)
Image shape at 100 m resolution: (27, 10) pixels
check 1.1:.....
check 1.2:.....
check 1.3:.....
check 1.4:.....
check 1.5:.....
xxxxxxxxxxxxxxxxxxxxxxxxxxxxx
ooooooooooooooooooooooooooooooooooooo
RASTER for QC:
<DirEntry 'env_zones_1km_3035_3_3_B01.tif'>
CUBE tile for QC:
022c35df-0e5e-45ba-a013-06008c324cd5
ooooooooooooooooooooooooooooooooooooo
(2900000.0, 3500000.0, 2902000.0, 3498000.0)
Image shape at 100 m resolution: (14, 24) pixels
check 1.1:.....
check 1.2:.....
check 1.3:.....
check 1.4:.....
check 1.5:.....
xxxxxxxxxxxxxxxxxxxxxxxxxxxxx
ooooooooooooooooooooooooooooooooooooo
RASTER for QC:
<DirEntry 'env_

In [54]:
print ("raster info:")

from rasterio.windows import Window
input_folder ="./../../../../s3/data/d005_env_zones/eea_r_3035_1_km_envzones_p_2018_v01_r00"  


raster_with_full_filename = input_folder+"/env_zones_1km_3035_2_5_B01.tif"


raster_dtype = raster.dtypes[0]
raster_nodata = raster.nodata
print (raster_dtype)
print (raster_nodata)


with rasterio.open(raster_with_full_filename) as ds:
    wband_data = ds.read(1, window=Window(0, 0, 20, 20))  # pixel 2000/100 = 20
    
    band_data=ds.read()
    r_min = band_data.min() 
    r_max = band_data.max() 
    r_mean = band_data.mean() 
    r_mean2 = wband_data.mean() 
    
    #test= array.np.median()
print ("badnd statistics:")
print (r_min)
print (r_max)
print (r_mean)
print (r_mean2)
#print (raster_with_full_filename)



raster info:
uint8
0.0
badnd statistics:
0
6
1.00431891
2.0


In [4]:
from src import utils
utils.list_byoc_collections(list_tiles=False)

Collection name: TreeCoverDensity2018_10m_raster
Collection id:  3947b646-383c-4e91-aade-2f039bd6ba4b
-------------------
Collection name: UrbanAtlas2012_10m_raster
Collection id:  47ea75a0-875f-4ee9-ae78-b02be89d43d7
-------------------
Collection name: environmental_zones_1km
Collection id:  5b45916e-6704-4581-824f-4d713198731b
-------------------
Collection name: UrbanAtlas2018_10m_raster
Collection id:  78545a11-1e57-4ed0-a3ce-68c2e305216b
-------------------
Collection name: urban_audit_2021_city
Collection id:  7b4d761f-893b-43be-a4d6-4f37375318d3
-------------------
Collection name: urban_audit_2021_fua
Collection id:  c12e7be5-9f5e-441c-930f-2f274218e6d3
-------------------
Collection name: ImperviousnessDensity2018_10m_raster
Collection id:  c57f7668-2717-4529-93cc-5372bc96ebbe
-------------------
Collection name: nuts3_2021
Collection id:  d0a3f8c5-bf65-4bf2-9a3d-37a1d7101874
-------------------
Collection name: Halle_UA_height
Collection id:  dfad0a7e-9d3a-46e2-9fb6-7940f85b

In [5]:
### testing cube
import json
print (tile)

for t in tiles:
        if t['id'] == '00cec537-6717-4e3b-89ad-7bd3240af390':
            print (t['id'])
            #print (t['properties'])
            for item in t:
                print (item)
            
            print (t['tileGeometry'] ['coordinates']  )
            bbox = t['tileGeometry'] ['coordinates']
            
            ## top left
            print ("top left:")
            top_left_x = bbox[0][0][0] 
            print (top_left_x)
            top_left_y = bbox[0][0][1] 
            print (top_left_y)
            
            ## rop  right
            print ("top right:")
            top_right_x = bbox[0][1][0] 
            print (top_right_x)
            top_right_y = bbox[0][1][1] 
            print (top_right_y)
 

            ## bottom  right
            print ("bottom right:")
            bottom_right_x = bbox[0][2][0] 
            print (bottom_right_x)
            bottom_right_y = bbox[0][2][1] 
            print (bottom_right_y)
            
            ## bottom left 
            print ("bottom left:")
            bottom_left_x = bbox[0][3][0] 
            print (bottom_left_x)
            bottom_left_y = bbox[0][3][1] 
            print (bottom_left_y)
            

            
    
#extend = tile['coverGeometry'][ 'crs']['properties'][ 'name'].split(':')[-1]


#bbox = tile['coordinates']

#print = (bbox)



{'id': 'f40821f5-bacb-4bc6-8d98-358035a04e11', 'created': '2023-12-12T14:21:31.760197Z', 'ingestionStart': '2023-12-12T14:21:31.760197Z', 'sensingTime': '2018-01-01T00:00:00Z', 'coverGeometry': {'type': 'Polygon', 'crs': {'type': 'name', 'properties': {'name': 'urn:ogc:def:crs:EPSG::3035'}}, 'coordinates': [[[899999.9997812272, 4499999.999760259], [1899999.9998805118, 4499999.999828045], [1899999.9998928742, 5499999.999900502], [899999.9998236685, 5499999.99987142], [899999.9997812272, 4499999.999760259]]]}, 'tileGeometry': {'type': 'Polygon', 'crs': {'type': 'name', 'properties': {'name': 'urn:ogc:def:crs:EPSG::3035'}}, 'coordinates': [[[900000.0, 5500000.0], [1900000.0, 5500000.0], [1900000.0, 4500000.0], [900000.0, 4500000.0], [900000.0, 5500000.0]]]}, 'path': 'data/d005_env_zones/eea_r_3035_1_km_envzones_p_2018_v01_r00/env_zones_1km_3035_1_1_(BAND).tif', 'status': 'INGESTED', 'additionalData': {'hasAddedPoints': True, 'filesMetadata': {'B01': {'headerSize': 2436, 'etag': '"90acd49a

In [47]:
##checking tile: max min from CUBE-til3e
import IPython.display
#print(tile)
import shapely.geometry

# x1 = 6  # degree
# y1 = 49  # degree
# x2 = 6.9 # degree
# y2 = 49.9  # degree

# bbox_lux = x1, y1, x2, y2



x1 =bottom_left_x
y1 =bottom_left_y


x1 =4014674
y1= 2933830
x2 =x1 + 2000
y2 =y1 + 2000


resolution = 100
# lux_bbox = BBox(bbox=bbox_lux, crs=CRS.WGS84)#
bbox_coords = x1, y1, x2, y2
print (bbox_coords)
lux_bbox=  BBox(bbox=bbox_coords, crs=CRS('3035').pyproj_crs())
lux_size = bbox_to_dimensions(lux_bbox, resolution=resolution)
print(f"Image shape at {resolution} m resolution: {lux_size} pixels")

IPython.display.GeoJSON(shapely.geometry.box(*bbox_lux).__geo_interface__)

#print (tile)

#collection_id= tile['id']

name_of_ingestion = "environmental_zones_1km"
collection_id ='5b45916e-6704-4581-824f-4d713198731b'
#collections_iterator = byoc.iter_collections(name_of_your_collection)
data_collection_aoi = DataCollection.define_byoc(collection_id, name=name_of_ingestion)

# simple evalscript to check if single request works
evalscript_test = """

//VERSION=3
function setup() {
  return {
    input: ["B01"],
    output: { 
        bands: 1,
        sampleType: "UINT16" // raster format will be UINT16
        }
    
  };
}

function evaluatePixel(sample) {
  return [sample.B01];
}
"""
request = SentinelHubRequest(
        evalscript=evalscript_test,
        input_data=[
            SentinelHubRequest.input_data(
                data_collection=DataCollection.environmental_zones_1km,
                # time_interval=("2018-01-01", "2019-01-01")
                # time_interval="2018"
            )
        ],
        responses=[
            SentinelHubRequest.output_response('default', MimeType.PNG)
        ],
        bbox=lux_bbox,
        size=bbox_to_dimensions(lux_bbox, 100),
        config=config)
    
data = request.get_data()[0]

data_min = np.min(data)
data_max = np.max(data)
data_mean = np.mean(data)
print (data_min)
print (data_max)
print (data_mean)

(4014674, 2933830, 4016674, 2935830)
Image shape at 100 m resolution: (18, 22) pixels
6
7
6.492424242424242


In [48]:


data_mean



6.492424242424242

In [49]:
from rasterio.windows import Window

with rasterio.open('tests/data/RGB.byte.tif') as src:
    w = src.read(1, window=Window(0, 0, 512, 256))



NameError: name 'col_off' is not defined

In [8]:
# Read raster files on S3 drive
#### CHECKING ENV ZONES:
#### https://github.com/FAIRiCUBE/uc1-urban-climate/blob/master/notebooks/dev/f01_ingestion/edc_ingestion/ingestion_08_env_zones.ipynb

#https://sentinelhub-py.readthedocs.io/en/latest/examples/byoc_request.html#Create-new-collection


##########################SET name and data folder: 
name_of_ingestion = "environmental_zones_1km"  ## should be the collection name!!!"

input_folder ="./../../../../s3/data/d005_env_zones/eea_r_3035_1_km_envzones_p_2018_v01_r00"   ## conect to tile folder on S3
###################################################




# assign directory
directory = input_folder
print (directory)
print ("following raster is selected for QC:")

##################################### QC log file set up:
#now we will Create and configure logger 
#https://www.geeksforgeeks.org/reading-writing-text-files-python/
qc_log = open("QC_"+name_of_ingestion+".txt","w")
qc_log.write("QC_"+name_of_ingestion+" \n")
qc_log.write("---------------------------------------------------------- \n")
#########################################################

raster_list=[]
# iterate over files in
# that directory
for filename in os.scandir(directory):
    if filename.is_file():
        #print(filename.path)
        #print 
        raster_list.append(filename.path)
             
        
## PART 1 select randomly one raster: ----------------------------------    
raster_for_qc = raster_list[0]  # Select first raster of the raster-list
#raster_for_qc = random.choice(raster_list)   # select one file radmonly 
print(raster_for_qc)

## PART 1 b read raster: ----------------------------------  

#raster_for_qc=test_raster############################################################################################# for testing
raster = rasterio.open(raster_for_qc)


qc_log.write("(1) CHECK 1 - (spatial check for raster files:   \n")
qc_log.write("----------------------------------------------   \n")

#https://rasterio.readthedocs.io/en/stable/quickstart.html
qc_log.write("Reading spatial information from s3 tiles   \n")
qc_log.write(raster_for_qc+"   \n")
raster_name = raster.name
raster_bands = raster.count
raster_width= raster.width
raster_height= raster.height
raster_bounds= raster.bounds
#
left = raster_bounds[0]
bottom = raster_bounds[1]
right = raster_bounds[2]
top = raster_bounds[3]

raster_transform =raster.transform
pixelSizeX = raster_transform[0]
pixelSizeY =-raster_transform[4]
raster_crs =raster.crs

qc_log.write ("raster information:  \n")
qc_log.write (raster_name           + " \n")
qc_log.write ('================  \n')
qc_log.write (str(raster_bands)     + " \n")
qc_log.write (str(raster_width)     + " \n")
qc_log.write (str(raster_height)    + " \n")
qc_log.write (str(raster_bounds)    + " \n")
qc_log.write (str(raster_transform) + " \n")
qc_log.write ('-----------------  \n')
qc_log.write (str(pixelSizeX) +' pixel size x  \n')
qc_log.write (str(pixelSizeY) +' pixel size y  \n')
qc_log.write (str(raster_crs)        +" \n")
qc_log.write ('-----------------  \n')
qc_log.write (str(left)+'  left  \n')
qc_log.write (str(bottom)+'  bottom  \n')
qc_log.write (str(right)+'  right  \n')
qc_log.write (str(top)+'  top  \n')



###############################################################open collection:
# check ingestion status

# Initialize SentinelHubBYOC class
byoc = SentinelHubBYOC(config=config)

#good=' 5b45916e-6704-4581-824f-4d713198731b'
#############################################################
name_of_your_collection = name_of_ingestion

#############################################################
#collection = ByocCollection(name=name_of_your_collection, s3_bucket="hub-fairicube0")
#byoc.delete_collection(new_collection) # for the delte of the collection
#my_collection = byoc.get_collection(created_collection["id"])


collections_iterator = byoc.iter_collections(name_of_your_collection)

my_collection = list(collections_iterator)[0]



#print("Collection name:", my_collection["name"])
#print("Collection id: ", my_collection["id"])
tiles = list(byoc.iter_tiles(my_collection))

for tile in tiles:
    #print(tile)
    #print("Tile created: ", tile['path'])
    tile_name = tile['path'].split("/")
    tile_name_2=(tile_name[3])
    #print ( tile['path'])
    #print (tile_name_2)
    tile_name_3 = tile_name_2.split("(")
    tile_name_4= tile_name_3[0]+"B01.tif"
    print (tile_name_4)
    ### checking s3 raster files:
    raster_name_from_s3_1=raster_name.split("/")
    raster_name_from_s3_2=raster_name_from_s3_1[-1]
    print (raster_name_from_s3_2)
    
    #print("Tile status: ", tile['status'])
    #print("Tile created: ", tile['created'])
    #if(tile['status'] == "FAILED"):
    #    print("Ingestion failed error: ", tile['additionalData'])
#print("2-------------------")

###############################################################open collection:

qc_log.close()
print ("end")


./../../../../s3/data/d005_env_zones/eea_r_3035_1_km_envzones_p_2018_v01_r00
following raster is selected for QC:
./../../../../s3/data/d005_env_zones/eea_r_3035_1_km_envzones_p_2018_v01_r00/env_zones_1km_3035_1_1_B01.tif
env_zones_1km_3035_2_6_B01.tif
env_zones_1km_3035_1_1_B01.tif
env_zones_1km_3035_3_3_B01.tif
env_zones_1km_3035_1_1_B01.tif
env_zones_1km_3035_5_7_B01.tif
env_zones_1km_3035_1_1_B01.tif
env_zones_1km_3035_1_3_B01.tif
env_zones_1km_3035_1_1_B01.tif
env_zones_1km_3035_2_4_B01.tif
env_zones_1km_3035_1_1_B01.tif
env_zones_1km_3035_2_3_B01.tif
env_zones_1km_3035_1_1_B01.tif
env_zones_1km_3035_5_3_B01.tif
env_zones_1km_3035_1_1_B01.tif
env_zones_1km_3035_4_7_B01.tif
env_zones_1km_3035_1_1_B01.tif
env_zones_1km_3035_3_2_B01.tif
env_zones_1km_3035_1_1_B01.tif
env_zones_1km_3035_3_4_B01.tif
env_zones_1km_3035_1_1_B01.tif
env_zones_1km_3035_5_1_B01.tif
env_zones_1km_3035_1_1_B01.tif
env_zones_1km_3035_5_6_B01.tif
env_zones_1km_3035_1_1_B01.tif
env_zones_1km_3035_1_5_B01.tif
env

In [9]:
my_collections

NameError: name 'my_collections' is not defined

In [None]:
list(collections_iterator)


In [None]:
# Read raster files on S3 drive


#https://sentinelhub-py.readthedocs.io/en/latest/examples/byoc_request.html#Create-new-collection


input_folder ="./../../../s3/data/d002_urban_atlas"
aoi_folder ="./../../../s3/data/d002_urban_atlas/aoi"

##./../../../s3/data/d001_administration/nuts_2021

test_raster = "./../../../s3/data/d002_urban_atlas/ua2018_mosaic_raster_10m_cog_2_2_B1.tif"


# assign directory
directory = input_folder
print (directory)
print ("following raster is selected for QC:")

raster_list=[]
# iterate over files in
# that directory
for filename in os.scandir(directory):
    if filename.is_file():
        #print(filename.path)
        raster_list.append(filename.path)
             
        
## PART 1 select randomly one raster: ----------------------------------    
#print (raster_list)
raster_for_qc = random.choice(raster_list)
#print(raster_for_qc)

## PART 1 b read raster: ----------------------------------   
raster_for_qc=test_raster############################################################################################# for testing
raster = rasterio.open(raster_for_qc)



#raster = rasterio.open(test_raster)

#https://rasterio.readthedocs.io/en/stable/quickstart.html
raster_name = raster.name
raster_bands = raster.count
raster_width= raster.width
raster_height= raster.height
raster_bounds= raster.bounds

left = raster_bounds[0]
bottom = raster_bounds[1]
right = raster_bounds[2]
top = raster_bounds[3]

raster_transform =raster.transform
pixelSizeX = raster_transform[0]
pixelSizeY =-raster_transform[4]
raster_crs =raster.crs
# print raster information:
# print (raster_name)
# print ('================')
# print (raster_bands)
# print (raster_width)
# print (raster_height)
# print (raster_bounds)
# print (raster_transform)
# print ('-----------------')
# print (str(pixelSizeX) +' pixel size x')
# print (str(pixelSizeY) +' pixel size y')
# print (raster_crs)
# print ('-----------------')
# print (str(left)+'  left')
# print (str(bottom)+'  bottom')
# print (str(right)+'  right')
# print (str(top)+'  top')


# build random aoi:--------------------------------------
random__y = random.randrange(bottom,top)
random__x = random.randrange(left,right)
print (random__x)
print (random__y)
aoi_left= random__y
aoi_buttom=random__x
aoi_right=random__y +1000
aoi_top=random__x+1000

fixed_center_coordinate_x = 3024866  #  North of Frankfurt
fixed_center_coordinate_y =4201149

##3458860,3203796 : 3491673,3249116 ## test extent for ua2018_mosaic_raster_10m_cog_2_2_B1
fixed_center_coordinate_x = 3458860  # ua2018_mosaic_raster_10m_cog_2_2_B1
fixed_center_coordinate_y =3203796

aoi_left= fixed_center_coordinate_y
aoi_buttom=fixed_center_coordinate_x
aoi_right=fixed_center_coordinate_y +1000
aoi_top=fixed_center_coordinate_x+1000
#https://stackoverflow.com/questions/30457089/how-to-create-a-shapely-polygon-from-a-list-of-shapely-points
p1 = geometry.Point(aoi_left,aoi_buttom) # 0-0
p2 = geometry.Point(aoi_left,aoi_top)  # 1 0

p3 = geometry.Point(aoi_right,aoi_top)
p4 = geometry.Point(aoi_right,aoi_buttom)
pointList = [p1, p2, p3, p4, p1]                    
poly = geometry.Polygon([[p.x, p.y] for p in pointList])
print(poly.wkt) 

# Define a polygon feature geometry with one attribute
schema = {
    'geometry': 'Polygon',
    'properties': {'id': 'int'},
    }
# Write a new Shapefile
with fiona.open(aoi_folder+'/'+ 'aoi.shp', 'w', 'ESRI Shapefile', schema) as c:
    ## If there are multiple geometries, put the "for" loop here
    c.write({
        'geometry': mapping(poly),
        'properties': {'id': 123},
    })


###testing aoi

with fiona.open(aoi_folder+'/'+ 'aoi.shp', "r") as shapefile:
     shapes = [feature["geometry"] for feature in shapefile]


with rasterio.open(raster_for_qc) as src:
     out_image, out_transform = rasterio.mask.mask(src, shapes, crop=True)
     out_meta = src.meta


print (out_image)


out_meta.update({"driver": "GTiff",
                  "height": out_image.shape[1],
                  "width": out_image.shape[2],
                  "transform": out_transform})
output_raster = aoi_folder+"/aoi.tif"

with rasterio.open(output_raster, "w", **out_meta) as dest:
    dest.write(out_image)


# Plot / show band 1--------------------------------------------------------------START QC of AOI RASTER:
raster_show = rasterio.open(output_raster)
show((raster_show, 1))    
print ("Raster info:....................")
print(raster_show.profile)
print ("Raster Histogramm:....................")
band1 = raster_show.read(1)
#print(band1)
#uniq_vals = np.unique(band1)
#print(sorted(uniq_vals))
print ("Count of pixel values:..........")
unique, counts = np.unique(band1, return_counts=True)
print(np.asarray((unique, counts)).T)

pixel_max = np.max(band1)
print ("Pixel max: '" + str(pixel_max))
pixel_min = np.min(band1)
print ("Pixel min'" + str(pixel_min))





#############
print ("add. RASTER info.............................................:..........")
raster = raster_show
raster_name = raster.name
raster_bands = raster.count
raster_width= raster.width
raster_height= raster.height
raster_bounds= raster.bounds

left = raster_bounds[0]
bottom = raster_bounds[1]
right = raster_bounds[2]
top = raster_bounds[3]

raster_transform =raster.transform
pixelSizeX = raster_transform[0]
pixelSizeY =-raster_transform[4]
raster_crs =raster.crs
# print raster information:
print (raster_name)
print ('================')
print (raster_bands)
print (raster_width)
print (raster_height)
print (raster_bounds)
print (raster_transform)
print ('-----------------')
print (str(pixelSizeX) +' pixel size x')
print (str(pixelSizeY) +' pixel size y')
print (raster_crs)
print ('-----------------')
print (str(left)+'  left')
print (str(bottom)+'  bottom')
print (str(right)+'  right')
print (str(top)+'  top')


print ("end")


In [None]:
#xx error hello world
##for collection in my_collections:   
 # byoc.delete_collection(collection)
#print ("end")

In [8]:
tiles

[{'id': '00cec537-6717-4e3b-89ad-7bd3240af390',
  'created': '2023-12-12T14:21:33.035665Z',
  'ingestionStart': '2023-12-12T14:21:33.035665Z',
  'sensingTime': '2018-01-01T00:00:00Z',
  'coverGeometry': {'type': 'Polygon',
   'crs': {'type': 'name',
    'properties': {'name': 'urn:ogc:def:crs:EPSG::3035'}},
   'coordinates': [[[5900000.000092124, 3499999.999726154],
     [6900000.000182957, 3499999.9996529873],
     [6900000.000131197, 4499999.99982101],
     [5900000.00007046, 4499999.999850882],
     [5900000.000092124, 3499999.999726154]]]},
  'tileGeometry': {'type': 'Polygon',
   'crs': {'type': 'name',
    'properties': {'name': 'urn:ogc:def:crs:EPSG::3035'}},
   'coordinates': [[[5900000.0, 4500000.0],
     [6900000.0, 4500000.0],
     [6900000.0, 3500000.0],
     [5900000.0, 3500000.0],
     [5900000.0, 4500000.0]]]},
  'path': 'data/d005_env_zones/eea_r_3035_1_km_envzones_p_2018_v01_r00/env_zones_1km_3035_2_6_(BAND).tif',
  'status': 'INGESTED',
  'additionalData': {'hasAddedP