## Add the name of the Sentinel-2 tile to each training polygon for efficient masking  

The Sentinel-2 mission maps each tile every 5 days; the coordinates of each tile are fixed, as are the tile names.  
Each tile has a size of about 110 km x 110 km, with 10 km on each side overlapping the neighbouring tiles.

Download tiling grid polygons: https://sentinels.copernicus.eu/web/sentinel/missions/sentinel-2/data-products

Download the German border: http://www.diva-gis.org/gdata
 - Select Germany and Administrative areas
   - DEU_adm1.shp contains the borders of each state as polygons
   - DEU_adm2.shp contains the borders of each district as polygons

In [108]:
import fiona
import geopandas as gpd

# geopandas reads files through fiona, where the KML driver is disabled by default;
# switch it on (read/write, both spellings) so the .kml file can be loaded into a geopandas dataframe
# https://gis.stackexchange.com/questions/114066/handling-kml-csv-with-geopandas-drivererror-unsupported-driver-ucsv
fiona.drvsupport.supported_drivers.update({'kml': 'rw', 'KML': 'rw'})

# KML file with the Sentinel-2 tiling grid polygons
tiles_grid_kml_path = r'C:\Users\Fabian\Downloads\S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.kml'
tiles_grid = gpd.read_file(tiles_grid_kml_path)

In [109]:
# preview of the tiling grid: the 'Name' column holds the name of the associated tile
tiles_grid.head(n=5)

Unnamed: 0,Name,Description,geometry
0,01CCV,TILE PROPERTIES<br><table border=0 cellpadding...,GEOMETRYCOLLECTION Z (POLYGON Z ((180.00000 -7...
1,01CDH,TILE PROPERTIES<br><table border=0 cellpadding...,GEOMETRYCOLLECTION Z (POLYGON Z ((180.00000 -8...
2,01CDJ,TILE PROPERTIES<br><table border=0 cellpadding...,GEOMETRYCOLLECTION Z (POLYGON Z ((180.00000 -8...
3,01CDK,TILE PROPERTIES<br><table border=0 cellpadding...,GEOMETRYCOLLECTION Z (POLYGON Z ((180.00000 -8...
4,01CDL,TILE PROPERTIES<br><table border=0 cellpadding...,GEOMETRYCOLLECTION Z (POLYGON Z ((180.00000 -8...


In [110]:
# store the center point of every tile, which is used as footprint when querying sentinel data
# the points are kept as WKT strings, because geopandas throws an error when saving a second geometry column to file
# note: geopandas emits a UserWarning here, see
# https://stackoverflow.com/questions/63004400/getting-a-userwarning-when-calculating-centroid-of-a-geoseries/63038899#63038899
tile_center_points = tiles_grid.centroid
tiles_grid['centroid_of_tile'] = [center_point.wkt for center_point in tile_center_points]


  tiles_grid['centroid_of_tile'] = tiles_grid.centroid.apply(lambda x: x.wkt)


In [111]:
# load the border polygon of germany; tiles_grid is filtered on it afterwards
# to get the tiles of one or more states only, read DEU_adm1.shp instead and filter it on the wanted states
# (DEU_adm2.shp works the same way on district level)
germany_borders_path = r'C:\Users\Fabian\Documents\Masterarbeit_Daten\DEU_adm\DEU_adm0.shp'
germany_borders = gpd.read_file(filename=germany_borders_path)
germany_borders.head(n=5)

Unnamed: 0,ID_0,ISO,NAME_0,OBJECTID_1,ISO3,NAME_ENGLI,NAME_ISO,NAME_FAO,NAME_LOCAL,NAME_OBSOL,...,CARICOM,EU,CAN,ACP,Landlocked,AOSIS,SIDS,Islands,LDC,geometry
0,86,DEU,Germany,62,DEU,Germany,GERMANY,Germany,Deutschland,,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"MULTIPOLYGON (((8.70837 47.71556, 8.70918 47.7..."


In [112]:
# reduce the tiling grid to the tiles which cover germany (clipped on the german border)
# note: only 67 tiles were expected, so probably two tiles are just overlapping and are not actually needed
tiles_grid_germany = gpd.clip(gdf=tiles_grid, mask=germany_borders)
tiles_grid_germany.shape

(69, 4)

In [113]:
# preview of the tiles which remain after clipping on the german border
tiles_grid_germany.head(n=5)

Unnamed: 0,Name,Description,geometry,centroid_of_tile
30185,32TLT,TILE PROPERTIES<br><table border=0 cellpadding...,"POLYGON Z ((7.56263 47.84346 0.00000, 7.79435 ...",POINT (7.079043950618934 47.34283231653833)
30194,32TMT,TILE PROPERTIES<br><table border=0 cellpadding...,"MULTIPOLYGON Z (((7.66287 47.84591 0.00000, 9....",POINT (8.402242584537571 47.35737320574876)
30229,32ULU,TILE PROPERTIES<br><table border=0 cellpadding...,"POLYGON Z ((7.77894 48.49550 0.00000, 7.79640 ...",POINT (7.045611295839894 48.2416842867384)
30238,32UMU,TILE PROPERTIES<br><table border=0 cellpadding...,"POLYGON Z ((7.96933 48.74674 0.00000, 9.13278 ...",POINT (8.391831875811269 48.256688005715716)
31159,33UVP,TILE PROPERTIES<br><table border=0 cellpadding...,"POLYGON Z ((13.63918 48.74498 0.00000, 13.8144...",POINT (14.391831875811269 48.256688005715716)


In [114]:
# write the german tiles together with their centroids (WKT strings) to a GeoJSON file
tiles_grid_germany_file = 'tiles_grid_germany_centroid.geojson'
tiles_grid_germany.to_file(tiles_grid_germany_file, driver='GeoJSON')

In [115]:
# load the training polygons; the tile names are mapped onto them in the following cells
trn_polygons_path = r'C:\Users\Fabian\OneDrive - Universität Würzburg\Uni Würzburg\Master\Masterthesis\Code\Einarbeitung\trn_polygons.geojson'
trn_polygons = gpd.read_file(filename=trn_polygons_path)

In [116]:
# keep only the training polygons which lie within the borders of germany
trn_polygons_germany = gpd.clip(gdf=trn_polygons, mask=germany_borders)

In [117]:
# (rows, columns) of the training polygons which remain after clipping on the german border
trn_polygons_germany.shape

(24233, 37)

In [118]:
# attach to every polygon (= solar plant) the name of the tile it intersects with
# later the polygons of a tile can be looked up by the tile name alone,
# instead of filtering the dataframe on the geometry again (which speeds up the process)
trn_polygons_germany_tiles = trn_polygons_germany.sjoin(
    tiles_grid_germany,
    how='inner',
    predicate='intersects',
)

In [119]:
# number of distinct tiles which are needed to train on all polygons in germany
needed_tile_names = set(trn_polygons_germany_tiles['Name'])
len(needed_tile_names)

64

In [120]:
# look at one joined row to decide which columns are needed (dropping the rest saves space on disk)
trn_polygons_germany_tiles.iloc[0]

Country                                                           NaN
Province                                                          NaN
Project                                                           NaN
WRI_ref                                                           NaN
Polygon Source                                                    NaN
Date                                                              NaT
building                                                          NaN
operator                                                          NaN
generator_source                                                solar
amenity                                                           NaN
landuse                                                           NaN
power_source                                                      NaN
shop                                                              NaN
sport                                                             NaN
tourism             

In [121]:
# reduce the joined dataframe to the columns which are needed for masking
needed_columns = ['osm_id', 'Name', 'geometry', 'centroid_of_tile']
selected_trn_polygons_germany_tiles = gpd.GeoDataFrame(
    trn_polygons_germany_tiles[needed_columns],
    crs=trn_polygons_germany_tiles.crs,
    geometry=trn_polygons_germany_tiles.geometry,
).reset_index(drop=True)  # replace the index of the join by a fresh 0..n-1 index
selected_trn_polygons_germany_tiles

Unnamed: 0,osm_id,Name,geometry,centroid_of_tile
0,483534385.0,33UVS,"POLYGON ((13.88598 50.89644, 13.88598 50.89642...",POINT (14.357301607280887 50.95484948446869)
1,483534393.0,33UVS,"POLYGON ((13.88548 50.89585, 13.88548 50.89582...",POINT (14.357301607280887 50.95484948446869)
2,483534394.0,33UVS,"POLYGON ((13.88554 50.89592, 13.88554 50.89590...",POINT (14.357301607280887 50.95484948446869)
3,483534395.0,33UVS,"POLYGON ((13.88560 50.89600, 13.88561 50.89598...",POINT (14.357301607280887 50.95484948446869)
4,483534396.0,33UVS,"POLYGON ((13.88567 50.89607, 13.88567 50.89605...",POINT (14.357301607280887 50.95484948446869)
...,...,...,...,...
32948,226335168.0,33UUA,"POLYGON ((13.30152 54.47148, 13.30245 54.47116...",POINT (12.757434761701774 54.531811179790914)
32949,226315509.0,33UUA,"POLYGON ((13.29566 54.47219, 13.29899 54.47156...",POINT (12.757434761701774 54.531811179790914)
32950,181394364.0,33UUA,"POLYGON ((13.31349 54.54894, 13.31438 54.54878...",POINT (12.757434761701774 54.531811179790914)
32951,179338993.0,33UUA,"POLYGON ((12.01570 54.17296, 12.01600 54.17329...",POINT (12.757434761701774 54.531811179790914)


In [122]:
# write the result as GeoJSON (this format needs more space on disk)
trn_polygons_geojson_file = 'trn_polygons_germany_tiles.geojson'
selected_trn_polygons_germany_tiles.to_file(trn_polygons_geojson_file, driver='GeoJSON')

In [124]:
# save it as .shp file (needs less space)
# Field names in an ESRI Shapefile are limited to 10 characters, so 'centroid_of_tile' gets cut
# to its first 10 characters on write, which geopandas reports with a UserWarning.
# Renaming it explicitly to that same 10-character name keeps the written file unchanged,
# makes the column name on disk visible here and avoids the warning.
# The rename only affects the copy which is written; the dataframe in memory keeps its column name.
selected_trn_polygons_germany_tiles.rename(columns={'centroid_of_tile': 'centroid_o'}).to_file('trn_polygons_germany_tiles.shp')

  selected_trn_polygons_germany_tiles.to_file('trn_polygons_germany_tiles.shp')


### The saved file is used for masking the satellite images in the dataset