# Imports and general definitions
At the very beginning, we import the necessary packages and define the available stream resolutions and the API endpoints.

In [None]:
from datetime import datetime
import os
import requests
import json
from rasterio.transform import from_origin
from pyproj import CRS
from subprocess import Popen
import numpy as np
import gc
import subprocess as sp
import xarray as xa
import rioxarray as riox

# Pixel size (metres per pixel) of every stream resolution that can be selected.
spacial_dim = {
    'stream_2k': 60,
    'stream_4k': 20,
    'stream_full': 10,
}

# Names of the selectable stream resolutions, coarsest first.
s2_resolutions = list(spacial_dim)

# Endpoint that returns the stream URIs together with the per-frame metadata.
ENDPOINT = "https://dunia.esa.int/api/streaming/metadata"

# Endpoint that returns the UTM bounding box and EPSG code of a tile.
ENDPOINT_TILE_BOUNDS = "https://dunia.esa.int/api/streaming/s2-tiles/utm-bounds"

# Frames per second of the stream; used to convert frame numbers into seconds.
FPS = 4

# Path template appended to ENDPOINT for a Sentinel 2 tile and year.
# NOTE(review): the meaning of the trailing "/10" is not visible here - confirm.
S2_API = '/s2/{year}/{tile_id}/10'

# Setting Parameters

Secondly, we set the parameters for the stream selection. We specify the following parameters:
- A Sentinel 2 tile id in *tile_id*
- The resolution of the stream. Possible values are 'stream_2k' (60m per pixel), 'stream_4k' (20m per pixel) and 'stream_full' (10m per pixel)
- The year of the observed data in *year*, starting from 2019.
- A start date and time within that year
- An end date and time within that year
- A path to store the geotiffs in.

In [None]:
# Sentinel 2 tile whose stream is processed
tile_id = '32SNG'

# stream resolution: one of 'stream_2k', 'stream_4k' or 'stream_full'
resolution = 'stream_2k'

# year of the observed data: one of 2019, 2020, 2021, 2022, 2023, 2024
year = 2020

# first and last moment (inclusive) of the time window within that year
start_date = datetime(year, 4, 1, 0, 0, 0)
end_date = datetime(year, 6, 30, 23, 59, 59)

# directory the geotiffs are written to
save_path = './geotiffs/s2/'

# create the output directory (including parents) when it is missing
if not os.path.exists(save_path):
    os.makedirs(save_path, exist_ok=True)

# Accessing the stream
To receive a URI to the stream with the right resolution, we send a GET request to the Dunia API. This also returns metadata for each image in the stream.
The metadata contains:
- cloud coverage in percent
- percentage of non-data in the image
- the sensing time of the image
- the frame number in the stream

The variables stream_link and frames_metadata will be used in the upcoming steps.

In [None]:
# Both stay None when the API response does not describe the requested stream.
stream_link = None
frames_metadata = None

# Load streams and metadata. The timeout (in seconds) keeps the notebook from
# hanging forever when the server does not answer.
response = requests.get(url=ENDPOINT + S2_API.format(year=year, tile_id=tile_id), timeout=60)

# Decode the response body once and reuse it for both lookups.
stream_info = json.loads(response.content)

try:
    # URI of the stream for the selected resolution
    stream_link = stream_info['metadata'][resolution]

    # Metadata about the images:
    #   sensing time (sensing_time)
    #   frame number (image_number)
    #   percentage of non-data in the image (nodata_pixel_percentage)
    #   cloud coverage (cloudy_pixel_percentage)
    frames_metadata = stream_info['images']
except KeyError:
    print('Stream not found')

# Finding the starting frame and frame count
Using the metadata of each frame, we find the first frame that was taken on or after the start date we specified in the beginning. Then we count how many frames the stream contains from that frame up to the end date. The variables start_frame and frame_count are saved for later.

In [None]:
    # start_frame: image number of the first frame inside [start_date, end_date]
    #              (-1 until such a frame is found)
    # frame_count: number of frames from start_frame up to end_date
    start_frame = -1
    frame_count = 0

    # "or []" turns missing metadata (stream not found) into the clear error
    # below instead of a TypeError.
    for frame_metadata in frames_metadata or []:

        frame_date = datetime.strptime(frame_metadata['sensing_time'], '%Y-%m-%dT%H:%M:%S')

        if frame_date > end_date:
            # Past the end of the window; stop scanning.
            # NOTE(review): relies on the metadata being ordered by sensing time - confirm.
            break

        if start_frame != -1:
            # A start frame is already known: this frame extends the selection.
            frame_count += 1
        elif frame_date >= start_date:
            # First frame inside the window.
            start_frame = frame_metadata['image_number']
            frame_count = 1

    # Without this check a start_frame of -1 would be passed on to ffmpeg and
    # used as a list index into frames_metadata.
    if start_frame == -1:
        raise ValueError(f'No frames found between {start_date} and {end_date}.')

# Transform and CRS

Since we want to georeference the image, we look up the bounding box of the Sentinel 2 tile and find its CRS (coordinate reference system). The transform to the bounding box and the CRS are saved and will be applied in the last step.

In [None]:
# Request the UTM bounding box and the EPSG projection of the tile. The timeout
# (in seconds) keeps the notebook from hanging when the server does not answer.
response = requests.get(url=ENDPOINT_TILE_BOUNDS + f"/{tile_id}", timeout=60)

response_content = json.loads(response.content.decode("utf-8"))
epsg = response_content["utm"]["epsg"]
bbox = response_content["utm"]["bbox"]

# Pixel size of the selected stream; the same value is used in x and y.
pixel_size = spacial_dim[resolution]

# Save the transform and the CRS to apply to the extracted frames later.
# NOTE(review): assumes bbox is ordered (min_x, min_y, max_x, max_y), so that
# bbox[0] / bbox[3] is the upper-left corner - confirm against the API.
transform = from_origin(bbox[0], bbox[3], pixel_size, pixel_size)
crs = CRS.from_string(epsg)

# Initializing the Image Loader

Up next is a rather technical task that needs to be executed. Feel free to not give this a closer look on your first read (and also any other time as well).

Briefly, this code lets us create *image_gen* in the next segment, which loads a new image on every iteration of the loop in that segment.

In [None]:
def find_width_height(probe_str: str):
    """
    This function looks through a string to find width and height in pixels.

    The string is split at ', ' and the first element that starts with a
    ``<width>x<height>`` token wins. Only the first whitespace-separated token
    of an element is parsed, so annotations that follow the size, such as
    ``1920x1080 [SAR 1:1 DAR 16:9]``, do not prevent a match.

    :param probe_str: text to search, e.g. one line of ffmpeg's stream description
    :type probe_str: str

    :return: - width (int), height (int); (-1, -1) if no size was found
    """
    for element in probe_str.split(', '):
        tokens = element.split()
        if not tokens:
            # empty or whitespace-only element
            continue

        try:
            # Raises ValueError when a part is not an integer or when the
            # token does not consist of exactly two 'x'-separated parts.
            _width, _height = map(int, tokens[0].split('x'))
        except ValueError:
            continue

        return _width, _height

    return -1, -1

def width_height_from_std_err(process: Popen):
    """
    Scan the stderr output of a ffmpeg process line by line and return the
    first image size (in pixels) that find_width_height detects.

    Reading stops at the first line that yields a size, or when stderr
    returns an empty bytes object.

    :param process: process whose stderr is read
    :type process: Popen

    :return: - width (int), height (int); -1 for both if no line yields a size
    """
    found_width, found_height = -1, -1

    while True:
        raw_line = process.stderr.readline()
        if raw_line == b"":
            # nothing left to read
            break

        found_width, found_height = find_width_height(raw_line.decode('utf-8'))
        if not (found_width == -1 or found_height == -1):
            break

    return found_width, found_height

def load_images_s2(_process: Popen, _width: int, _height: int):
    """
    This generator reads one sentinel 2 image from a stream on each __next__() and converts it into a
    numpy array.

    Each image is read from the stdout of the process as _width * _height pixels
    with three uint8 bands and is yielded with the band axis first, i.e. with
    shape (3, _height, _width). The generator ends as soon as a read returns
    fewer bytes than one full image. The stdout of the process is closed when
    the generator finishes, also when it is closed before the stream ends.

    :param _process: process whose stdout delivers the raw image bytes
    :type _process: Popen
    :param _width: image width in pixels
    :type _width: int
    :param _height: image height in pixels
    :type _height: int

    :raises ValueError: if _width or _height is not positive

    :return: - next_image (np.array)
    """
    if _width <= 0 or _height <= 0:
        # A size of zero would make every read return b'' and never end.
        raise ValueError(f'Invalid image size: {_width}x{_height}')

    band_count = 3
    pix_dtype = np.uint8
    # Size of one complete image in bytes; the same for every frame.
    bytes_per_image = _width * _height * np.dtype(pix_dtype).itemsize * band_count

    try:
        while True:
            buffer = _process.stdout.read(bytes_per_image)

            if len(buffer) != bytes_per_image:
                # end of stream or truncated last image
                break

            img_h_w_rgb = np.frombuffer(buffer, pix_dtype).reshape(_height, _width, band_count)

            # (height, width, band) -> (band, height, width)
            yield np.moveaxis(img_h_w_rgb, 2, 0)
    finally:
        # Also runs when the consumer stops iterating early.
        _process.stdout.close()
    
def ffmpeg_init(_stream_link: str, _start_frame: int, _frame_count: int):
    """
    This function initializes an image generator for sentinel 2 (load_images_s2).
    For this it accesses the stream (_stream_link), seeks to the time of _start_frame
    and sets -t to the duration needed to load as many frames as _frame_count.

    :param _stream_link: URI of the stream to read
    :type _stream_link: str
    :param _start_frame: number of the first frame to load
    :type _start_frame: int
    :param _frame_count: number of frames to load
    :type _frame_count: int

    :raises ConnectionError: if the image size cannot be read from ffmpeg's stderr

    :return: - load_images_s2() (generator)
    """
    # Frame numbers are converted to seconds with the stream's frame rate.
    # 'rgb24' rawvideo gives three uint8 values per pixel, which is the layout
    # load_images_s2 reads.
    command = ['ffmpeg', '-hide_banner',
               '-ss', f'{_start_frame / FPS:.2f}',
               '-i', _stream_link,
               '-pix_fmt', 'rgb24',
               '-f', 'rawvideo',
               '-t', f'{_frame_count / FPS:.2f}',
               'pipe:']

    process = sp.Popen(command, stdout=sp.PIPE, stderr=sp.PIPE)

    # The image size is taken from the text ffmpeg writes to stderr.
    width, height = width_height_from_std_err(process)
    process.stderr.close()

    if width == -1 or height == -1:
        # Do not leave the ffmpeg process and its pipe behind on failure.
        process.stdout.close()
        process.kill()
        process.wait()
        raise ConnectionError('Stream not found. ')

    return load_images_s2(process, width, height)

# Creating Datasets and extracting to geotiffs

With the preparation done, we can finally get to the data. This step is done in one for loop as long as image_gen is loading new images. Loop variable frame_no is set to begin at start_frame and set to the frame number of the current image. 
 
After the image is read into a DataArray, the CRS is applied and a transform is written to complete georeferencing the image. The DataArray itself is set up by the `xa.DataArray(...)` call.

Then we save the image- and geo-information in the DataArray to a geotiff.



In [None]:
# Start ffmpeg and get the generator that yields one image per frame.
image_gen = ffmpeg_init(stream_link, start_frame, frame_count)

# frame_no starts at start_frame and is used to look up the metadata of the image.
# NOTE(review): assumes a frame's image_number equals its index in frames_metadata - confirm.
for frame_no, image in enumerate(image_gen, start=start_frame):

    current_frame_metadata = frames_metadata[frame_no]

    # Wrap the (band, y, x) array in a DataArray.
    da_image = xa.DataArray(image, dims=['rgb', 'y', 'x'])

    # Georeference: write the CRS of the selected tile and the transform
    # derived from the tile's bounding box.
    da_image.rio.write_crs(input_crs=crs, inplace=True)
    da_image.rio.write_transform(transform=transform, inplace=True)

    # your code

    # File name: <save_path><tile_id>_<resolution>_<sensing time without ':' and '-'>.tiff
    sensing_stamp = f"{current_frame_metadata['sensing_time']}".replace(':', '').replace('-', '')
    tif_name = f"{save_path}{tile_id}_{resolution}_{sensing_stamp}.tiff"

    # Save the image as a geotiff.
    da_image.rio.to_raster(tif_name)

In [None]:
# Status message for the user once this last cell is reached.
print('All done.')