In [None]:
from google.cloud import storage_transfer

In [1]:
import os
# all for requests
import requests

import json
import geopandas as gpd
import rasterio
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Environment variables
datasets_dir = os.environ.get('DATA_DIR_LOCAL')
DATA_FOLDER = os.path.join(datasets_dir, "RPG/PaysBasque")
source_data = gpd.read_file(os.path.join(DATA_FOLDER, "contour-du-pays-basque.geojson"))

In [3]:
bbox = list(source_data.geometry.values[0].bounds)
date_range = "2019-08-01T00:00:00Z/2019-08-31T12:31:12Z"
year = 2019

In [36]:
bbox

[-1.792350952083531, 42.95072315985167, -0.745872490046169, 43.547254988966124]

In [2]:
assets_to_download = ['thumbnail', 'tileinfo_metadata', 'granule_metadata', 'red','green', 'blue', 'green', 'nir', 'nir08', 'nir09',  'scl', 'visual','swir16', 'swir22', 'wvp']

In [6]:
def get_satellite_data_per_trimester(bbox, year, trimester, limit=200):
    '''return urls of all channels for all year in all the bbox
    returns urls, properties_dict'''
    url = 'https://earth-search.aws.element84.com/v1/search'
    trimester_dateranges = {
        1: f"{year}-01-01T00:00:00Z/{year}-03-31T23:59:59Z",
        2: f"{year}-04-01T00:00:00Z/{year}-06-30T23:59:59Z",
        3: f"{year}-07-01T00:00:00Z/{year}-09-30T23:59:59Z",
        4: f"{year}-10-01T00:00:00Z/{year}-12-31T23:59:59Z",
    }
    data={"bbox": bbox,
         "datetime": trimester_dateranges[trimester],
         "collections":["sentinel-2-l2a"],
         "limit": limit}
    response = requests.post(url, json=data).json()
    print(f"Number of matches: {response['numberMatched']}, number returned = {response['numberReturned']}")
    urls = []
    properties_dict = {}
    for feature in response['features']:
        urls.extend(get_urls_from_eo_feature(feature))
        properties = eo_feature2properties_dict(feature)
        properties_dict[properties['id']]=properties
    return urls, properties_dict

In [7]:
def get_satellite_data_per_month(bbox, year, month, limit=100):
    '''return urls of all channels for all year in all the bbox
    returns urls, properties_dict'''
    url = 'https://earth-search.aws.element84.com/v1/search'
    month_dateranges = {
        1: f"{year}-01-01T00:00:00Z/{year}-01-31T23:59:59Z",
        2: f"{year}-02-01T00:00:00Z/{year}-02-28T23:59:59Z",
        3: f"{year}-03-01T00:00:00Z/{year}-03-31T23:59:59Z",
        4: f"{year}-04-01T00:00:00Z/{year}-04-30T23:59:59Z",
        5: f"{year}-05-01T00:00:00Z/{year}-05-31T23:59:59Z",
        6: f"{year}-06-01T00:00:00Z/{year}-06-30T23:59:59Z",
        7: f"{year}-07-01T00:00:00Z/{year}-07-31T23:59:59Z",
        8: f"{year}-08-01T00:00:00Z/{year}-08-31T23:59:59Z",
        9: f"{year}-09-01T00:00:00Z/{year}-09-30T23:59:59Z",
        10: f"{year}-10-01T00:00:00Z/{year}-10-31T23:59:59Z",
        11: f"{year}-11-01T00:00:00Z/{year}-11-30T23:59:59Z",
        12: f"{year}-12-01T00:00:00Z/{year}-12-31T23:59:59Z"
    }
    data={"bbox": bbox,
         "datetime": month_dateranges[month],
         "collections":["sentinel-2-l2a"],
         "limit": limit}
    response = requests.post(url, json=data).json()
    print(f"Number of matches: {response['numberMatched']}, number returned = {response['numberReturned']}")
    urls = []
    properties_dict = {}
    for feature in response['features']:
        urls.extend(get_urls_from_eo_feature(feature))
        properties = eo_feature2properties_dict(feature)
        properties_dict[properties['id']]=properties
    return urls, properties_dict

In [8]:
def get_satellite_data(bbox, year, limit=200):
    urls = []
    properties_dict = {}
    for month in range(1, 13):
        month_urls, month_properties = get_satellite_data_per_month(bbox, year, month, limit)
        urls.extend(month_urls)
        properties_dict.update(month_properties)
    return urls, properties_dict

In [9]:
def get_urls_from_eo_feature(eo_feature):
    urls = []
    for asset in assets_to_download:
        urls.append(eo_feature['assets'][asset]['href'])
    return urls

In [10]:
def eo_feature2properties_dict(eo_feature):
    ''' From eo_feature json (obtained from eart-search request) create dict of properties'''
    properties_dict = {
        'id':eo_feature['id'],
        'datetime': eo_feature['properties']['datetime'],
        'crs': eo_feature['properties']['proj:epsg'],
        'geometry': eo_feature['geometry'],
        'bbox': eo_feature['bbox'],
        'assets_list': assets_to_download,
        'urls_list': get_urls_from_eo_feature(eo_feature)
    }
    return properties_dict

In [11]:
def urls2file(file_path, urls_list):
    with open(file_path, "w") as file:
        file.writelines("TsvHttpData-1.0\n")
        for url in urls_list:
            file.writelines(url+"\n")

In [12]:
urls2file("../url_list_file.tsv", urls)

NameError: name 'urls' is not defined

In [13]:
bbox_landes = [-1.52487, 43.487949, 0.136726,44.532196]
bbox_landes

[-1.52487, 43.487949, 0.136726, 44.532196]

In [14]:
urls, properties_dict = get_satellite_data(bbox_landes, 2019, limit=160)
#urls2file("../url_list_file.tsv", urls)
with open("../sat_data_properties.json", "w") as file:
    json.dump(properties_dict, file)
upload_to_storage_and_return_token("../url_list_file.tsv", "test_download_from_url/url_list_file-2.tsv", "satellite_crops")

Number of matches: 54, number returned = 54
Number of matches: 51, number returned = 51
Number of matches: 49, number returned = 49
Number of matches: 48, number returned = 48
Number of matches: 50, number returned = 50
Number of matches: 58, number returned = 58
Number of matches: 48, number returned = 48
Number of matches: 50, number returned = 50
Number of matches: 46, number returned = 46
Number of matches: 52, number returned = 52
Number of matches: 87, number returned = 87
Number of matches: 96, number returned = 96


In [18]:
with open("../sat_data_properties.json", "r") as file:
    sat_data_properties = json.load(file)

In [25]:
for key in sat_data_properties.keys():
    if key.startswith("S2B_30TXP_2019"):
        print(sat_data_properties[key]['bbox'])
        break

[-1.0266214817510957, 43.23561023331161, -0.37336693317772074, 44.230355918640115]


In [26]:
from google.cloud.storage import Client

client = Client()
bucket = self.client.bucket(self.bucket_name)

def list_dir(self, dir_path):
    prefix=dir_path+"/"
    return self.client.list_blobs(self.bucket_name, prefix=prefix)

# Automatically download from urls on bucket

TsvHttpData-1.0
https://example.com/buckets/obj1      1357      wHENa08V36iPYAsOa2JAdw==
https://example.com/buckets/obj2      2468      R9acAaveoPd2y8nniLUYbw==


In [21]:
from datetime import datetime
from google.cloud import storage

def create_one_time_http_transfer(
    description: str,
    list_url: str,
    sink_bucket: str,
    project_id: str="satellite-crops"
):
    """Creates a one-time transfer job from Amazon S3 to Google Cloud
    Storage."""
    storage_client = storage.Client.from_service_account_json("../.secrets/satellite-crops-e2c6e26ef012.json")
    # the same time creates a one-time transfer
    one_time_schedule = {"day": now.day, "month": now.month, "year": now.year}

    transfer_job_request = storage_transfer.CreateTransferJobRequest(
        {
            "transfer_job": {
                "project_id": project_id,
                "description": description,
                "status": storage_transfer.TransferJob.Status.ENABLED,
                "schedule": {
                    "schedule_start_date": one_time_schedule,
                    "schedule_end_date": one_time_schedule,
                },
                "transfer_spec": {
                    "http_data_source": storage_transfer.HttpData(list_url=list_url),
                    "gcs_data_sink": {
                        "bucket_name": sink_bucket,
                    },
                },
            }
        }
    )

    result = client.create_transfer_job(transfer_job_request)
    print(f"Created transferJob: {result.name}")

def upload_to_storage_and_return_token(
    file_input_path: str, file_output_path: str, bucket_name: str
) -> str:
    gcs = storage.Client.from_service_account_json("../.secrets/satellite-crops-e2c6e26ef012.json")
    # # Get the bucket that the file will be uploaded to.
    bucket = gcs.bucket(bucket_name)
    # # Create a new blob and upload the file's content.
    blob = bucket.blob(file_output_path)
    blob.upload_from_filename(file_input_path)
    return blob.generate_signed_url(datetime.now())

In [66]:
upload_to_storage_and_return_token("../url_list_file.tsv", "test_download_from_url/url_list_file-2.tsv", "satellite_crops")

'https://storage.googleapis.com/satellite_crops/test_download_from_url/url_list_file-2.tsv?Expires=1717772321&GoogleAccessId=satellite-crops%40appspot.gserviceaccount.com&Signature=PiRFsJt%2FpMxOa8zyWtAH%2BXGAl4ogsZE98sQRCW9t3a9H1cVrvRbhQaKrZZqXR0PbI7PI5b30YISEvSgsmODSy722uSP03J8Qe4bqAUk2Gh1UK2Y3geuGtxoDj%2FpD6L106PCSNvTCToUKtK%2BX8BYtdssSmzvhIxJXD0LJJ5CikmQqhMchSYXKoM4XqAySNAcRZJ3jBmTLs8V2vSIfDLW3819SSbISHbrq2FeJ2ucxPsgojfxrubYalahlMvstnuwwuiogPAb5maLjYMX29WClcDctMbWwq7ZuBIl09VRxZf0dPHRohkmYLh02%2FwMdB3crcxDpWwyZsj8ZLew9pH2zPQ%3D%3D'