# How to access open Earth observation training data
**Assessment One**

## Introduction
In this notebook, you will download geospatial training data (labels and source imagery) from [Radiant MLHub](https://www.radiant.earth/mlhub/), step by step.

## Enter required data

In [None]:
import os  # used here only to read credentials from the environment

# Radiant MLHub access token (available from http://dashboard.mlhub.earth/).
# Prefer exporting MLHUB_ACCESS_TOKEN so the secret is not saved inside the
# notebook; if the variable is unset the token falls back to an empty string
# and can be pasted here instead.
ACCESS_TOKEN = os.environ.get('MLHUB_ACCESS_TOKEN', '')
collectionId = 'ref_african_crops_uganda_01' # Collection ID for Dalberg Data Insights Crop Type Uganda

# Set limit to a number between 1 and 10000
limit = 1000

# Enter the name of the folder where you want to save your data
folder_name = ''

# AWS credentials used for the S3 downloads. They are read from the
# AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment variables when set,
# otherwise they default to empty strings and can be filled in here.
AWS_ACCESS_KEY_ID = os.environ.get('AWS_ACCESS_KEY_ID', '')
AWS_SECRET_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY', '')

## Authentication

In [None]:
# only the requests module is required to access the API
import requests

# Headers sent with every MLHub API request: the bearer token for
# authentication, and a request for a JSON response body.
headers = dict(
    Authorization=f'Bearer {ACCESS_TOKEN}',
    Accept='application/json',
)

## Parameters

In [None]:

# Optional query filters that control which items are returned
# (the maximum limit is 10000).
# NOTE: this rebinds the `limit` chosen in the "Enter required data" cell.
limit = 10
bounding_box = []
date_time = []

# Retrieve the items of the collection together with their metadata.
r = requests.get(
    f'https://api.radiant.earth/mlhub/v1/collections/{collectionId}/items',
    headers=headers,
    params={'limit': limit, 'bbox': bounding_box, 'datetime': date_time},
)
collection = r.json()

## Functions to download the labels and images

In [None]:
import boto3 # Required to download assets hosted on S3 
import os 
from urllib.parse import urlparse



# Shared S3 client used by download_s3(), authenticated with the AWS
# credentials entered at the top of the notebook.
s3 = boto3.client(
    's3',
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_KEY,
)

def download_s3(uri, folder_name):
    """Download one S3 object into ``<folder_name>/images/<date>/``.

    The bucket name is the first dot-separated label of the URI's host name
    and the object key is the URI path without its leading slash. The
    ``<date>`` folder is built by joining directory components 4, 5 and 6
    (0-based) of the key with ``-``.
    NOTE(review): these components are presumably year/month/day -- confirm
    against the key layout of the collection being downloaded.

    Args:
        uri: URI of the object to download.
        folder_name: Root folder for the download. An empty string means the
            current working directory (not the filesystem root).

    Raises:
        IndexError: If the key's directory has fewer than seven components,
            so no date folder can be derived from it.
    """
    parsed_url = urlparse(uri)
    bucket = parsed_url.hostname.split('.')[0]
    path = parsed_url.path[1:]

    date_parts = os.path.dirname(path).split('/')[4:7]
    if len(date_parts) < 3:
        # Fail before touching the filesystem, with a message that names the key.
        raise IndexError(
            f'cannot derive a date folder from S3 key {path!r}: '
            'expected at least seven directory components'
        )
    dates = '-'.join(date_parts)

    # os.path.join keeps the target relative when folder_name is empty; plain
    # string formatting would have produced the absolute path '/images/...'.
    local_path = os.path.join(folder_name, 'images', dates)
    os.makedirs(local_path, exist_ok=True)

    filename = os.path.basename(path)

    # The download is made with the "requester pays" option.
    s3.download_file(bucket, path, os.path.join(local_path, filename), ExtraArgs={'RequestPayer': 'requester'})
    print(f'Downloaded s3://{bucket}/{path}')
   
    
def get_download_uri(uri):
    """Return the redirect target announced for ``uri``.

    The request is sent with automatic redirects disabled, and the value of
    the response's ``Location`` header is returned unchanged.
    """
    response = requests.get(uri, allow_redirects=False)
    location = response.headers['Location']
    return location

def download(href, folder_name):
    """Resolve an asset link to its download URI and fetch it into ``folder_name``.

    Args:
        href: Asset link; it is passed to ``get_download_uri`` to obtain the
            actual download URI.
        folder_name: Root folder handed on to ``download_s3``.
    """
    download_uri = get_download_uri(href)
    print(download_uri)

    download_s3(download_uri, folder_name)
 

def download_source_and_labels(item, folder_name):
    """Download every asset of each ``source`` link of ``item``.

    Each link whose ``rel`` is ``'source'`` is requested, and every asset
    listed in the response is passed to ``download``. Despite the function's
    name, no label asset is downloaded here. The number of source links that
    were followed is printed at the end.

    Args:
        item: Item dictionary; its ``links`` entry may be missing or ``None``,
            in which case nothing is downloaded.
        folder_name: Root folder handed on to ``download``.
    """
    # Treat a missing or null 'links' entry as "no links" instead of crashing.
    links = item.get('links') or []
    count = 0
    # Download the source imagery
    for link in links:
        if link['rel'] != 'source':
            continue
        count += 1
        r = requests.get(link['href'], headers=headers)

        # Only the asset descriptions are needed, not their keys.
        for asset in r.json()['assets'].values():
            download(asset['href'], folder_name)
    print(count)
            
def get_labels(uri, folder_name, classes=None, max_items_downloaded=None, items_downloaded=0):
    """Download the ``labels`` asset of every feature returned by ``uri``.

    Only the single response of ``uri`` is processed; ``next`` page links are
    not followed.

    Args:
        uri: Items endpoint to request.
        folder_name: Root folder handed on to ``download``.
        classes: Accepted for signature compatibility; not used for filtering.
        max_items_downloaded: Stop once this many label assets have been
            downloaded (counting from ``items_downloaded``). ``None`` means
            no limit.
        items_downloaded: Number of items already downloaded before this call.
    """
    r = requests.get(uri, headers=headers)
    collection = r.json()

    for feature in collection.get('features', []):
        # Skip features that carry no labels asset instead of crashing on None.
        assets = feature.get('assets') or {}
        labels = assets.get('labels')
        if not labels:
            continue

        # Download the labels
        download(labels['href'], folder_name)

        # Stop once the requested maximum has been reached.
        items_downloaded += 1
        if max_items_downloaded is not None and items_downloaded >= max_items_downloaded:
            return
        
        
def get_imagery(uri, folder_name, max_items_downloaded, classes=None, items_downloaded=0):
    """Download the source imagery of every feature returned by ``uri``.

    Each feature of the response is passed to ``download_source_and_labels``.
    When the response contains a ``next`` link with a non-null ``href``, that
    page is processed as well by calling this function recursively, carrying
    the running item count along.

    Args:
        uri: Items endpoint (or page link) to request.
        folder_name: Root folder handed on to ``download_source_and_labels``.
        max_items_downloaded: Stop once this many items have been processed;
            ``None`` means no limit. The check runs after each item, so at
            least one item of a non-empty page is always processed.
        classes: Forwarded to the next page unchanged; not used for filtering.
        items_downloaded: Number of items already processed before this call.
    """
    r = requests.get(uri, headers=headers)
    collection = r.json()

    for feature in collection.get('features', []):
        # Download the source imagery for the item
        download_source_and_labels(feature, folder_name)

        # Stop downloading items once we reach the maximum we specified
        items_downloaded += 1
        print(items_downloaded)
        if max_items_downloaded is not None and items_downloaded >= max_items_downloaded:
            return

    # Get the next page of results, if available
    for link in collection.get('links', []):
        if link['rel'] == 'next' and link['href'] is not None:
            get_imagery(link['href'], folder_name, max_items_downloaded,
                        classes=classes, items_downloaded=items_downloaded)
            


## Download Labels

In [None]:
# Download the label assets of the collection's items into `folder_name`.
get_labels(
    f'https://api.radiant.earth/mlhub/v1/collections/{collectionId}/items',
    folder_name,
    max_items_downloaded=1000,
)

## Download Source Imagery

In [None]:
# Download the source imagery of the collection's items into `folder_name`.
get_imagery(
    f'https://api.radiant.earth/mlhub/v1/collections/{collectionId}/items',
    folder_name,
    max_items_downloaded=1000,
)