## 1_download_VIDA_datasets
### This notebook downloads the building footprint catalog provided by VIDA for a given country in Parquet format

### Initial configuration
#### To start working with this notebook, you need to provide the necessary credentials and settings
#### Below is a configuration template. Prepare it separately from this notebook, then copy and paste the entire content between the triple quotes into the input field of the next cell
    """
    {
    "COS_ENDPOINT_URL": "s3.private.eu-de.cloud-object-storage.appdomain.cloud",
    "COS_AUTH_ENDPOINT_URL": "https://iam.cloud.ibm.com/oidc/token",
    "COS_APIKEY": "xxx",
    "COUNTRY_NAME": "Kenya",
    "VIDA_PARQUET_BUCKET": "parquets"
    }
    """


In [None]:
# Read notebook configuration
import getpass
import json

# The configuration is entered through getpass so the pasted JSON (which
# includes the COS API key) is not echoed into the notebook output.
config_str = getpass.getpass(prompt='Enter your prepared config: ')

# Parse the pasted JSON text into the settings dict used by the cells below.
config = json.loads(s=config_str)

In [None]:
# Import necessary libraries
import requests
import os
from botocore.client import Config
import ibm_boto3

In [None]:
# Maps a country name to the three-letter ISO code used in the download URL.
# Add a new "Country name": "ISO code" entry here to support another country.
country_mapper = {
    'Kenya': 'KEN',
    'India': 'IND',
}

In [None]:
# Initialise the IBM Cloud Object Storage (S3-compatible) client that is used
# to upload the downloaded parquet file to a bucket.
# The client needs a full endpoint URL including the scheme, while the config
# template provides a bare host name, so "https://" is added when it is missing.
cos_endpoint_url = config["COS_ENDPOINT_URL"]
if "://" not in cos_endpoint_url:
    cos_endpoint_url = f"https://{cos_endpoint_url}"

cos_client = ibm_boto3.client(
    service_name='s3',
    ibm_api_key_id=config["COS_APIKEY"],
    ibm_auth_endpoint=config["COS_AUTH_ENDPOINT_URL"],
    config=Config(signature_version='oauth'),
    endpoint_url=cos_endpoint_url,
)

In [None]:
def download_country_parquet(country:str, directory:str, target_bucket=None) -> None:
    """Download the VIDA building-footprint GeoParquet file for one country.

    The file is fetched from data.source.coop and saved as
    ``<directory>/<country>.parquet``. When ``target_bucket`` is given, the
    saved file is then uploaded to that bucket through the module-level
    ``cos_client``.

    Progress and recoverable failures (directory creation, non-200 download
    response, upload error) are reported with ``print``; in those cases the
    function returns without performing the remaining steps.

    Args:
        country: Country name; must be a key of ``country_mapper``. To support
            another country, add a ``"Country name": "ISO code"`` pair to
            that dictionary.
        directory: Local directory where the parquet file is saved. It is
            created when it does not exist.
        target_bucket: Optional bucket name. When a string is passed, the
            downloaded file is uploaded to this bucket under the key
            ``<country>.parquet``.

    Raises:
        KeyError: If ``country`` is not present in ``country_mapper``.
    """
    # Seconds to wait for the server before giving up instead of hanging forever.
    request_timeout = 60
    # Country files can be large, so the body is written to disk in 1 MiB chunks.
    chunk_size = 1 << 20

    # Resolve the ISO code first so an unsupported country fails with a clear
    # message before any directory is created or any request is sent.
    try:
        country_iso = country_mapper[country]
    except KeyError:
        known = ', '.join(country_mapper)
        raise KeyError(
            f'Unknown country "{country}". Add it to country_mapper (known: {known})'
        ) from None

    # Make sure the target directory exists.
    if os.path.exists(directory):
        print(f'\033[92mDirectory: "{directory}" exists')
    else:
        print('\033[93mTarget directory not exists, creating...')
        try:
            os.makedirs(directory)
        except OSError as e:
            print(f"\033[91mError occurred while creating directory {directory} \n Error: {str(e)}")
            # Without the directory the file cannot be saved, so skip the download.
            return
        print(f'\033[92mDirectory "{directory}" successfully created')

    # Assemble the final URL.
    url = f'https://data.source.coop/vida/google-microsoft-open-buildings/geoparquet/by_country/country_iso={country_iso}/{country_iso}.parquet'

    # Report the file size in megabytes when the server announces it.
    head_response = requests.head(url, allow_redirects=True, timeout=request_timeout)
    size = head_response.headers.get('content-length')
    if size is not None and size.isdigit():
        print('FILE SIZE: {:.2f} MB'.format(int(size) / float(1 << 20)))
    else:
        print('FILE SIZE: unknown')

    filename = f"{country}.parquet"
    file_path = os.path.join(directory, filename)

    # Stream the download so the whole file is never held in memory at once.
    with requests.get(url, stream=True, timeout=request_timeout) as response:
        if response.status_code != 200:
            print("\033[91mFailed to download the file.")
            # Nothing was saved, so there is nothing to upload either.
            return

        with open(file_path, "wb") as file:
            for chunk in response.iter_content(chunk_size=chunk_size):
                file.write(chunk)

    print(f"\033[92mFile: {filename} downloaded successfully!")

    # Optionally upload the file to the bucket.
    if isinstance(target_bucket, str):
        try:
            cos_client.upload_file(
                Filename=file_path,
                Bucket=target_bucket,
                Key=filename,
                ExtraArgs={'ContentDisposition': 'attachment'}
            )
        except Exception as e:
            print(f"\033[91mFailed upload file to the bucket {target_bucket}. Error: {e}")
        else:
            print(f'File {filename} successfully uploaded to the COS {target_bucket} bucket')

In [None]:
# Download the parquet file for the configured country.
# NOTE(review): the VIDA_PARQUET_BUCKET value is used here as the LOCAL target
# directory and target_bucket is left unset, so nothing is uploaded to COS.
# Confirm whether an upload to that bucket was intended.
download_country_parquet(
    country=config["COUNTRY_NAME"],
    directory=config["VIDA_PARQUET_BUCKET"],
)