Setting up the AWS CLI is only required once. From the terminal run
```aws configure```

In [1]:
import boto3
from pathlib import Path
import pprint

In [2]:
# If you have multiple aws credentials you can switch to the user profile 'omdena' like this:

if ('omdena' in boto3.session.Session().available_profiles):
    profile = 'omdena'
    print(f"Using profile: {profile}")
else:
    profile = 'default'
    print(f"Using profile: {profile}")
#profile = 'default' # overwrite if you want to use other profiles
boto3.setup_default_session(profile_name=profile)

Using profile: omdena


In [3]:
AWS_REGION = 'us-west-1'
BUCKET_NAME = 'omdena-gpsdd-senegal' # bucket_names may not include capital letters or underscores

In [4]:
data_folder = Path.cwd().parent.parent.joinpath('data')
data_folder

WindowsPath('G:/Omdena-food/GPSDD-Food-Security-Senegal/data')

In [5]:
root_folder = Path.cwd().parent.parent
root_folder

WindowsPath('G:/Omdena-food/GPSDD-Food-Security-Senegal')

In [6]:
s3_client = boto3.client('s3', region_name=AWS_REGION)

In [7]:
# try to create the bucket, this will fail if the bucket name is not globally unique
try:
    response = s3_client.create_bucket(
        Bucket=BUCKET_NAME,
        ACL='public-read',
        CreateBucketConfiguration={'LocationConstraint': AWS_REGION})
    bucket_url = response['Location']
    print(f"Bucket URL: {bucket_url}") 
except:
    print("Could not create bucket")

Could not create bucket


In [8]:
# List S3 buckets 
s3_client.list_buckets()['Buckets']

[{'Name': 'apify-wri-pdfs',
  'CreationDate': datetime.datetime(2020, 10, 31, 0, 15, 32, tzinfo=tzutc())},
 {'Name': 'elasticbeanstalk-us-east-2-867071081797',
  'CreationDate': datetime.datetime(2020, 12, 18, 18, 8, 57, tzinfo=tzutc())},
 {'Name': 'engiecoco',
  'CreationDate': datetime.datetime(2020, 8, 30, 3, 48, 41, tzinfo=tzutc())},
 {'Name': 'irap-example-data-lidar-data-croatia',
  'CreationDate': datetime.datetime(2020, 11, 27, 11, 28, 6, tzinfo=tzutc())},
 {'Name': 'irap-example-data-video-and-road-feature-data-mexico',
  'CreationDate': datetime.datetime(2020, 11, 27, 11, 25, 41, tzinfo=tzutc())},
 {'Name': 'omdena-engie-json-tests',
  'CreationDate': datetime.datetime(2020, 7, 25, 19, 8, 52, tzinfo=tzutc())},
 {'Name': 'omdena-gpsdd-senegal',
  'CreationDate': datetime.datetime(2020, 12, 6, 21, 55, tzinfo=tzutc())},
 {'Name': 'omdenazzapp',
  'CreationDate': datetime.datetime(2020, 8, 16, 6, 21, 34, tzinfo=tzutc())},
 {'Name': 'panoramic-data',
  'CreationDate': datetime.dat

In [9]:
# uploading objects to S3
file_name = 'omdena_logo.png'  # change this to the path of any local file that you want to upload
with open(file_name, "rb") as f:
    key = file_name # here you can rename the file or attach prefixes for folder structures
    s3_client.upload_fileobj(f, BUCKET_NAME, key)

## Creating a S3 resource

In [10]:
bucket = boto3.resource('s3').Bucket(name=BUCKET_NAME)

In [11]:
#prefix= "data/MYD11A2_Temperature_Senegal_regions/"
prefix = "data/Landsat_8_Region/"

In [12]:
keys = []
folders = []
files = []
for object_summary in bucket.objects.filter(Prefix=prefix):
    object_key = object_summary.key
    keys.append(object_key)
    if key.endswith("/"):
        folders.append(object_key)
    else:
        files.append(object_key)
pprint.pprint(files)

['data/Landsat_8_Region/',
 'data/Landsat_8_Region/dakar_region_2015.tif',
 'data/Landsat_8_Region/dakar_region_2016.tif',
 'data/Landsat_8_Region/dakar_region_2017.tif',
 'data/Landsat_8_Region/dakar_region_2018.tif',
 'data/Landsat_8_Region/diourbel_region_2015.tif',
 'data/Landsat_8_Region/diourbel_region_2016.tif',
 'data/Landsat_8_Region/diourbel_region_2017.tif',
 'data/Landsat_8_Region/diourbel_region_2018.tif',
 'data/Landsat_8_Region/fatick_region_2015.tif',
 'data/Landsat_8_Region/fatick_region_2016.tif',
 'data/Landsat_8_Region/fatick_region_2017.tif',
 'data/Landsat_8_Region/fatick_region_2018.tif',
 'data/Landsat_8_Region/kaffrine_region_2015.tif',
 'data/Landsat_8_Region/kaffrine_region_2016.tif',
 'data/Landsat_8_Region/kaffrine_region_2017.tif',
 'data/Landsat_8_Region/kaffrine_region_2018.tif',
 'data/Landsat_8_Region/kaolack_region_2015.tif',
 'data/Landsat_8_Region/kaolack_region_2016.tif',
 'data/Landsat_8_Region/kaolack_region_2017.tif',
 'data/Landsat_8_Region/kao

In [13]:
s3_resource = boto3.resource('s3')

In [14]:
def download_s3_folder(bucket_name, s3_folder, local_dir=None):
    """
    Download the contents of a folder directory
    Args:
        bucket_name: the name of the s3 bucket
        s3_folder: the folder path in the s3 bucket
        local_dir: a relative or absolute directory path in the local file system
    """
    s3_resource = boto3.resource('s3')
    bucket = s3_resource.Bucket(bucket_name)

    # extract folder structure 
    folders = []
    for obj in bucket.objects.filter(Prefix=s3_folder):
        obj_split = obj.key.split('/')
        if obj.key.endswith('/'): # do we still need this?
            #print(obj.keys) 
            folders.append(obj.key)
        elif (len(obj_split) > 1):
            subfolder = "/".join(obj_split[:-1])
            #print(subfolder)
            folders.append(subfolder)
        else:
            pass

    # create local folder structure if it doesn't exist
    folder_set = {*folders}
    #print(set(folder_set))
    for folder in folder_set:
        new_path= local_dir.joinpath(folder)
        print(new_path)
        new_path.mkdir(mode=0o777, parents=True, exist_ok=True)
        
    
    # download the files into the corresponding directories
    for obj in bucket.objects.filter(Prefix=s3_folder):
        if obj.key.endswith('/'): # important, because code will break if we try to write to a folder instead of a file
            pass
        else:
            obj_split = obj.key.split('/')
            if (len(obj_split) > 1):
                #print(obj_split)
                file_name = obj_split[-1]
                #print(f"Filename {file_name}")
                sub_folder = "/".join(obj_split[:-1])
                folder = local_dir.joinpath(sub_folder)
                file_path = folder.joinpath(file_name)
                #print(f"File path {file_path}")
                #print(obj)
                if file_path.is_file(): #TODO: this does not seem to work yet!
                    print(f"File {file_path} already exists and will not be downloaded.")
                else:
                    print(f"Downloading file to {file_path}")
                    #bucket.download_file(obj.key, obj.key)
                    bucket.download_file(obj.key, str(file_path))
    return

In [16]:
#download_s3_folder(BUCKET_NAME, prefix, root_folder)
download_s3_folder(BUCKET_NAME, prefix, data_folder)

G:\Omdena-food\GPSDD-Food-Security-Senegal\data\data\Landsat_8_Region
G:\Omdena-food\GPSDD-Food-Security-Senegal\data\data\Landsat_8_Region
File G:\Omdena-food\GPSDD-Food-Security-Senegal\data\data\Landsat_8_Region\dakar_region_2015.tif already exists and will not be downloaded.
File G:\Omdena-food\GPSDD-Food-Security-Senegal\data\data\Landsat_8_Region\dakar_region_2016.tif already exists and will not be downloaded.
File G:\Omdena-food\GPSDD-Food-Security-Senegal\data\data\Landsat_8_Region\dakar_region_2017.tif already exists and will not be downloaded.
File G:\Omdena-food\GPSDD-Food-Security-Senegal\data\data\Landsat_8_Region\dakar_region_2018.tif already exists and will not be downloaded.
File G:\Omdena-food\GPSDD-Food-Security-Senegal\data\data\Landsat_8_Region\diourbel_region_2015.tif already exists and will not be downloaded.
File G:\Omdena-food\GPSDD-Food-Security-Senegal\data\data\Landsat_8_Region\diourbel_region_2016.tif already exists and will not be downloaded.
File G:\Omdena

In [9]:
# listing all the objects in a bucket
for obj in bucket.objects.all():
    print(obj)

_senegal-tambacounda_2015-01-01_2016-01-01.tif')
s3.ObjectSummary(bucket_name='omdena-gpsdd-senegal', key='data/MYD11A2_Temperature_Senegal_regions/senegal_Temperature_senegal-thies_2015-01-01_2016-01-01.tif')
s3.ObjectSummary(bucket_name='omdena-gpsdd-senegal', key='data/MYD11A2_Temperature_Senegal_regions/senegal_Temperature_senegal-ziguinchor_2015-01-01_2016-01-01.tif')
s3.ObjectSummary(bucket_name='omdena-gpsdd-senegal', key='data/MYD11A2_Temperature_SouthSudan_counties/')
s3.ObjectSummary(bucket_name='omdena-gpsdd-senegal', key='data/MYD11A2_Temperature_SouthSudan_counties/SouthSudan_Temperature_abiemnhom_2013-01-01_2014-01-01.tif')
s3.ObjectSummary(bucket_name='omdena-gpsdd-senegal', key='data/MYD11A2_Temperature_SouthSudan_counties/SouthSudan_Temperature_abyei_2013-01-01_2014-01-01.tif')
s3.ObjectSummary(bucket_name='omdena-gpsdd-senegal', key='data/MYD11A2_Temperature_SouthSudan_counties/SouthSudan_Temperature_akobo_2013-01-01_2014-01-01.tif')
s3.ObjectSummary(bucket_name='omde