# Uploading files

This notebook uploads the files that previously failed to upload. Before running it, make sure that the AWS configuration token has been refreshed.

To do so, run ``aws sso login --sso-session fadel-session``, where ``fadel-session`` is the name of your session. If you don't remember the session name, look it up in the file ``~/.aws/config``.

In [None]:
# Install the notebook's dependencies into the kernel's environment:
# boto3 (with its optional `crt` extra) for the S3 calls and tqdm for the progress bar.
%pip install boto3[crt]
%pip install tqdm

In [None]:
# Display ~/.aws/config to look up the SSO session name and the profile name
# needed by the login and authentication cells below.
%pycat ~/.aws/config

In [None]:
# Renew the AWS access token. Replace ``fadel-session`` with the name of your
# own session, as shown by the cell above.
!aws sso login --sso-session fadel-session

In [None]:
# Imports
import boto3
import os
from tqdm import tqdm
from pathlib import Path

In [None]:
# Authenticate
# TODO: set PROFILE_NAME to the name of your own AWS profile
PROFILE_NAME = 'my-dev-profile'
# Session bound to that profile; the S3 client and resource used below are created from it
my_session = boto3.Session(profile_name=PROFILE_NAME)

In [None]:
# Create the S3 handles from the session, then print the name of every bucket.
# s3_client is reused further down to list the multipart uploads.
s3_client = my_session.client('s3')
s3_resources = my_session.resource('s3')
for bucket in s3_resources.buckets.all():
    print(bucket.name)

In [None]:
def upload_file(file_name, bucket:str, root_dir_path:str, client=None):
    """Upload a single local file to an S3 bucket.

    The object key is ``file_name`` written with forward slashes, so the
    bucket mirrors the layout found under ``root_dir_path``.

    Args:
        file_name (str | Path): path of the file relative to ``root_dir_path``;
            it is also used as the S3 object key.
        bucket (str): name of the destination bucket.
        root_dir_path (str | Path): local directory that ``file_name`` is relative to.
        client: optional S3 client to reuse across calls. When omitted, a new
            client is created from ``my_session``.

    Returns:
        None
    """
    # Location of the file on the local disk
    local_path = os.path.join(root_dir_path, file_name)
    # The key must be a plain string; '/' is the conventional delimiter in S3 keys
    object_key = Path(file_name).as_posix()
    if client is None:
        client = my_session.client('s3')
    # Upload the file
    client.upload_file(local_path, bucket, object_key)

### Get failed multipart uploads

In [None]:
# get failed multipart uploads
# Collects the keys of the multipart uploads still listed as pending in the
# bucket, so that the corresponding files can be uploaded again.
root_dir = ... # path to the folder hosting all your files
bucket_name = 'aerialcountdata'
# The listing is paginated, so walk through every page rather than only the first
paginator = s3_client.get_paginator('list_multipart_uploads')
failed_keys = []
for page in paginator.paginate(Bucket=bucket_name):
    # 'Uploads' is missing from the response when nothing is pending
    failed_keys.extend(upload['Key'] for upload in page.get('Uploads', []))
# A key may have several pending uploads: keep each key once, in listing order
filesToUpload = list(dict.fromkeys(failed_keys))

## print failed multi-parts upload
# for a in filesToUpload:
#     print(a)

### Get files in directory

In [None]:
# Define files to upload
# NOTE: this overwrites any filesToUpload list built from the failed multipart uploads.
root_dir = ... # Absolute path to the folder hosting all your files
bucket_name = 'aerialcountdata'
# rglob('*') also yields directories; keep regular files only so that the
# progress bar counts real uploads candidates and no S3 lookup is wasted on folders
filesToUpload = [path for path in Path(root_dir).rglob('*') if path.is_file()]

### Upload files
Only files that do not already exist in the bucket are uploaded.

In [None]:
# Upload files
# Goes through filesToUpload and uploads every local file whose key is not yet in the bucket.
s3 = my_session.resource('s3')
# Exception class raised by failed S3 calls, taken from the client so no extra import is needed
S3ClientError = s3.meta.client.exceptions.ClientError
root_path = Path(root_dir)
num_exists = 0
num_uploads = 0
for file in tqdm(filesToUpload,desc='Uploading files'):

    # Entries may be Path objects (directory scan) or plain key strings (failed multipart uploads)
    file = Path(file)

    # Object key = path relative to root_dir, written with forward slashes
    try:
        key = file.relative_to(root_path).as_posix()
    except ValueError:
        # Not located under root_dir: the entry is already relative (e.g. an S3 key)
        key = file.as_posix()

    # Skip directories and entries without a local file before querying S3
    if not (root_path / key).is_file():
        continue

    # check if file already exists (load() fetches the object's metadata)
    try:
        s3.Object(bucket_name, key).load()
    except S3ClientError as err:
        # Only a 404 means the object is missing; any other failure (expired
        # token, access denied, ...) must not be mistaken for a missing file
        if err.response['Error']['Code'] != '404':
            raise
        ## upload file
        upload_file(file_name=key,root_dir_path=root_dir,bucket=bucket_name)
        num_uploads += 1
    else:
        num_exists += 1


In [None]:
# Example: upload a single dummy file (uncomment the lines below to run)
# dummyfile = "collage_elephants.PNG"
# root_dir = '../../' # path to the folder hosting all your files
# bucket_name = 'wildaidata-test' # 'aerialcountdata'
# upload_file(file_name=dummyfile,root_dir_path=root_dir,bucket=bucket_name)

In [None]:
# Summary of the upload run, based on the counters filled in by the upload loop
print(f"{num_uploads} file(s) uploaded, {num_exists} already in the bucket")