In [None]:
import boto3
import os
from botocore.exceptions import NoCredentialsError, ClientError
from collections import defaultdict


def upload_files_to_s3(files, bucket_name, folder_name, temp_creds, region='us-west-2'):
    """Upload local files under a "folder" (key prefix) of an S3 bucket.

    A zero-byte placeholder object named ``folder_name + '/'`` is created
    first when no object exists under that prefix. Each file is then
    uploaded to ``f"{folder_name}/{basename}"``. Failures are reported with
    ``print`` rather than raised: a failure to create the placeholder aborts
    the whole call, while a failure on one file does not stop the remaining
    uploads.

    Args:
        files: Iterable of local file paths to upload.
        bucket_name: Name of the destination bucket.
        folder_name: Key prefix (without trailing slash) to upload into.
        temp_creds: Mapping with the keys ``"aws_access_key_id"``,
            ``"aws_secret_access_key"`` and ``"aws_session_token"``.
        region: AWS region used for the session.

    Returns:
        None.

    Raises:
        KeyError: If ``temp_creds`` lacks one of the required keys.
    """
    # Create a session with temporary credentials
    session = boto3.Session(
        aws_access_key_id=temp_creds["aws_access_key_id"],
        aws_secret_access_key=temp_creds["aws_secret_access_key"],
        aws_session_token=temp_creds["aws_session_token"],
        region_name=region
    )

    # Create an S3 client using the session
    s3 = session.client('s3')

    def folder_exists(bucket, folder):
        """Return True if at least one object lives under ``folder + '/'``."""
        try:
            # The trailing slash keeps sibling prefixes from matching
            # (e.g. "a/b_18/..." must not count as folder "a/b_1").
            # MaxKeys=1 because only existence matters, not the listing.
            result = s3.list_objects_v2(
                Bucket=bucket, Prefix=folder + '/', MaxKeys=1
            )
            return 'Contents' in result
        except Exception as e:
            # Best effort: treat a failed listing as "does not exist" and let
            # the put_object below surface any real permission problem.
            print(f"Error checking folder existence: {e}")
            return False

    # Create folder if it does not exist
    if not folder_exists(bucket_name, folder_name):
        try:
            s3.put_object(Bucket=bucket_name, Key=(folder_name + '/'))
            print(f"Folder '{folder_name}' created in bucket '{bucket_name}'.")
        except Exception as e:
            print(f"Error creating folder: {e}")
            return

    # Upload each file to the specified folder
    for file in files:
        try:
            file_name = os.path.basename(file)
            s3.upload_file(file, bucket_name, f"{folder_name}/{file_name}")
            print(f"File '{file_name}' uploaded to '{folder_name}' in bucket '{bucket_name}'.")
        except Exception as e:
            # Keep going so one bad file does not block the rest of the batch.
            print(f"Error uploading file '{file}': {e}")


def get_files_with_substring(folder_path, substring):
    """Return paths of entries in *folder_path* whose names start with *substring*.

    Despite the function name, matching is done with ``str.startswith``, so
    only a leading match counts. Each returned path is ``folder_path`` joined
    with the entry name, in the order ``os.listdir`` yields them.
    """
    return [
        os.path.join(folder_path, entry)
        for entry in os.listdir(folder_path)
        if entry.startswith(substring)
    ]

def select_files_except_tif(folder_path):
    """Return paths of regular files in *folder_path* not ending in ``.tif``.

    Directories are skipped. The suffix test is case-sensitive and exact, so
    names ending in ``.TIF`` or ``.tiff`` are still selected.
    """
    candidates = (
        (entry, os.path.join(folder_path, entry))
        for entry in os.listdir(folder_path)
    )
    return [
        path
        for entry, path in candidates
        if os.path.isfile(path) and not entry.endswith('.tif')
    ]


def group_files_by_prefix(files, prefix_length):
    """Group file paths by the first *prefix_length* characters of their basename.

    Args:
        files: Iterable of file paths. Entries that are not existing regular
            files (directories, missing paths) are ignored.
        prefix_length: Number of leading basename characters used as the key.

    Returns:
        A ``defaultdict(list)`` mapping each prefix to the paths sharing it,
        in input order.
    """
    grouped_files = defaultdict(list)

    for file_path in files:
        # Skip directories and paths that do not exist
        if not os.path.isfile(file_path):
            continue

        # os.path.basename handles the platform's separator, unlike a manual
        # split on "/", which breaks on backslash-separated Windows paths.
        prefix = os.path.basename(file_path)[:prefix_length]
        grouped_files[prefix].append(file_path)

    return grouped_files

# Example usage:
if __name__ == "__main__":
    # Placeholder: upload_files_to_s3 reads "aws_access_key_id",
    # "aws_secret_access_key" and "aws_session_token" from this mapping,
    # so it must be filled in before running.
    temp_creds = {}
    bucket_name = 'sce.sentinel2'
    base_path = 'images/'

    # Gather every non-.tif file and bucket them by the first 18 characters
    # of the file name.
    file_paths = select_files_except_tif(base_path)
    grouped_files = group_files_by_prefix(file_paths, 18)

    for key, group in grouped_files.items():
        # Destination folder: "mesma/" plus the first six underscore-separated
        # fields of the prefix.
        s3_folder_name = "mesma/" + "_".join(key.split("_")[:6])
        print(group, s3_folder_name)
        print()
        upload_files_to_s3(group, bucket_name, s3_folder_name, temp_creds)

['images/ 10_S_GG_2024_8_18_mesma_20250418T134733_rmse', 'images/ 10_S_GG_2024_8_18_mesma_20250418T134733_fractions', 'images/ 10_S_GG_2024_8_18_mesma_20250418T134733.aux.xml', 'images/ 10_S_GG_2024_8_18_mesma_20250418T134733_fractions.hdr', 'images/ 10_S_GG_2024_8_18_mesma_20250418T134733', 'images/ 10_S_GG_2024_8_18_mesma_20250418T134733_rmse.hdr', 'images/ 10_S_GG_2024_8_18_mesma_20250418T134733.hdr', 'images/ 10_S_GG_2024_8_18_mesma_20250418T134733_rmse.aux.xml', 'images/ 10_S_GG_2024_8_18_mesma_20250418T134733_fractions.aux.xml'] mesma/ 10_S_GG_2024_8_18

Folder 'mesma/ 10_S_GG_2024_8_18' created in bucket 'sce.sentinel2'.
File ' 10_S_GG_2024_8_18_mesma_20250418T134733_rmse' uploaded to 'mesma/ 10_S_GG_2024_8_18' in bucket 'sce.sentinel2'.
File ' 10_S_GG_2024_8_18_mesma_20250418T134733_fractions' uploaded to 'mesma/ 10_S_GG_2024_8_18' in bucket 'sce.sentinel2'.
File ' 10_S_GG_2024_8_18_mesma_20250418T134733.aux.xml' uploaded to 'mesma/ 10_S_GG_2024_8_18' in bucket 'sce.sentinel2'.