In [1]:
import os
import json5
from tqdm import tqdm
import boto3
import yaml
from datetime import datetime
from botocore.exceptions import NoCredentialsError, ClientError

In [2]:
def load_config(config_file):
    """Load the YAML configuration file and return its parsed content.

    Args:
        config_file: Path to the YAML configuration file.

    Returns:
        Whatever ``yaml.safe_load`` produces for the document. The rest of
        this notebook indexes the result like a nested dict
        (``config['aws'][...]``, ``config['local'][...]``).

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding, which is not UTF-8 on many Windows setups and would corrupt
    # non-ASCII values such as folder names.
    with open(config_file, 'r', encoding='utf-8') as file:
        config = yaml.safe_load(file)
    return config

In [3]:
def write_log(config, log_file_name, log_text):
    """Append one timestamped row to a text log object stored in S3.

    The existing object is downloaded, the new row is appended, and the whole
    content is uploaded again. When the object does not exist yet, a new log
    starting with a "Start Log" line is created.

    Logging is best-effort: failures to read or write the log object are
    printed and swallowed so that a logging problem never aborts the caller.

    Args:
        config: Parsed configuration; reads ``config['aws']`` keys
            ``region_name``, ``log_bucket_name``, ``log_path``,
            ``aws_access_key_id`` and ``aws_secret_access_key``.
        log_file_name: Object name of the log file, appended to ``log_path``.
        log_text: Text of the row; written after the timestamp, separated by
            a comma. Callers pass comma-separated fields.

    Returns:
        None.
    """
    # Extract parameters from the configuration
    region_name = config['aws']['region_name']
    log_bucket_name = config['aws']['log_bucket_name']
    log_path = config['aws']['log_path']
    aws_access_key_id = config['aws']['aws_access_key_id']
    aws_secret_access_key = config['aws']['aws_secret_access_key']

    # Create an S3 client with the specified credentials
    s3_client = boto3.client(
        's3',
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        region_name=region_name
    )

    # Construct the S3 key for the log file
    s3_key = f"{log_path}/{log_file_name}"

    # Row layout: "<timestamp>,<log_text>"
    current_time = datetime.now().strftime("%Y/%m/%d %H:%M:%S")
    log_row = f"{current_time},{log_text}"

    # Try to download the existing log file, if it exists
    try:
        response = s3_client.get_object(Bucket=log_bucket_name, Key=s3_key)
        existing_log = response['Body'].read().decode('utf-8')

        # Append the new log row to the existing log content
        log_content = existing_log + "\n" + log_row

    except s3_client.exceptions.NoSuchKey:
        # Must be listed before ClientError: NoSuchKey is a ClientError
        # subclass. A missing file simply means this is the first entry.
        log_content = "Start Log\n" + log_row

    except (NoCredentialsError, ClientError) as e:
        # Any other read failure (no credentials, access denied, ...):
        # report it and stop. Writing now would replace a log we could not
        # read with a single row, destroying its history.
        print(f"Failed to read existing log {log_bucket_name}/{s3_key}: {e}")
        return

    # Write the updated log content back to S3
    try:
        s3_client.put_object(Bucket=log_bucket_name, Key=s3_key, Body=log_content.encode('utf-8'))
    except (NoCredentialsError, ClientError) as e:
        print(f"Failed to write log entry to {log_bucket_name}/{s3_key}: {e}")

In [4]:
def upload_folders_to_s3(config, sources_folder_name, target_region_name, target_bucket_name, target_path, log_file='upload_folders'):
    """Upload every file under a local folder (recursively) to an S3 bucket.

    The folder structure below ``sources_folder_name`` is reproduced under
    ``target_path`` in the bucket. One log row per file is written through
    ``write_log`` with the outcome ("Success" or the error kind). A failure
    on one file is reported and logged, then the loop continues with the
    next file.

    Args:
        config: Parsed configuration; reads ``config['aws']`` keys
            ``aws_access_key_id`` and ``aws_secret_access_key`` and is passed
            through to ``write_log``.
        sources_folder_name: Local root folder to walk.
        target_region_name: AWS region used for the S3 client.
        target_bucket_name: Destination bucket.
        target_path: Key prefix inside the bucket.
        log_file: Log file base name; ".csv" is appended.

    Returns:
        None.
    """
    # Extract parameters from the configuration
    aws_access_key_id = config['aws']['aws_access_key_id']
    aws_secret_access_key = config['aws']['aws_secret_access_key']
    log_file = log_file + '.csv'

    # Create an S3 client with the specified credentials
    s3_client = boto3.client(
        's3',
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
        region_name=target_region_name
    )

    # Progress bar class for tracking each file
    class ProgressPercentage(object):
        """Upload callback that drives one tqdm progress bar per file."""

        def __init__(self, filename):
            self._filename = filename
            self._seen_so_far = 0
            # Raises FileNotFoundError if the file vanished since os.walk saw it
            self._total = os.path.getsize(filename)
            # Initialize the tqdm progress bar for the current file
            print(f"Uploading {filename}:")
            self._pbar = tqdm(total=self._total, unit='B', unit_scale=True)

        def __call__(self, bytes_amount):
            # Update progress
            self._seen_so_far += bytes_amount
            self._pbar.update(bytes_amount)
            # Close the progress bar when done
            if self._seen_so_far >= self._total:
                self._pbar.close()

        def close(self):
            # Safe to call after the bar already closed itself in __call__.
            self._pbar.close()

    # Traverse through all files and subfolders in the given folder
    for root, dirs, files in os.walk(sources_folder_name):
        for file in files:
            # Full path of the file
            local_file_path = os.path.join(root, file)

            # Construct the S3 key by replacing the base folder path with the target_path in S3
            relative_path = os.path.relpath(local_file_path, sources_folder_name)
            s3_key = os.path.join(target_path, relative_path).replace("\\", "/")

            progress = None
            try:
                # Created inside the try so a missing file is handled below
                progress = ProgressPercentage(local_file_path)
                s3_client.upload_file(
                    local_file_path,
                    target_bucket_name,
                    s3_key,
                    Callback=progress
                )
                status = "Success"
            except FileNotFoundError:
                print(f"The file {local_file_path} was not found")
                status = "FileNotFoundError"
            except NoCredentialsError:
                print("Credentials not available")
                status = "NoCredentialsError"
            except ClientError as e:
                print(f"Failed to upload {local_file_path} to {target_bucket_name}/{s3_key}: {e}")
                status = f"ClientError: {e}"
            except boto3.exceptions.S3UploadFailedError as e:
                # upload_file reports failed transfers with this exception,
                # which is not a ClientError subclass.
                print(f"Failed to upload {local_file_path} to {target_bucket_name}/{s3_key}: {e}")
                status = f"S3UploadFailedError: {e}"
            finally:
                # Zero-byte files and failed uploads never reach 100%, so the
                # bar would otherwise stay open.
                if progress is not None:
                    progress.close()

            # Log the outcome outside the try so that a logging problem is
            # never mistaken for an upload failure.
            log_text = f"{target_bucket_name},{s3_key},{file},{status}"
            write_log(config, log_file, log_text)
            print()

In [5]:
def main(config_file='config/config.yaml'):
    """
    Upload local folders - Main entry point

    Loads the configuration, reads the local source folder and the S3 target
    from it, and uploads the folder tree. Upload results are logged under the
    log name 'upload_sources'.

    Args:
        config_file: Path to the YAML configuration file. A relative path is
            resolved against the current working directory, which is printed
            first to make path problems easy to spot.

    Raises:
        Exception: Any error from loading the configuration or uploading is
            printed and then re-raised unchanged.
    """
    print("Current working directory:", os.getcwd())

    try:
        config = load_config(config_file)

        # Source Folder and sub-folders to upload
        sources_folder_name = config['local']['sources_folder_name']

        # Target aws s3
        target_region_name = config['aws']['region_name']
        target_bucket_name = config['aws']['sources_bucket_name']
        target_path = config['aws']['sources_path']
        log_file = 'upload_sources'

        upload_folders_to_s3(config, sources_folder_name, target_region_name, target_bucket_name, target_path, log_file)

    except Exception as e:
        print(f"Main process error: {str(e)}")
        raise

# Entry-point guard: call main() only when this code runs as the top-level
# program, not when it is imported as a module.
if __name__ == "__main__":
    main()

Current working directory: C:\github_repos\BIU_LLM_Project
Uploading L:/My Drive/Source_Files\lior_test.txt:


100%|████████████████████████████████████████████████████████████████████████████████| 27.0/27.0 [00:00<00:00, 52.9B/s]



Uploading L:/My Drive/Source_Files\desktop.ini:


100%|█████████████████████████████████████████████████████████████████████████████████| 246/246 [00:00<00:00, 5.04kB/s]



Uploading L:/My Drive/Source_Files\DOC\BI Fin Ops Sylabus.docx:


100%|█████████████████████████████████████████████████████████████████████████████| 3.76M/3.76M [00:00<00:00, 18.3MB/s]



Uploading L:/My Drive/Source_Files\DOC\DevSecOps Sylabus.docx:


100%|█████████████████████████████████████████████████████████████████████████████| 2.07M/2.07M [00:00<00:00, 8.98MB/s]



Uploading L:/My Drive/Source_Files\DOC\DMP Sylabus.docx:


100%|█████████████████████████████████████████████████████████████████████████████| 1.38M/1.38M [00:00<00:00, 9.40MB/s]



Uploading L:/My Drive/Source_Files\DOC\Product Management Sylabus.docx:


100%|█████████████████████████████████████████████████████████████████████████████| 1.74M/1.74M [00:00<00:00, 8.91MB/s]



Uploading L:/My Drive/Source_Files\DOC\Data Science Sylabus.docx:


100%|█████████████████████████████████████████████████████████████████████████████| 1.76M/1.76M [00:00<00:00, 8.58MB/s]



Uploading L:/My Drive/Source_Files\DOC\QA Sylabus.docx:


100%|█████████████████████████████████████████████████████████████████████████████| 2.27M/2.27M [00:00<00:00, 13.9MB/s]



Uploading L:/My Drive/Source_Files\DOC\CDAIO-CDO Sylabus.docx:


100%|█████████████████████████████████████████████████████████████████████████████| 5.08M/5.08M [00:00<00:00, 19.5MB/s]



Uploading L:/My Drive/Source_Files\DOC\CND Sylabus.docx:


100%|█████████████████████████████████████████████████████████████████████████████| 6.17M/6.17M [00:00<00:00, 18.6MB/s]



Uploading L:/My Drive/Source_Files\DOC\CISO Sylabus.docx:


100%|█████████████████████████████████████████████████████████████████████████████| 1.37M/1.37M [00:00<00:00, 11.4MB/s]



Uploading L:/My Drive/Source_Files\DOC\desktop.ini:


100%|█████████████████████████████████████████████████████████████████████████████████| 246/246 [00:00<00:00, 6.04kB/s]



Uploading L:/My Drive/Source_Files\PDF\CDAIO-CDO Intro.pdf:


100%|█████████████████████████████████████████████████████████████████████████████| 2.53M/2.53M [00:00<00:00, 8.90MB/s]



Uploading L:/My Drive/Source_Files\PDF\BI Fin Ops Sylabus.pdf:


100%|███████████████████████████████████████████████████████████████████████████████| 699k/699k [00:00<00:00, 3.47MB/s]



Uploading L:/My Drive/Source_Files\PDF\CDAIO-CDO Sylabus.pdf:


100%|███████████████████████████████████████████████████████████████████████████████| 900k/900k [00:00<00:00, 4.45MB/s]



Uploading L:/My Drive/Source_Files\PDF\QA Sylabus.pdf:


100%|███████████████████████████████████████████████████████████████████████████████| 753k/753k [00:00<00:00, 3.84MB/s]



Uploading L:/My Drive/Source_Files\PDF\Product Management Sylabus.pdf:


100%|███████████████████████████████████████████████████████████████████████████████| 815k/815k [00:00<00:00, 3.96MB/s]



Uploading L:/My Drive/Source_Files\PDF\DS & AI for Managers Sylabus.pdf:


100%|███████████████████████████████████████████████████████████████████████████████| 112k/112k [00:00<00:00, 1.41MB/s]



Uploading L:/My Drive/Source_Files\PDF\CISCO CCNA Sylabus.pdf:


100%|███████████████████████████████████████████████████████████████████████████████| 214k/214k [00:00<00:00, 1.32MB/s]



Uploading L:/My Drive/Source_Files\PDF\Check Point CCSA Sylabus.pdf:


100%|███████████████████████████████████████████████████████████████████████████████| 168k/168k [00:00<00:00, 1.21MB/s]



Uploading L:/My Drive/Source_Files\PDF\Data Science Sylabus.pdf:


100%|███████████████████████████████████████████████████████████████████████████████| 735k/735k [00:00<00:00, 3.49MB/s]



Uploading L:/My Drive/Source_Files\PDF\DMP Sylabus.pdf:


100%|███████████████████████████████████████████████████████████████████████████████| 583k/583k [00:00<00:00, 3.09MB/s]



Uploading L:/My Drive/Source_Files\PDF\DevSecOps Sylabus.pdf:


100%|███████████████████████████████████████████████████████████████████████████████| 873k/873k [00:00<00:00, 4.61MB/s]



Uploading L:/My Drive/Source_Files\PDF\CND Sylabus.pdf:


100%|█████████████████████████████████████████████████████████████████████████████| 1.14M/1.14M [00:00<00:00, 6.40MB/s]



Uploading L:/My Drive/Source_Files\PDF\CISO Sylabus.pdf:


100%|███████████████████████████████████████████████████████████████████████████████| 832k/832k [00:00<00:00, 3.95MB/s]



Uploading L:/My Drive/Source_Files\PDF\Advanced Project Management Sylabus.pdf:


100%|████████████████████████████████████████████████████████████████████████████████| 127k/127k [00:00<00:00, 992kB/s]



Uploading L:/My Drive/Source_Files\PDF\FinOps Sylabus.pdf:


100%|███████████████████████████████████████████████████████████████████████████████| 196k/196k [00:00<00:00, 1.18MB/s]



Uploading L:/My Drive/Source_Files\PDF\AWS Solution Architect Sylabus.pdf:


100%|███████████████████████████████████████████████████████████████████████████████| 200k/200k [00:00<00:00, 1.26MB/s]



Uploading L:/My Drive/Source_Files\PDF\AWS Practitioner Sylabus.pdf:


100%|████████████████████████████████████████████████████████████████████████████████| 147k/147k [00:00<00:00, 847kB/s]



Uploading L:/My Drive/Source_Files\PDF\CC Sylabus.pdf:


100%|████████████████████████████████████████████████████████████████████████████████| 156k/156k [00:00<00:00, 989kB/s]



Uploading L:/My Drive/Source_Files\PDF\CCNA Sylabus.pdf:


100%|███████████████████████████████████████████████████████████████████████████████| 214k/214k [00:00<00:00, 1.21MB/s]



Uploading L:/My Drive/Source_Files\PDF\CCSP Sylabus.pdf:


100%|████████████████████████████████████████████████████████████████████████████████| 169k/169k [00:00<00:00, 953kB/s]



Uploading L:/My Drive/Source_Files\PDF\CISSP Sylabus.pdf:


100%|███████████████████████████████████████████████████████████████████████████████| 189k/189k [00:00<00:00, 2.11MB/s]



Uploading L:/My Drive/Source_Files\PDF\DPO Sylabus.pdf:


100%|████████████████████████████████████████████████████████████████████████████████| 153k/153k [00:00<00:00, 863kB/s]



Uploading L:/My Drive/Source_Files\PDF\Fortinet NSE Sylabus.pdf:


100%|███████████████████████████████████████████████████████████████████████████████| 189k/189k [00:00<00:00, 1.21MB/s]



Uploading L:/My Drive/Source_Files\PDF\desktop.ini:


100%|█████████████████████████████████████████████████████████████████████████████████| 246/246 [00:00<00:00, 4.86kB/s]



Uploading L:/My Drive/Source_Files\JSON\desktop.ini:


100%|█████████████████████████████████████████████████████████████████████████████████| 246/246 [00:00<00:00, 6.52kB/s]



