## CAIRN Backblaze B2 Folder Upload Script

This script uploads all files from a specified local folder (e.g., on your Desktop)
to CAIRN's Backblaze B2 bucket and lists the uploaded files.

1.  **Install Libraries:**
    Open your terminal or command prompt and run:
    pip install python-dotenv b2sdk

2.  **Create `.env` File:**
    In the SAME directory where you save this Python script, create a file named exactly `.env` (note the leading dot). Open it in a text editor and add your Backblaze B2 Application Key ID, Application Key, and the exact name of the bucket you want to upload to:

    B2_ACCESS_KEY_ID=YOUR_APPLICATION_KEY_ID  
    B2_SECRET_ACCESS_KEY=YOUR_APPLICATION_KEY  
    B2_BUCKET_NAME=your-exact-b2-bucket-name  

    Replace the placeholders with your actual credentials and bucket name.

3.  **Set the Local Folder Path:**
    Find the line below that starts with `LOCAL_FOLDER_PATH = ...`  
    Modify the path to point to the *exact* folder on your computer that contains the files you want to upload. If you are unsure how to write the path for your operating system, ask an LLM for help.


4.  **Set B2 Folder Prefix (Optional):**
    If you want the files to appear inside a specific 'folder' within your B2 bucket, change `B2_FOLDER_PREFIX`.   
    For example: B2_FOLDER_PREFIX = "project_files/january/"
    Leave as "" to upload to the bucket's root.

5.  **Run the Script:**
    Open your terminal or command prompt, navigate to the directory where you saved this script and the `.env` file, and run `python your_script_name.py`. Alternatively, run the cells as a Jupyter notebook.

6.  **Check Output:**
    The script will log its progress in the terminal, including connection status, which files it's uploading, and a final summary listing the successfully uploaded files (with their names as they appear in B2).


In [None]:
#Imports
import os
import logging
from pathlib import Path
from dotenv import load_dotenv
from b2sdk.v2 import B2Api, InMemoryAccountInfo

In [None]:
# Environment loading, logging setup, and user-editable settings

# Load environment variables from the .env file.
load_dotenv()

# Log INFO and above, each line stamped with time and severity.
logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)

# ======================================================================
# USER CONFIGURATION -- review BOTH settings below before running
# ======================================================================

# Setting 1 of 2: LOCAL_FOLDER_PATH
#   Full path of the local folder whose files should be uploaded.
#   Windows example:   Path(r"C:\Users\YourUser\Documents\UploadMe")
#   Mac/Linux example: Path("/home/youruser/data/source_files")
#   Template:          Path(r"C:\path\to\your\local\FOLDER")
LOCAL_FOLDER_PATH = Path(r"C:\Users\shugs\OneDrive\Desktop\Cairn (W)\CAIRN_PDF_Testing_3.2025\PUC RuleMaking")  # <--- CHANGE THIS

# Setting 2 of 2: B2_FOLDER_PREFIX
#   The "virtual folder" inside the B2 bucket that receives the files.
#   Use "" to upload to the bucket root; a non-empty prefix should end
#   with a forward slash '/'.
#   Root example:   ""
#   Folder example: "project_files/images/"
#   The value below makes upload_testing_folder the default destination.
B2_FOLDER_PREFIX = "upload_testing_folder/"

# ======================================================================
# End of user configuration
# ======================================================================


In [None]:
def _collect_existing_b2_names(bucket, b2_folder_prefix):
    """Return the names of files that sit directly under a B2 prefix.

    Args:
        bucket: Bucket object returned by ``B2Api.get_bucket_by_name``.
        b2_folder_prefix (str): Prefix to list; "" lists the bucket root.

    Returns:
        tuple[set[str], int]: File names with the prefix stripped, and the
        total number of entries the listing yielded (files and folders).
    """
    existing_names = set()
    items_listed = 0
    # b2sdk documents `folder_to_list` as a string whose default '' means the
    # bucket root, so an empty prefix is passed as "" rather than None.
    listing = bucket.ls(
        folder_to_list=b2_folder_prefix or "",
        recursive=False,
        latest_only=True,
    )
    for file_info, folder_name in listing:
        items_listed += 1

        # With recursive=False, sub-"folders" are reported as entries whose
        # folder_name is set; they are not files in the target location.
        if folder_name is not None:
            continue

        relative_name = file_info.file_name
        if relative_name is None:
            # Checked before any string method is called on the name.
            logging.warning(f"Found an item in B2 listing with a None file name: {file_info}")
            continue

        if b2_folder_prefix and relative_name.startswith(b2_folder_prefix):
            relative_name = relative_name[len(b2_folder_prefix):]

        # Empty names and names ending in "/" (folder markers) are ignored.
        if relative_name and not relative_name.endswith('/'):
            existing_names.add(relative_name)

    return existing_names, items_listed


def _log_report_section(title, file_names):
    """Log one titled, alphabetically sorted section of the summary report."""
    logging.info(f"{title} ({len(file_names)}):")
    if file_names:
        for filename in sorted(file_names):
            logging.info(f"  - {filename}")
    else:
        logging.info("  (None)")


def sync_folder_to_b2(local_folder_path, b2_folder_prefix):
    """
    Uploads files from a local folder to a B2 bucket, skipping files
    that already exist. Provides a detailed report of actions taken.

    Only files directly inside ``local_folder_path`` are considered
    (sub-directories are not descended into). A file is skipped when a file
    of the same name is already listed under ``b2_folder_prefix``.

    Credentials are read from the environment variables B2_ACCESS_KEY_ID,
    B2_SECRET_ACCESS_KEY and B2_BUCKET_NAME.

    Side effect: the module-level ``script_failed_file_count`` is set to the
    number of failed uploads when the summary is produced, or to -1 when an
    unexpected error interrupts the run.

    Args:
        local_folder_path (Path): Path object for the local folder.
        b2_folder_prefix (str): Target prefix (folder) in B2. Should end with '/'
                                 if not empty.

    Returns:
        bool: True if the sync ran through to the summary report (individual
        uploads may still have failed), False if setup, authorization, or
        the B2 listing failed.
    """
    # Declared before any use so every assignment below targets the global.
    global script_failed_file_count

    logging.info(f"Starting B2 sync process for local folder: {local_folder_path}")
    logging.info(f"Target B2 prefix: '{b2_folder_prefix}'")

    # --- Step 1: Validate Local Folder ---
    if not local_folder_path.is_dir():
        logging.error(f"Error: Local folder not found or is not a directory: {local_folder_path}")
        return False
    logging.info(f"Local folder confirmed: {local_folder_path}")

    # --- Step 2: Get B2 Credentials ---
    required_vars = ["B2_ACCESS_KEY_ID", "B2_SECRET_ACCESS_KEY", "B2_BUCKET_NAME"]
    key_id = os.getenv("B2_ACCESS_KEY_ID")
    application_key = os.getenv("B2_SECRET_ACCESS_KEY")
    bucket_name = os.getenv("B2_BUCKET_NAME")

    if not all([key_id, application_key, bucket_name]):
        missing_vars = [var for var in required_vars if not os.getenv(var)]
        logging.error(f"Error: Missing required B2 configuration from .env file: {', '.join(missing_vars)}")
        return False
    logging.info("B2 credentials loaded successfully from .env file.")

    # File names collected for the final report
    uploaded_files = []
    skipped_files = []
    failed_files = []
    local_files_processed = 0

    try:
        # --- Step 3: Initialize B2 API and Connect ---
        logging.info("Initializing B2 API connection...")
        info = InMemoryAccountInfo()
        b2_api = B2Api(info)
        logging.info("Authorizing B2 account...")
        b2_api.authorize_account("production", key_id, application_key)
        logging.info("Successfully authorized B2 account.")
        logging.info(f"Connecting to bucket: {bucket_name}...")
        bucket = b2_api.get_bucket_by_name(bucket_name)
        logging.info(f"Successfully connected to bucket: {bucket_name}")

        # --- Step 4: Get List of Existing Files in B2 Target Prefix ---
        logging.info(f"Checking for existing files in B2 bucket '{bucket_name}' under prefix '{b2_folder_prefix}'...")
        try:
            existing_b2_files, items_listed = _collect_existing_b2_names(bucket, b2_folder_prefix)
        except Exception as e:
            # Without a reliable listing we cannot decide what to skip, so a
            # listing failure is treated as fatal rather than re-uploading blindly.
            logging.error(f"Error listing files in B2 bucket (prefix: '{b2_folder_prefix}'): {e}")
            logging.error("Cannot reliably determine existing files. Aborting sync.")
            return False

        if items_listed == 0:
            logging.info("B2 prefix/folder appears empty or does not contain files.")
        logging.info(f"Found {len(existing_b2_files)} existing files in B2 target location.")

        # --- Step 5: Iterate Through Local Files and Upload/Skip ---
        logging.info(f"Scanning local folder for files: {local_folder_path}")
        for local_item in local_folder_path.iterdir():
            if not local_item.is_file():
                continue

            local_files_processed += 1
            local_file_name = local_item.name
            target_b2_file_name = f"{b2_folder_prefix}{local_file_name}"

            if local_file_name in existing_b2_files:
                logging.info(f"Skipping: '{local_file_name}' already exists in B2 as '{target_b2_file_name}'.")
                skipped_files.append(local_file_name)
                continue

            logging.info(f"Uploading: '{local_file_name}' to B2 as '{target_b2_file_name}'...")
            try:
                uploaded_file_info = bucket.upload_local_file(
                    local_file=str(local_item),
                    file_name=target_b2_file_name
                )
                logging.debug(f"Successfully uploaded {local_file_name} (ID: {uploaded_file_info.id_})")
                uploaded_files.append(local_file_name)
            except Exception as e:
                # One failed upload must not stop the remaining files.
                logging.error(f"Failed to upload '{local_file_name}': {e}")
                failed_files.append(local_file_name)

        # --- Step 6: Print Final Summary Report ---
        logging.info("=" * 50)
        logging.info("              Sync Process Summary Report")
        logging.info("=" * 50)
        logging.info(f"Local Source Folder: {local_folder_path}")
        logging.info(f"Target B2 Bucket: {bucket_name}")
        logging.info(f"Target B2 Prefix: '{b2_folder_prefix}'")
        logging.info(f"\nTotal Local Files Processed: {local_files_processed}")
        logging.info("-" * 50)

        _log_report_section("Files Uploaded Successfully", uploaded_files)
        logging.info("-" * 50)

        _log_report_section("Files Skipped - Already Exist in B2", skipped_files)
        logging.info("-" * 50)

        _log_report_section("Files Failed to Upload", failed_files)
        logging.info("=" * 50)

        # Expose the failure count so the caller can pick an exit status.
        script_failed_file_count = len(failed_files)

        return True  # The sync logic ran to completion

    except Exception as e:
        logging.error(f"A critical error occurred during the B2 sync process: {e}")
        logging.exception("Detailed traceback:")
        script_failed_file_count = -1  # Failure occurred before the summary could be generated
        return False

# --- Main Execution Block ---
# Failure counter shared with sync_folder_to_b2(), which assigns it through a
# `global` statement: the number of failed uploads after a completed run, or
# -1 when the run did not reach the summary report.
script_failed_file_count = -1

if __name__ == "__main__":
    logging.info("=== B2 Folder Sync Script (.env, skip existing, detailed report) Starting ===")

    local_folder = Path(LOCAL_FOLDER_PATH)
    b2_prefix = str(B2_FOLDER_PREFIX)

    # A non-empty prefix must end with '/' so it behaves like a folder name.
    if b2_prefix and not b2_prefix.endswith('/'):
        logging.warning(f"B2 folder prefix '{b2_prefix}' did not end with '/'. Appending it.")
        b2_prefix += '/'

    # Execute the sync function
    process_completed = sync_folder_to_b2(local_folder, b2_prefix)

    # Determine final exit status based on whether the process ran and if failures occurred.
    # SystemExit is raised directly instead of calling exit(): exit() is a helper
    # injected by the `site` module for interactive use and is not guaranteed
    # to exist in every run mode.
    if process_completed and script_failed_file_count == 0:
        logging.info("Script finished successfully.")
    elif process_completed and script_failed_file_count > 0:
        logging.warning(f"Script finished, but {script_failed_file_count} file(s) failed to upload. See report above.")
        raise SystemExit(1)  # Non-zero status: some uploads failed
    else:
        # This case handles failures during setup or B2 connection before processing files
        logging.error("Script failed during setup or a major B2 operation (like listing files). Please check logs.")
        raise SystemExit(1)  # Non-zero status: the sync never completed

    # Only reached on full success; the failure branches exit above.
    logging.info("=== Script Execution Complete ===")