In [None]:
import os
import zipfile
import datetime
import shutil
import logging
import argparse  # Import the argparse module


def get_zip_file_creation_dates(zip_path):
    """
    Retrieves the recorded dates of CSV files within a ZIP archive.

    The date is taken from each member's ``date_time`` field. The ZIP format
    stores the member's last-modification timestamp there; this script uses
    it as the file's "creation date".

    A member whose stored timestamp is not a valid calendar date is skipped
    with a warning, so one bad entry does not discard the dates of the
    remaining members.

    Args:
        zip_path (str): The path to the ZIP file.

    Returns:
        dict: A dictionary mapping member filenames (as stored in the
            archive) to their dates (YYYY-MM-DD). If the archive cannot be
            opened, the error is logged and an empty dict is returned.
    """
    file_dates = {}
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            for file_info in zip_ref.infolist():
                if not file_info.filename.endswith('.csv'):
                    continue
                try:
                    # date_time is a (year, month, day, hour, minute, second) tuple.
                    stamp = datetime.datetime(*file_info.date_time)
                except ValueError as e:
                    # The raw DOS date fields can encode impossible dates
                    # (e.g. month 0 or Feb 31); skip only this member.
                    logging.warning(f"Invalid timestamp for {file_info.filename} in ZIP file: {e}")
                    continue
                file_dates[file_info.filename] = stamp.strftime("%Y-%m-%d")
    except Exception as e:
        # Best-effort by design: callers treat a missing date as "skip file".
        logging.error(f"Error reading ZIP file: {e}")  # Log any errors
    return file_dates


def setup_logging(log_level=logging.INFO):
    """
    Applies the script's logging configuration via ``logging.basicConfig``.

    Each record is rendered as "<timestamp> - <level name> - <message>".

    Args:
       log_level (int, optional):  Minimum severity to emit (e.g., logging.INFO, logging.DEBUG).  Defaults to logging.INFO.
    """
    record_format = '%(asctime)s - %(levelname)s - %(message)s'
    logging.basicConfig(format=record_format, level=log_level)


def extract_zip_file(zip_path, extract_path):
    """
    Unpacks every member of a ZIP archive into the given directory.

    Any exception raised while opening or extracting the archive is logged
    and reported through the return value instead of being propagated.

    Args:
        zip_path (str): The path to the ZIP file.
        extract_path (str): The directory to extract the files to.

    Returns:
        bool: True if extraction was successful, False otherwise.
    """
    succeeded = False
    try:
        with zipfile.ZipFile(zip_path, 'r') as archive:
            archive.extractall(extract_path)
        logging.info(f"Extracted ZIP file {zip_path} to {extract_path}")
        succeeded = True
    except Exception as exc:
        logging.error(f"Error extracting ZIP file {zip_path}: {exc}")
    return succeeded


def rename_and_move_files(extract_path, destination_base_path, file_to_folder, file_creation_dates):
    """
    Renames extracted CSV files based on their creation date and moves them to
    designated folders.

    For every ``*.csv`` directly inside ``extract_path`` that has an entry in
    ``file_creation_dates``, the text after the last underscore of the base
    name is replaced by the date (``name_11.csv`` -> ``name_2025-02-05.csv``).
    If another file in ``extract_path`` already has the new name, a
    ``(counter)`` suffix is appended. A file that already carries the target
    name is left as is instead of being treated as a collision with itself.

    The renamed file is then moved to ``destination_base_path/<folder>``,
    where ``<folder>`` belongs to the first key of ``file_to_folder`` (in
    dict order) that the new filename starts with. Files without a date or
    without a matching prefix are logged and left in ``extract_path``.

    Args:
        extract_path (str):  The path to the directory where files were extracted.
        destination_base_path (str): The base path for the destination folders.
        file_to_folder (dict): A dictionary mapping filename prefixes to folder names.
        file_creation_dates (dict):  A dictionary mapping original filenames to creation dates.

    Raises:
        OSError: If listing, renaming, creating a folder, or moving fails.
    """
    for filename in os.listdir(extract_path):
        if not filename.endswith(".csv"):
            continue
        if filename not in file_creation_dates:
            logging.warning(f"Could not determine creation date for {filename}, skipping rename/move.")
            continue

        creation_date_str = file_creation_dates[filename]
        base_name = filename[:-4]  # Remove .csv extension
        name_part = base_name.rsplit("_", 1)[0]  # Drop everything after the last underscore
        new_filename = f"{name_part}_{creation_date_str}.csv"
        old_file_path = os.path.join(extract_path, filename)
        new_file_path = os.path.join(extract_path, new_filename)

        # Only rename when the name actually changes; otherwise the existence
        # check below would find the file itself and append a bogus "(1)".
        if new_filename != filename:
            # Handle filename collisions inside extract_path by adding a counter.
            counter = 1
            while os.path.exists(new_file_path):
                new_filename = f"{name_part}_{creation_date_str}({counter}).csv"
                new_file_path = os.path.join(extract_path, new_filename)
                counter += 1

            os.rename(old_file_path, new_file_path)
            logging.info(f"Renamed {filename} to {new_filename}")

        # First matching prefix wins, in the mapping's iteration order.
        folder_name = next(
            (folder for key, folder in file_to_folder.items() if new_filename.startswith(key)),
            None,
        )
        if not folder_name:
            logging.warning(f"No destination folder found for {new_filename}")  # Log if no folder mapping
            continue

        destination_folder_path = os.path.join(destination_base_path, folder_name)
        os.makedirs(destination_folder_path, exist_ok=True)
        shutil.move(new_file_path, os.path.join(destination_folder_path, new_filename))
        logging.info(f"Moved {new_filename} to {destination_folder_path}")


def cleanup(zip_path, extract_path):
    """
    Deletes the original ZIP file and the extraction directory.

    The two removals are attempted independently: a failure to delete the
    ZIP file (for example because it is locked) is logged and does not
    prevent the extraction directory from being removed, and vice versa.
    Paths that do not exist are silently skipped. No exception is raised.

    Args:
        zip_path (str): The path to the ZIP file.
        extract_path (str): The path to the extraction directory.
    """
    try:
        if os.path.exists(zip_path):
            os.remove(zip_path)
            logging.info(f"Removed original ZIP file: {zip_path}")
    except Exception as e:
        logging.error(f"Error during cleanup: {e}")

    try:
        if os.path.exists(extract_path):
            shutil.rmtree(extract_path)
            logging.info(f"Removed extracted folder: {extract_path}")
    except Exception as e:
        logging.error(f"Error during cleanup: {e}")


def _parse_file_to_folder(items):
    """
    Converts 'prefix:folder' strings into a {prefix: folder} dictionary.

    Args:
        items (list): Strings that must each contain exactly one colon, with
            non-empty text on both sides.

    Returns:
        dict: A dictionary mapping filename prefixes to folder names.

    Raises:
        ValueError: If an item is malformed.
    """
    mapping = {}
    for item in items:
        key, value = item.split(":")  # ValueError unless exactly one colon
        # An empty prefix would match every filename; an empty folder is
        # treated downstream as "no destination". Reject both up front.
        if not key or not value:
            raise ValueError(f"Empty prefix or folder in mapping: {item!r}")
        mapping[key] = value
    return mapping


def main():
    """
    Main function to orchestrate the ZIP file processing, renaming, moving, and cleanup.

    Parses the command line, configures logging, and then, for every
    ``DataFiles-*.zip`` in the downloads directory: extracts it, reads the
    dates of its CSV members, renames and moves the extracted CSV files, and
    deletes the ZIP file and the extraction directory. Invalid arguments or
    an unreadable downloads directory are logged and end the run early.
    """

    # Set up argument parsing
    parser = argparse.ArgumentParser(description="Process and organize CSV files from a ZIP archive.")
    parser.add_argument("downloads_path", help="The path to the downloads directory.")
    parser.add_argument("extract_base_path", help="The base path for temporary file extraction.")
    parser.add_argument("destination_base_path", help="The base path for organized output files.")
    parser.add_argument("-l", "--log_level", default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
                        help="Set the logging level (default: INFO).")
    # Each mapping is its own argument (nargs='+'); quoting several mappings
    # as a single string would be rejected by the parser below.
    parser.add_argument("-f", "--file_to_folder",  nargs='+',
                       help="""Mapping of filename prefixes to folder names,
                       given as separate 'prefix:folder' arguments.
                       Example: -f prefixA:folderX prefixB:folderY""")
    args = parser.parse_args()

    # argparse `choices` guarantees a valid, upper-case level name here.
    setup_logging(getattr(logging, args.log_level))

    # Parse the file_to_folder mappings from the command line.
    if not args.file_to_folder:
        logging.error("The --file_to_folder argument is required.")
        return  # Exit if the required parameter is missing
    try:
        file_to_folder = _parse_file_to_folder(args.file_to_folder)
    except ValueError:
        logging.error("Invalid format for --file_to_folder.  Use 'prefix1:folder1 prefix2:folder2 ...'")
        return  # Exit if the format is incorrect

    try:
        zip_filenames = os.listdir(args.downloads_path)
    except OSError as e:
        logging.error(f"Cannot list downloads directory {args.downloads_path}: {e}")
        return

    failed_extractions = 0
    # Only archives named DataFiles-*.zip are processed.
    for zip_filename in zip_filenames:
        if not (zip_filename.startswith("DataFiles-") and zip_filename.endswith(".zip")):
            continue

        zip_path = os.path.join(args.downloads_path, zip_filename)
        extract_path = os.path.join(args.extract_base_path, os.path.splitext(zip_filename)[0])

        # Extract the zip file.  Continue to the next file if extraction fails.
        if not extract_zip_file(zip_path, extract_path):
            failed_extractions += 1
            continue

        # Get creation dates for CSV files.
        file_creation_dates = get_zip_file_creation_dates(zip_path)

        # Rename and move files.
        rename_and_move_files(extract_path, args.destination_base_path, file_to_folder, file_creation_dates)

        # NOTE(review): cleanup removes the extraction directory even when
        # rename_and_move_files left CSVs behind (no date or no matching
        # prefix), so those files are deleted together with the ZIP -
        # confirm this is intended.
        cleanup(zip_path, extract_path)

    if failed_extractions:
        logging.warning(f"Processing finished, but {failed_extractions} ZIP file(s) could not be extracted.")
    else:
        logging.info("All files have been processed, renamed, moved, and cleanup completed successfully.")


# Run the pipeline only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

2025-02-06 07:47:42,844 - INFO - Extracted ZIP file C:\Users\salama.154\Downloads\SORT-SprinklrData (1).zip to C:\Users\salama.154\Downloads\EMEABPO_ECO_CARReporting
2025-02-06 07:47:42,848 - INFO - Renamed SORT_Sprinklr_AHT_Agent_Level_11.csv to SORT_Sprinklr_AHT_Agent_Level_2025-02-05.csv
2025-02-06 07:47:42,850 - INFO - Moved SORT_Sprinklr_AHT_Agent_Level_2025-02-05.csv to C:\Users\salama.154\OneDrive - Majorel\101-landing\Global Dashboard\SORT\SORT_Sprinklr_AHT_Agent_Level
2025-02-06 07:47:42,852 - INFO - Renamed SORT_Sprinklr_AHT_Site_Level_10.csv to SORT_Sprinklr_AHT_Site_Level_2025-02-05.csv
2025-02-06 07:47:42,854 - INFO - Moved SORT_Sprinklr_AHT_Site_Level_2025-02-05.csv to C:\Users\salama.154\OneDrive - Majorel\101-landing\Global Dashboard\SORT\SORT_Sprinklr_AHT_Site_Level
2025-02-06 07:47:42,856 - INFO - Renamed SORT_Sprinklr_AVG_First_Response_Agent_Level_7.csv to SORT_Sprinklr_AVG_First_Response_Agent_Level_2025-02-05.csv
2025-02-06 07:47:42,858 - INFO - Moved SORT_Sprinkl