In [None]:
!pip install awswrangler xlrd

In [6]:
import sys
import logging
import os
import time
import boto3
import json

In [7]:
# Region configured for the default boto3 session (None if none is configured).
session = boto3.Session()
region = session.region_name

# Ask STS which AWS account the current credentials belong to; the ID is used
# further down to build the account-specific bucket names.
sts_client = boto3.client('sts')
account_id = sts_client.get_caller_identity()["Account"]

In [10]:
import awswrangler as wr
import re
from pathlib import Path

def clean_name(name):
    """Normalise an arbitrary label into a lowercase, underscore-separated token.

    Every character that is not an ASCII letter or digit is turned into an
    underscore, the text is lower-cased, runs of underscores are collapsed to
    a single one, and underscores at either end are trimmed.

    Args:
        name: The raw label (for example a file stem or a sheet name).

    Returns:
        The cleaned string. It is empty when ``name`` contains no ASCII
        letters or digits.
    """
    # Step 1 + 2: neutralise anything that is not [a-zA-Z0-9], then lower-case.
    underscored = re.sub(r'[^a-zA-Z0-9]', '_', name).lower()
    # Step 3: squeeze consecutive underscores produced by step 1.
    collapsed = re.sub(r'_+', '_', underscored)
    # Step 4: drop underscores left dangling at the edges.
    return collapsed.strip('_')

def _claim_unique_name(base, taken):
    """Return ``base`` (or ``base_2``, ``base_3``, ...) not yet in ``taken`` and record it."""
    candidate = base
    counter = 2
    while candidate in taken:
        candidate = f"{base}_{counter}"
        counter += 1
    taken.add(candidate)
    return candidate


def process_excel_files(s3_source_folder, s3_output_prefix):
    """Convert every sheet of every Excel workbook under an S3 prefix to CSV.

    Each sheet is written to
    ``<s3_output_prefix>/<file>/<sheet>/<sheet>.csv`` where ``<file>`` and
    ``<sheet>`` are the ``clean_name``-normalised workbook stem and sheet name.

    Name handling:
      * If two workbooks (or two sheets of one workbook) normalise to the same
        name, later ones get a ``_2``, ``_3``, ... suffix instead of silently
        overwriting the earlier CSV.
      * If a name normalises to an empty string, ``"workbook"`` / ``"sheet"``
        is used so the output key never contains an empty path segment.

    Processing is best-effort: a failure while reading or writing one workbook
    is printed and the remaining workbooks are still processed.

    Args:
        s3_source_folder: S3 URI prefix that is listed for ``.xlsx``/``.xls`` objects.
        s3_output_prefix: S3 URI prefix under which the CSV files are written;
            a trailing ``/`` is accepted.

    Returns:
        None. Progress and errors are reported via ``print``.
    """
    # A trailing slash on the prefix would otherwise put an empty segment
    # ("//") into every output key.
    output_root = s3_output_prefix.rstrip('/')

    # List all Excel files in the source folder
    excel_files = wr.s3.list_objects(
        path=s3_source_folder,
        suffix=['.xlsx', '.xls']
    )

    used_file_names = set()
    for excel_file in excel_files:
        # File name without extension, normalised and made unique for this run
        # (e.g. "report.xls" and "report.xlsx" must not share an output folder).
        file_name = _claim_unique_name(
            clean_name(Path(excel_file).stem) or "workbook",
            used_file_names,
        )
        print(f"Processing file: {file_name}")

        try:
            # Read all sheets from the Excel file; sheet_name=None yields a
            # mapping of sheet name -> DataFrame.
            dfs = wr.s3.read_excel(
                path=excel_file,
                sheet_name=None
            )

            # Process each sheet
            used_sheet_names = set()
            for sheet_name, df in dfs.items():
                # Distinct sheet names can normalise to the same token
                # ("Q1 Sales" / "Q1-Sales"); keep their outputs separate.
                clean_sheet_name = _claim_unique_name(
                    clean_name(sheet_name) or "sheet",
                    used_sheet_names,
                )
                output_path = f"{output_root}/{file_name}/{clean_sheet_name}/{clean_sheet_name}.csv"

                wr.s3.to_csv(
                    df=df,
                    path=output_path,
                    index=False
                )
                print(f"Created: {output_path}")

        except Exception as e:
            # Deliberately broad: one unreadable workbook should not abort the batch.
            print(f"Error processing {file_name}: {str(e)}")

# Example run: convert every workbook found in this account's demo source
# bucket and write the CSVs under the "output" prefix of the demo CSV bucket.
source_folder = f"s3://demo-xls-{account_id}/"
output_prefix = f"s3://demo-csv-{account_id}/output"
process_excel_files(
    s3_source_folder=source_folder,
    s3_output_prefix=output_prefix,
)


Processing file: superstore
Created: s3://demo-csv-639649899411/output/superstore/orders/orders.csv
Created: s3://demo-csv-639649899411/output/superstore/people/people.csv
Created: s3://demo-csv-639649899411/output/superstore/returns/returns.csv
