In [None]:
import pandas as pd
import re
from openpyxl import load_workbook

# Function to clean sheet names by removing invalid characters
def clean_sheet_name(name):
    """Return the text form of ``name`` without forbidden characters, cut to 31 chars.

    The characters removed are the colon, backslash, forward slash, asterisk,
    question mark and both square brackets. ``name`` is converted with
    ``str()`` first, so non-string values are accepted.
    """
    forbidden = ':\\/*?[]'
    kept = ''.join(ch for ch in str(name) if ch not in forbidden)
    return kept[:31]

# Positional (0-based) columns to keep from each business-unit sheet.
# Sheets that are not listed here are skipped.
sheet_column_mapping = {
    'SCB': range(0, 27),
    'HTB': range(0, 28),
    'GB': range(0, 12),
    'REB': range(0, 26),
    'FSB': range(0, 14),
    'FB': range(0, 18),
    'AB': range(0, 9),
    'CSC': range(0, 21),
}

# Source workbook. Forward slashes keep the relative path valid on Windows
# and on POSIX systems (a backslash separator only resolves on Windows).
file_path = '01_data/2024_Segment_BU_Actual_09_2567.xlsx'

# Value in the first column that marks the start of a new table in a sheet
split_keyword = 'MBK Group'

# Output Excel file; mode='w' below means it is overwritten on every run
output_file = 'MTD_Segment_BU.xlsx'

# Open the source workbook once and reuse the handle for every sheet. Both
# context managers release their file handles even if a sheet fails to parse.
with pd.ExcelFile(file_path) as xls:
    sheet_names = xls.sheet_names  # List all sheet names

    with pd.ExcelWriter(output_file, engine='openpyxl', mode='w') as writer:
        for sheet_name in sheet_names:
            if sheet_name not in sheet_column_mapping:
                print(f"Skipping sheet '{sheet_name}' as it's not in the mapping.")
                continue

            bu = sheet_name
            columns_to_keep = sheet_column_mapping[sheet_name]

            # The second row of the sheet (header=1) holds the column headers
            df = pd.read_excel(xls, sheet_name=sheet_name, header=1)

            # Row positions where the keyword appears in the first column,
            # plus the end of the frame so the last table has an upper bound
            split_indices = df.index[df.iloc[:, 0] == split_keyword].tolist()
            split_indices.append(len(df))

            # Collect the per-table frames and concatenate once at the end
            tables = []
            for marker_idx, end_idx in zip(split_indices[:-1], split_indices[1:]):
                # Data rows start three rows below the keyword row
                start_idx = marker_idx + 3

                # dropna() returns a new frame, so later column additions
                # never write into a view of `df`
                table = df.iloc[start_idx:end_idx, columns_to_keep].dropna(how='all')

                # The date text sits two rows below the keyword row
                date_idx = marker_idx + 2
                if date_idx < len(df):
                    date_txt = clean_sheet_name(str(df.iloc[date_idx, 0]))[:11]
                else:
                    date_txt = "Unknown-00"

                # First two '-'-separated parts are taken as Year and Month
                if '-' in date_txt:
                    year, month = date_txt.split('-')[:2]
                else:
                    year = month = 'Unknown'

                tables.append(table.assign(Year=year, Month=month, BU=bu))

            # A sheet without any keyword row still gets an (empty) output sheet
            all_data = pd.concat(tables, ignore_index=True) if tables else pd.DataFrame()

            # Save the data for each sheet in its own sheet within the Excel file
            all_data.to_excel(writer, sheet_name=bu, index=False)

print("Data appended, mapped with sheet names, and saved successfully to multiple sheets.")


Skipping sheet 'MBK_Group' as it's not in the mapping.
Skipping sheet 'AAA' as it's not in the mapping.
Data appended, mapped with sheet names, and saved successfully to multiple sheets.


In [88]:
import pandas as pd
import re
from openpyxl import load_workbook

# Function to clean sheet names by removing invalid characters
def clean_sheet_name(name):
    """Stringify ``name``, delete ``: \\ / * ? [ ]`` and keep at most 31 characters."""
    removal_table = str.maketrans('', '', ':\\/*?[]')
    stripped = str(name).translate(removal_table)
    return stripped[:31]

# Positional (0-based) columns to keep from each business-unit sheet.
# Sheets that are not listed here are skipped.
# Every entry is written as range(start, start + width) with start == width + 1.
# NOTE(review): the SCB entry used to be range(28, 28+21), the only one that
# broke that pattern (21 looks like a copy of the CSC width), so six columns
# were silently dropped. Corrected to width 27 — confirm against the workbook.
sheet_column_mapping = {
    'SCB': range(28, 28+27),
    'HTB': range(29, 29+28),
    'GB': range(13, 13+12),
    'REB': range(27, 27+26),
    'FSB': range(15, 15+14),
    'FB': range(19, 19+18),
    'AB': range(10, 10+9),
    'CSC': range(22, 22+21),
}

# Source workbook. Forward slashes keep the relative path valid on Windows
# and on POSIX systems (a backslash separator only resolves on Windows).
file_path = '01_data/2024_Segment_BU_Actual_09_2567.xlsx'

# Value in the first column that marks the start of a new table in a sheet
split_keyword = 'MBK Group'

# Output Excel file; mode='w' below means it is overwritten on every run
output_file = 'YTD_Segment_BU.xlsx'

# Open the source workbook once and reuse the handle for every sheet. Both
# context managers release their file handles even if a sheet fails to parse.
with pd.ExcelFile(file_path) as xls:
    sheet_names = xls.sheet_names  # List all sheet names

    with pd.ExcelWriter(output_file, engine='openpyxl', mode='w') as writer:
        for sheet_name in sheet_names:
            if sheet_name not in sheet_column_mapping:
                print(f"Skipping sheet '{sheet_name}' as it's not in the mapping.")
                continue

            bu = sheet_name
            columns_to_keep = sheet_column_mapping[sheet_name]

            # The second row of the sheet (header=1) holds the column headers
            df = pd.read_excel(xls, sheet_name=sheet_name, header=1)

            # Row positions where the keyword appears in the first column,
            # plus the end of the frame so the last table has an upper bound
            split_indices = df.index[df.iloc[:, 0] == split_keyword].tolist()
            split_indices.append(len(df))

            # Collect the per-table frames and concatenate once at the end
            tables = []
            for marker_idx, end_idx in zip(split_indices[:-1], split_indices[1:]):
                # Data rows start three rows below the keyword row
                start_idx = marker_idx + 3

                # dropna() returns a new frame, so later column additions
                # never write into a view of `df`
                table = df.iloc[start_idx:end_idx, columns_to_keep].dropna(how='all')

                # The date text sits two rows below the keyword row
                date_idx = marker_idx + 2
                if date_idx < len(df):
                    date_txt = clean_sheet_name(str(df.iloc[date_idx, 0]))[:11]
                else:
                    date_txt = "Unknown-00"

                # First two '-'-separated parts are taken as Year and Month
                if '-' in date_txt:
                    year, month = date_txt.split('-')[:2]
                else:
                    year = month = 'Unknown'

                tables.append(table.assign(Year=year, Month=month, BU=bu))

            # A sheet without any keyword row still gets an (empty) output sheet
            all_data = pd.concat(tables, ignore_index=True) if tables else pd.DataFrame()

            # Save the data for each sheet in its own sheet within the Excel file
            all_data.to_excel(writer, sheet_name=bu, index=False)

print("Data appended, mapped with sheet names, and saved successfully to multiple sheets.")


Skipping sheet 'MBK_Group' as it's not in the mapping.
Skipping sheet 'AAA' as it's not in the mapping.
Data appended, mapped with sheet names, and saved successfully to multiple sheets.
