In [None]:
import pandas as pd
import re

# Helper function to convert month numbers to names
def month_number_to_name(month):
    """Return the three-letter English abbreviation for a month number.

    Args:
        month: Month number, 1 (January) through 12 (December).

    Returns:
        The abbreviation, e.g. ``"Jan"`` for 1.

    Raises:
        IndexError: If ``month`` is outside 1-12.
    """
    names = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
             "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
    # Without this check, 0 and negative numbers would wrap around through
    # negative indexing (0 -> "Dec") instead of being reported as invalid.
    if not 1 <= month <= 12:
        raise IndexError(f"month must be in 1..12, got {month}")
    return names[month - 1]

# Helper function to determine the last three months
def get_last_three_months(current_month, current_year):
    """Return the current month and the two before it, newest first.

    Args:
        current_month: Month number of the most recent period.
        current_year: Year that ``current_month`` belongs to.

    Returns:
        A list of three ``(month, year)`` tuples. Months that fall before
        January roll over into December/November of ``current_year - 1``.
    """
    periods = []
    for offset in (0, 1, 2):
        shifted = current_month - offset
        # Map 0 and -1 back onto 12 and 11 while leaving 1..12 unchanged.
        wrapped_month = (shifted - 1) % 12 + 1
        if shifted > 0:
            periods.append((wrapped_month, current_year))
        else:
            periods.append((wrapped_month, current_year - 1))
    return periods

# Function to handle pasting data for MoM type
def process_mom_data(sheet, sheet_name, current_month, current_year):
    """Build the MoM rows for one sheet from its last three monthly columns.

    For every row of ``sheet`` and every one of the last three months whose
    column (named like ``"M01 2025"``) exists, one output row is produced.
    The monthly value goes into "Cost" when the sheet name contains "DC"
    (case-insensitive) and into "FTE/Contractor" when it contains "HC";
    sheets matching neither contribute no rows.

    Args:
        sheet: DataFrame read from the workbook sheet.
        sheet_name: Name of the sheet; copied into "File Name".
        current_month: Month number of the most recent period.
        current_year: Year of the most recent period.

    Returns:
        A DataFrame with the 17 output columns (possibly with zero rows).
    """
    output_columns = ["File Name", "Level 3", "Level 4", "Cost Grouping", "Cost Type",
                      "Finance Region", "Attribute Type", "Period", "Cost",
                      "FTE/Contractor", "Country", "Level 3.5", "Level 4.5", "Entity",
                      "Function", "Mapped Country", "MICA"]
    collected = []
    recent_periods = get_last_three_months(current_month, current_year)

    for _, record in sheet.iterrows():
        entity = record.get('Entity', '')
        function = record.get('RTN Level 4', '')
        mica = record.get('Account', '')

        for month_num, year in recent_periods:
            # Column headers carry a space between the month code and the year.
            month_col = f"M{month_num:02d} {year}"
            if month_col not in sheet.columns:
                continue

            period_value = f"{month_number_to_name(month_num)}-{str(year)[-2:]}"
            name_upper = sheet_name.upper()
            if 'DC' in name_upper:
                cost, headcount = record[month_col], ''
            elif 'HC' in name_upper:
                cost, headcount = '', record[month_col]
            else:
                continue

            collected.append([sheet_name, '', '', '', '', '', 'MoM', period_value,
                              cost, headcount, '', '', '', entity, function, '', mica])

    return pd.DataFrame(collected, columns=output_columns)

# Function to handle FY data
def process_fy_data(sheet, sheet_name, current_year, attribute_type):
    """Build the full-year rows for one sheet from its ``"FY <year>"`` column.

    Each row of ``sheet`` yields one output row with period ``"Dec-YY"``.
    The value goes into "Cost" when the sheet name contains "DC"
    (case-insensitive) and into "FTE/Contractor" when it contains "HC";
    sheets matching neither contribute no rows.

    Args:
        sheet: DataFrame read from the workbook sheet.
        sheet_name: Name of the sheet; copied into "File Name".
        current_year: Year used to locate the FY column and format the period.
        attribute_type: Text stored in "Attribute Type".

    Returns:
        A DataFrame with the 17 output columns, or a completely empty
        DataFrame (no columns) when the FY column is missing, in which case
        a message is printed.
    """
    fy_col = f"FY {current_year}"
    if fy_col not in sheet.columns:
        print(f"{fy_col} not found in {sheet_name}, skipping this sheet.")
        return pd.DataFrame()

    output_columns = ["File Name", "Level 3", "Level 4", "Cost Grouping", "Cost Type",
                      "Finance Region", "Attribute Type", "Period", "Cost",
                      "FTE/Contractor", "Country", "Level 3.5", "Level 4.5", "Entity",
                      "Function", "Mapped Country", "MICA"]
    collected = []

    for _, record in sheet.iterrows():
        entity = record.get('Entity', '')
        function = record.get('RTN Level 4', '')
        mica = record.get('Account', '')

        period_value = f"Dec-{str(current_year)[-2:]}"
        name_upper = sheet_name.upper()
        if 'DC' in name_upper:
            cost, headcount = record[fy_col], ''
        elif 'HC' in name_upper:
            cost, headcount = '', record[fy_col]
        else:
            continue

        collected.append([sheet_name, '', '', '', '', '', attribute_type, period_value,
                          cost, headcount, '', '', '', entity, function, '', mica])

    return pd.DataFrame(collected, columns=output_columns)

# Main function to process the entire file and consolidate data
def process_file(processed_file_path, current_month, current_year):
    """Read every sheet of a workbook and consolidate MoM and FY rows.

    Sheets are routed by name (case-insensitive): names containing "actual"
    go through ``process_mom_data``, names containing "target" through
    ``process_fy_data`` as "FY Target", and names containing "fcst" through
    ``process_fy_data`` as "FY Forecast". A sheet whose name matches several
    keywords is processed once per match.

    Args:
        processed_file_path: Path of the Excel workbook to read.
        current_month: Month number of the most recent period.
        current_year: Year of the most recent period.

    Returns:
        One DataFrame with all MoM rows first, then FY Target rows, then
        FY Forecast rows. An empty DataFrame when no sheet matched.
    """
    mom_frames = []
    target_frames = []
    forecast_frames = []

    # The context manager closes the workbook handle even if a sheet fails.
    with pd.ExcelFile(processed_file_path) as xls:
        for sheet_name in xls.sheet_names:
            sheet = pd.read_excel(xls, sheet_name=sheet_name)
            sheet_name_lower = sheet_name.lower()

            if 'actual' in sheet_name_lower:
                mom_frames.append(
                    process_mom_data(sheet, sheet_name, current_month, current_year))

            if 'target' in sheet_name_lower:
                target_frames.append(
                    process_fy_data(sheet, sheet_name, current_year, 'FY Target'))

            if 'fcst' in sheet_name_lower:
                forecast_frames.append(
                    process_fy_data(sheet, sheet_name, current_year, 'FY Forecast'))

    # Concatenate once at the end instead of re-copying a growing frame for
    # every sheet; the grouping order (MoM, Target, Forecast) is preserved.
    frames = mom_frames + target_frames + forecast_frames
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

# Example usage: runs as soon as this cell/module is executed.
current_month = 1  # Most recent month to report (1 = January)
current_year = 2025  # Year that current_month belongs to
processed_file_path = 'Processed_file.xlsx'  # Input workbook; update with your file path

# Consolidate the MoM, FY Target and FY Forecast rows of every matching sheet
final_df = process_file(processed_file_path, current_month, current_year)

# Write the consolidated rows to an Excel file without the DataFrame index
final_df.to_excel('final_output_file.xlsx', index=False)  # Save as xlsx


In [None]:
import pandas as pd
import re

def month_number_to_name(month):
    """Convert a month number (1-12) to its abbreviated English month name.

    Raises:
        IndexError: If ``month`` is not between 1 and 12. Zero and negative
            numbers are rejected explicitly because plain list indexing
            would otherwise wrap around and return a wrong month.
    """
    if month < 1 or month > 12:
        raise IndexError(f"month must be in 1..12, got {month}")
    abbreviations = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                     "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
    return abbreviations[month - 1]

# Column layout of the "Paste" sheet written by process_paste_sheet.
_PASTE_COLUMNS = [
    "File Name", "Level 3", "Level 4", "Cost Grouping", "Cost Type",
    "Finance Region", "Attribute Type", "Period", "Cost",
    "FTE/Contractor", "Country", "Level 3.5", "Level 4.5", "Entity",
    "Function", "Mapped Country", "MICA",
]


def _parse_month_column(col):
    """Split a month column name such as "M03 2024" into ``(month, year)``.

    ``year`` is None when the name carries no four-digit year after the
    month code. Returns None for names that are not month columns or whose
    month number is outside 1-12.
    """
    found = re.match(r'M(\d{2})(?:\s+(\d{4}))?', col)
    if not found:
        return None
    month = int(found.group(1))
    if not 1 <= month <= 12:
        return None
    year = int(found.group(2)) if found.group(2) else None
    return month, year


def _paste_frame(sheet, sheet_name, attribute_type, period, value_column, values):
    """Build the Paste rows of one sheet for a single attribute type and period."""
    return pd.DataFrame({
        'File Name': sheet_name,
        'Entity': sheet['Entity'],
        'Function': sheet['RTN Level 4'],
        # Account values belong in the MICA column of the Paste layout.
        'MICA': sheet['Account'],
        'Attribute Type': attribute_type,
        'Period': period,
        value_column: values,
    })


def process_paste_sheet(processed_file_path, current_month, current_year):
    """Collect MoM, YTD and prior-year figures into 'final_output_file.xlsx'.

    Every sheet whose name contains "Actual", "Target" or "Fcst"
    (case-insensitive) and that has the columns 'Entity', 'RTN Level 4' and
    'Account' contributes:

    1. MoM rows for month columns within the last three months. A column
       that names its year ("M12 2023") is matched and labelled by that
       year; a column without a year is matched on the month number alone
       and labelled with ``current_year``.
    2. One YTD row set summing the month columns up to ``current_month``
       (columns naming a different year are left out), tagged
       "YTD Target", "YTD Forecast" or "YTD Actual" by sheet name.
    3. "Prior Year FY" rows from columns containing "FY <current_year - 1>".
    4. "Prior Year YTD" rows from columns containing "YTD <current_year - 1>".

    Values go to "Cost" when the sheet name contains "DC" (case-insensitive)
    and to "FTE/Contractor" otherwise.

    Args:
        processed_file_path: Path of the Excel workbook to read.
        current_month: Month number of the most recent period (1-12).
        current_year: Year of the most recent period.

    Returns:
        None. The result is written to the sheet "Paste" of
        'final_output_file.xlsx' in the working directory.
    """
    # (month, year) pairs for the current month and the two before it.
    recent_months = set()
    for back in range(3):
        shifted = current_month - back
        recent_months.add(((shifted - 1) % 12 + 1,
                           current_year if shifted > 0 else current_year - 1))

    frames = []
    # The context manager closes the workbook handle even if a sheet fails.
    with pd.ExcelFile(processed_file_path) as xls:
        for sheet_name in xls.sheet_names:
            # Only process sheets containing 'Actual', 'Target', or 'Fcst'
            if not re.search(r'Actual|Target|Fcst', sheet_name, re.IGNORECASE):
                continue

            # Only load required columns; non-text headers can never match
            # and would make re.match raise, so they are skipped.
            sheet = pd.read_excel(
                xls,
                sheet_name=sheet_name,
                usecols=lambda x: isinstance(x, str) and bool(
                    re.match(r'Entity|RTN Level 4|Account|M\d{2}|FY|YTD', x)),
            )

            # Ensure relevant columns exist
            if not {'Entity', 'RTN Level 4', 'Account'}.issubset(sheet.columns):
                continue

            # Sheets are selected case-insensitively, so classify them the
            # same way to keep e.g. "dc actual" from being mislabelled.
            name_upper = sheet_name.upper()
            cost_type = 'Cost' if 'DC' in name_upper else 'FTE/Contractor'

            month_cols = []
            for col in sheet.columns:
                parsed = _parse_month_column(col)
                if parsed is not None:
                    month_cols.append((col, parsed[0], parsed[1]))

            # 1. MoM (Last 3 Months Data)
            for col, month, year in month_cols:
                if year is None:
                    selected = (current_month - month) % 12 in (0, 1, 2)
                    label_year = current_year
                else:
                    selected = (month, year) in recent_months
                    label_year = year
                if selected:
                    period_value = f"{month_number_to_name(month)}-{str(label_year)[-2:]}"
                    frames.append(_paste_frame(
                        sheet, sheet_name, 'MoM', period_value, cost_type, sheet[col]))

            # 2. YTD (Summing up values up to current month)
            ytd_cols = [col for col, month, year in month_cols
                        if month <= current_month and year in (None, current_year)]
            if ytd_cols:
                if 'TARGET' in name_upper:
                    ytd_attribute = 'YTD Target'
                elif 'FCST' in name_upper:
                    ytd_attribute = 'YTD Forecast'
                else:
                    # Only Actual/Target/Fcst sheets reach this point.
                    ytd_attribute = 'YTD Actual'
                ytd_period_value = f"{month_number_to_name(current_month)}-{str(current_year)[-2:]}"
                frames.append(_paste_frame(
                    sheet, sheet_name, ytd_attribute, ytd_period_value, cost_type,
                    sheet[ytd_cols].sum(axis=1)))

            # 3. Prior Year FY
            prior_fy_period_value = f"Dec-{str(current_year - 1)[-2:]}"
            for prior_fy_col in [col for col in sheet.columns
                                 if f'FY {current_year - 1}' in col]:
                frames.append(_paste_frame(
                    sheet, sheet_name, 'Prior Year FY', prior_fy_period_value,
                    cost_type, sheet[prior_fy_col]))

            # 4. Prior Year YTD
            prior_ytd_period_value = f"{month_number_to_name(current_month)}-{str(current_year - 1)[-2:]}"
            for prior_ytd_col in [col for col in sheet.columns
                                  if f'YTD {current_year - 1}' in col]:
                frames.append(_paste_frame(
                    sheet, sheet_name, 'Prior Year YTD', prior_ytd_period_value,
                    cost_type, sheet[prior_ytd_col]))

    # Concatenate once and lay the result out in the Paste column order;
    # columns no frame provides are left empty.
    if frames:
        paste_df = pd.concat(frames, ignore_index=True).reindex(columns=_PASTE_COLUMNS)
    else:
        paste_df = pd.DataFrame(columns=_PASTE_COLUMNS)

    # Write the final DataFrame to an Excel file
    paste_df.to_excel('final_output_file.xlsx', sheet_name='Paste', index=False)


# ---------------------------------------------------------------------------
# The script below had been pasted into the middle of the "Cost" column name
# of process_paste_sheet's column list, which made the whole cell
# unparseable. It is restored here as separate top-level code.
# ---------------------------------------------------------------------------
import pandas as pd
from openpyxl import Workbook


def month_number_to_name(month):
    """Convert a 1-based month number to its abbreviated English name."""
    return ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"][month - 1]


def _resolve_sheet_name(sheet, sheet_name):
    """Return the explicit sheet name, else a string ``name`` attribute of the frame, else ''."""
    if sheet_name is not None:
        return sheet_name
    # DataFrames read by pandas have no ``name`` attribute; honour one only
    # when a caller has attached it as a plain string.
    attached = getattr(sheet, 'name', '')
    return attached if isinstance(attached, str) else ''


def _append_fy_rows(sheet, sheet_name, attribute_type, period_value, fy_column_name, ws):
    """Append one row per sheet row for an FY column, routed to Cost (DC) or FTE/Contractor (HC)."""
    if fy_column_name not in sheet.columns:
        return
    if 'DC' in sheet_name:
        to_cost = True
    elif 'HC' in sheet_name:
        to_cost = False
    else:
        # Sheets that are neither DC nor HC contribute nothing.
        return

    for _, row in sheet.iterrows():
        value = row[fy_column_name]
        cost, fte = (value, '') if to_cost else ('', value)
        ws.append([sheet_name, '', '', '', '', '', attribute_type, period_value,
                   cost, fte, '', '', '',
                   row.get('Entity', ''), row.get('RTN Level 4', ''), '', row.get('Account', '')])


def add_mom(sheet, current_month, current_year, ws, sheet_name=None):
    """Append MoM rows for the last three months of an Actual sheet.

    Args:
        sheet: DataFrame read from the workbook sheet.
        current_month: Month number of the most recent period (1-12).
        current_year: Year of the most recent period.
        ws: Worksheet-like object with an ``append(list)`` method.
        sheet_name: Name written to "File Name" and used to pick the value
            column. When omitted, a string ``name`` attribute on ``sheet``
            is used if present, otherwise ''.

    Each existing month column ("M06 2024" style) yields one row per sheet
    row, labelled with that column's own month and year. Months before
    January are looked up under the previous year. Values go to
    "FTE/Contractor" for HC sheets and to "Cost" for everything else.
    """
    sheet_name = _resolve_sheet_name(sheet, sheet_name)

    # (column, period label) for each of the last three months that exists.
    periods = []
    for back in range(3):
        shifted = current_month - back
        month = (shifted - 1) % 12 + 1
        year = current_year if shifted > 0 else current_year - 1
        month_col = f"M{month:02d} {year}"
        if month_col in sheet.columns:
            periods.append((month_col, f"{month_number_to_name(month)}-{str(year)[-2:]}"))

    is_headcount = 'DC' not in sheet_name and 'HC' in sheet_name

    for _, row in sheet.iterrows():
        entity = row.get('Entity', '')
        function = row.get('RTN Level 4', '')
        mica = row.get('Account', '')

        for month_col, period_value in periods:
            value = row[month_col]
            cost, fte = ('', value) if is_headcount else (value, '')
            ws.append([sheet_name, '', '', '', '', '', 'MoM', period_value,
                       cost, fte, '', '', '', entity, function, '', mica])


def add_fy_target(sheet, current_month, current_year, ws, sheet_name=None):
    """Append "FY Target" rows from the ``FY <current_year>`` column.

    Rows are only written for sheets whose name contains "DC" (value in
    "Cost") or "HC" (value in "FTE/Contractor"). See ``add_mom`` for how
    ``sheet_name`` is resolved.
    """
    # NOTE(review): the period is the current month here, whereas
    # add_fy_forecast uses "Dec-YY" - confirm which one is intended.
    period_value = f"{month_number_to_name(current_month)}-{str(current_year)[-2:]}"
    _append_fy_rows(sheet, _resolve_sheet_name(sheet, sheet_name), 'FY Target',
                    period_value, f'FY {current_year}', ws)


def add_fy_forecast(sheet, current_year, ws, sheet_name=None):
    """Append "FY Forecast" rows (period "Dec-YY") from the ``FY <current_year>`` column.

    Rows are only written for sheets whose name contains "DC" (value in
    "Cost") or "HC" (value in "FTE/Contractor"). See ``add_mom`` for how
    ``sheet_name`` is resolved.
    """
    _append_fy_rows(sheet, _resolve_sheet_name(sheet, sheet_name), 'FY Forecast',
                    f"Dec-{str(current_year)[-2:]}", f'FY {current_year}', ws)


def process_sheets(processed_file_path, current_month, current_year):
    """Write the MoM / FY Target / FY Forecast rows of a workbook to 'final_output_file.xlsx'.

    Sheets whose name contains "Actual" feed ``add_mom``, "Target" feeds
    ``add_fy_target`` and "Fcst" feeds ``add_fy_forecast`` (case-sensitive).
    """
    # Create a new workbook and the "Paste" sheet
    wb = Workbook()
    ws = wb.active
    ws.title = "Paste"

    # Define headers for the "Paste" sheet
    headers = ["File Name", "Level 3", "Level 4", "Cost Grouping", "Cost Type",
               "Finance Region", "Attribute Type", "Period", "Cost",
               "FTE/Contractor", "Country", "Level 3.5", "Level 4.5", "Entity",
               "Function", "Mapped Country", "MICA"]
    ws.append(headers)  # Write headers to the first row

    # The context manager closes the input workbook once all sheets are read.
    with pd.ExcelFile(processed_file_path) as xls:
        for sheet_name in xls.sheet_names:
            sheet = pd.read_excel(xls, sheet_name=sheet_name)

            # Handle MoM for Actual sheets
            if 'Actual' in sheet_name:
                add_mom(sheet, current_month, current_year, ws, sheet_name=sheet_name)

            # Handle FY Target sheets
            if 'Target' in sheet_name:
                add_fy_target(sheet, current_month, current_year, ws, sheet_name=sheet_name)

            # Handle FY Forecast sheets
            if 'Fcst' in sheet_name:
                add_fy_forecast(sheet, current_year, ws, sheet_name=sheet_name)

    # Save the final workbook
    wb.save('final_output_file.xlsx')


# Example usage
current_month = 6  # June
current_year = 2024  # Current year
processed_file_path = 'Processed_file.xlsx'  # Update with your file path
process_sheets(processed_file_path, current_month, current_year)

# Example usage: runs as soon as this cell/module is executed.
current_month = 6  # Most recent month to report (6 = June)
current_year = 2024  # Year that current_month belongs to
processed_file_path = 'Processed_file.xlsx'  # Input workbook; update with your file path
# Writes its result to 'final_output_file.xlsx' (sheet "Paste") and returns nothing.
process_paste_sheet(processed_file_path, current_month, current_year)
