In [None]:
import pandas as pd
from datetime import datetime
import calendar

# Delivery date stamp shared by the workbook name, the sheet name, the export
# name and the 'Delivery Date' column. Keeping it in one place stops the four
# values from drifting apart when the next delivery is processed.
DELIVERY_DATE = '12.14.23'

# File paths
source_file_path = f'2023 Vendor Power Pivot FAF_{DELIVERY_DATE}.xlsx'
export_file_path = f'Invoice_FAF_OCT {DELIVERY_DATE}.csv'


# Read the Excel file (the sheet name carries the same delivery date stamp)
df = pd.read_excel(source_file_path, sheet_name=f'Pasted Values {DELIVERY_DATE}')

# Add the constant columns that are not present in the workbook.
# TODO: Implement the logic for populating 'Year' and 'AA_YEAR' if they are derived from other data
df['Delivery Date'] = DELIVERY_DATE
df['Year'] = '2023'  # Stored as text here; cast to int where a number is needed
df['AA_YEAR'] = ''  # Placeholder, exported blank until the logic is defined

In [None]:
# Build the export file name for one period of one year
def generate_file_name(period, year):
    """Return the 'SUB_INV_FAF_<start>-<end>' name covering one calendar month.

    Args:
        period: Label whose second space-separated token is the month number,
            e.g. 'Period 10'.
        year: Four-digit year as an integer.

    Returns:
        'SUB_INV_FAF_MMDDYYYY-MMDDYYYY', where the two dates are the first and
        the last day of that month.

    Raises:
        IndexError: If `period` has no second space-separated token.
        ValueError: If that token is not an integer or is not a valid month.
    """
    month = int(period.split(' ')[1])
    first_day = datetime(year, month, 1)

    # monthrange returns (weekday of day 1, days in month); only the day count
    # is needed, and it already accounts for leap years.
    _, days_in_month = calendar.monthrange(year, month)
    last_day = first_day.replace(day=days_in_month)

    return 'SUB_INV_FAF_{:%m%d%Y}-{:%m%d%Y}'.format(first_day, last_day)

def generate_unique_id(df):
    """Return a copy of `df`, sorted by invoice key, with an 'ASSETID' column.

    The key is the text concatenation of 'Vendor ID', 'Invoice #' and
    'Corporate Net Amount'. Rows sharing a key are numbered 1, 2, ... in their
    original relative order, and 'ASSETID' is '<sequence>-<key>'.

    Args:
        df: DataFrame containing the columns 'Vendor ID', 'Invoice #' and
            'Corporate Net Amount'. It is not modified.

    Returns:
        A new DataFrame with the same columns plus 'ASSETID', ordered by the
        key. The helper columns used to build the ID are not included.

    Raises:
        KeyError: If one of the three key columns is missing.
    """
    # Work on a copy: the caller's frame must not gain helper columns, and the
    # caller often passes a filtered slice, where in-place assignment would
    # trigger SettingWithCopyWarning.
    result = df.copy()

    # Cast every part to text before joining. With `+` on the raw columns,
    # numeric Vendor IDs / Invoice #s would be added arithmetically (or raise
    # TypeError against strings) instead of being concatenated.
    result['unique_key'] = (
        result['Vendor ID'].astype(str)
        + result['Invoice #'].astype(str)
        + result['Corporate Net Amount'].astype(str)
    )

    # A stable sort keeps rows with the same key in their incoming order, so
    # the sequence numbers assigned below are reproducible between runs.
    result = result.sort_values(by='unique_key', kind='stable')

    # 1-based running count within each key
    result['group_id'] = result.groupby('unique_key').cumcount() + 1

    result['ASSETID'] = result['group_id'].astype(str) + '-' + result['unique_key']

    # Drop the helper columns before handing the frame back
    return result.drop(columns=['unique_key', 'group_id'])

# Mapping periods to the Month column and filtering rows
def update_period_data(df, periods):
    """Tag rows with their period, build file names and asset IDs, and filter.

    For every period column listed in `periods`, rows holding a value in that
    column get 'Month', 'Quarter' and 'Corporate Net Amount' filled from it.
    A 'FILE_NAME' is then generated per tagged row, the 'Grand Total' row is
    set aside, rows without a value in any requested period are dropped, and
    'ASSETID' is added through generate_unique_id.

    NOTE(review): when several periods are requested, a row with values in
    more than one of them keeps only the last listed period's Month, Quarter
    and amount, because each pass overwrites the previous one. The captured
    grand total is read from the same overwritten column.

    Args:
        df: Source DataFrame. Must contain 'Year', 'Subway or FAF' and one
            column per entry of `periods`. It is not modified.
        periods: List of period column names; each must be a key of the
            quarter map below ('Period 1' .. 'Period 12').

    Returns:
        (filtered_df, grand_total): the filtered, sorted frame with 'ASSETID',
        and the 'Corporate Net Amount' of the first 'Grand Total' row.

    Raises:
        KeyError: If a period is not in the quarter map or a column is missing.
        IndexError: If the frame has no 'Grand Total' row.
    """
    quarter_map = {
        'Period 1': 'Quarter 1', 'Period 2': 'Quarter 1', 'Period 3': 'Quarter 1',
        'Period 4': 'Quarter 2', 'Period 5': 'Quarter 2', 'Period 6': 'Quarter 2',
        'Period 7': 'Quarter 3', 'Period 8': 'Quarter 3', 'Period 9': 'Quarter 3',
        'Period 10': 'Quarter 4', 'Period 11': 'Quarter 4', 'Period 12': 'Quarter 4'
    }

    # Work on a copy so the caller's frame is left exactly as it was passed in.
    data = df.copy()

    # Update Month, Quarter, Corporate Net Amount
    for period in periods:
        has_amount = data[period].notna()  # computed once per period
        data.loc[has_amount, 'Month'] = period
        data.loc[has_amount, 'Quarter'] = quarter_map[period]
        data.loc[has_amount, 'Corporate Net Amount'] = data[period]

    # Generate the 'FILE_NAME' column. The integer view of 'Year' does not
    # change between periods, so it is built once outside the loop.
    years = data['Year'].astype(int)
    for period in periods:
        in_period = (data['Month'] == period) & (years > 0)
        data.loc[in_period, 'FILE_NAME'] = years[in_period].apply(
            lambda year: generate_file_name(period, year)
        )

    data['Corporate Net Amount'] = data['Corporate Net Amount'].astype(float)

    # Capture the Grand Total value and exclude it
    is_grand_total = data['Subway or FAF'] == 'Grand Total'
    if not is_grand_total.any():
        # Same exception type the bare .iloc[0] would raise, with a usable message.
        raise IndexError(
            "No 'Grand Total' row found in column 'Subway or FAF'; "
            "cannot capture the control total."
        )
    grand_total = data.loc[is_grand_total, 'Corporate Net Amount'].iloc[0]
    data = data[data['Subway or FAF'] != 'Grand Total']

    # Keep only rows with a value in at least one requested period. The
    # explicit copy gives generate_unique_id an independent frame to add
    # columns to, rather than a slice of `data`.
    data = data[data[periods].notna().any(axis=1)].copy()

    # Call generate_unique_id to add the 'ASSETID' column
    data = generate_unique_id(data)

    return data, grand_total

# Periods to extract. Each entry names a period column of the workbook and
# must be one of 'Period 1' .. 'Period 12'. Add other periods as needed.
period_cols = ['Period 10']

# Rebind `df` to the filtered rows and keep the workbook's 'Grand Total'
# amount for the reconciliation check.
# NOTE: this overwrites `df`, and the 'Grand Total' row is removed from the
# result, so re-running this cell without re-running the load cell fails.
df, grand_total = update_period_data(df, periods=period_cols)

In [None]:
# Columns written to the export, in output order. The three derived columns
# ('FILE_NAME', 'ASSETID', 'Delivery Date') come last.
output_columns = [
    'Subway or FAF',
    'Year',
    'Quarter',
    'Month',
    'GL Category',
    'FPA Function',
    'Project Code',
    'Project Code Description',
    'GL Description',
    'Vendor ID',
    'Vendor Name',
    'Invoice #',
    'Corporate Net Amount',
    'Vendor',
    'Org Unit - Description',
    'ORG UNIT _ Business Unit Rollup',
    'ORG UNIT _ Region Rollup',
    'AA_YEAR',
    'CorpSegment6 _ FAF Working Capital',
    'Doc Date',
    'CorpSegment6 _ Consolidated Mapping _ Description',
    'CorpSegment6 _ FAF Category',
    'CorpSegment6 _ FAF Category _ Description',
    'CorpSegment6 _ FAF Managerial Mapping',
    'CorpSegment6 _ FAF Managerial Mapping Description',
    'CorpSegment6 _ Unconsolidated Mapping',
    'CorpSegment6 _ Unconsolidated Mapping Description',
    'FILE_NAME',
    'ASSETID',
    'Delivery Date',
]

# Any requested column the frame does not have is added as a blank column so
# the export always carries the full layout.
missing_columns = [name for name in output_columns if name not in df.columns]
for name in missing_columns:
    df[name] = ''

# Select the export columns in the required order
df_final = df.loc[:, output_columns]

# Write the CSV as UTF-8 with a BOM ('utf-8-sig'), without the index column
df_final.to_csv(export_file_path, index=False, encoding='utf-8-sig')

### Verify that the filtering is accurate before exporting the file

In [None]:
import numpy as np

# Control-total check: the sum of the kept rows should equal the amount on
# the workbook's 'Grand Total' row. Both sides are normalised to plain floats
# once; np.isclose absorbs floating-point rounding in the sum.
# NOTE: in the current cell order this runs after the CSV has been written.
calculated_total = float(df['Corporate Net Amount'].sum())
grand_total = float(grand_total)

if np.isclose(calculated_total, grand_total):
    print(f"The calculated total matches the Grand Total ({calculated_total}).")
else:
    print(f"Warning: The calculated total ({calculated_total}) does not match the Grand Total ({grand_total}).")

# Print the row count for each selected period
for period in period_cols:
    period_count = df[df['Month'] == period].shape[0]
    print(f"Row count for {period}: {period_count}")

The calculated total matches the Grand Total (53557154.22).
Row count for Period 10: 1443
