[Reference](https://python.plainenglish.io/i-automated-6-hours-of-excel-work-with-40-lines-of-python-that-i-didnt-write-a486ba0be2a0)

In [1]:
import pandas as pd

# Read the two raw CSV exports: the internal sales records and the bank statement
SALES_CSV = 'Internal_Sales.csv'
BANK_CSV = 'Bank_Statement.csv'
sales_df = pd.read_csv(SALES_CSV)
bank_df = pd.read_csv(BANK_CSV)

# AI Comment: Function to clean company names for better matching
def clean_name(name):
    """Normalize a company name into a key suitable for matching.

    Strings are upper-cased, whitespace is trimmed and collapsed, and the
    noise words INC, LLC and BILLING (optionally followed by "." or ",") are
    dropped when they appear as whole words after the first word. Matching
    whole words avoids mangling names that merely contain those letters,
    e.g. "Acme Incorporated" stays "ACME INCORPORATED".

    Args:
        name: The raw name. Non-string values (e.g. NaN for a missing cell)
            are returned unchanged.

    Returns:
        The normalized string, or ``name`` itself when it is not a string.
    """
    if not isinstance(name, str):
        return name

    noise_words = {"INC", "LLC", "BILLING"}
    # split() with no argument trims the ends and collapses whitespace runs.
    words = name.upper().split()
    # The first word is always kept so a name never collapses to nothing
    # just because it starts with one of the noise words.
    kept = words[:1] + [w for w in words[1:] if w.rstrip(".,") not in noise_words]
    # Drop a comma left dangling by a removed suffix ("ACME, INC" -> "ACME").
    return " ".join(kept).rstrip(",")

# Build the normalized matching key on both frames from their raw name columns
for frame, source_col in ((sales_df, 'CustomerName'), (bank_df, 'Description')):
    frame['Clean_Name'] = frame[source_col].apply(clean_name)

# Parse the raw date columns of both frames into datetime values
for frame, source_col in ((sales_df, 'OrderDate'), (bank_df, 'TransactionDate')):
    frame['Date'] = pd.to_datetime(frame[source_col])

# Merge the files on the normalized name.
# pandas matches null join keys against each other, so a sales row with no
# customer name would pair with every bank row that has no description and
# be counted as matched. Dropping null-key bank rows first keeps such sales
# rows unmatched, so they show up in the discrepancy report.
bank_keyed = bank_df.dropna(subset=['Clean_Name'])
merged_df = pd.merge(sales_df, bank_keyed, on='Clean_Name', how='left', indicator=True)

# Keep only the sales rows with no bank counterpart ("left_only" = missing in bank)
missing_money = merged_df[merged_df['_merge'] == 'left_only']

# Export the unmatched rows to Excel without the index column
missing_money.to_excel('Discrepancy_Report.xlsx', index=False)

print(f"Done. Found {len(missing_money)} discrepancies.")