In [None]:
# 1 Imports
import pandas as pd
import numpy as np

# 2 Load Data
# Read the payments workbook into the frame that every later step works on.
df = pd.read_excel("Pay_10-25.xlsx")
df.info()
# A bare `df.head()` is only rendered when it is the last expression of a
# notebook cell (and never in a plain script), so print the preview explicitly.
print(df.head())

# 3 Normalize Strings
def _normalize_text(col):
    """Return *col* as stripped text with blank or missing entries set to NaN.

    Every value is stringified before stripping, so a mixed-type object
    column comes back as text.  Cells whose text is exactly "nan", "NaN",
    "None" or empty are treated as missing, as are all cells that were
    already missing in *col*.
    """
    cleaned = col.astype(str).str.strip()
    cleaned = cleaned.replace(r"^(nan|NaN|None|)$", np.nan, regex=True)
    # astype(str) spells missing values as text ("nan", "None", "NaT", "<NA>")
    # and the pattern above only recognises some of those spellings, so put
    # NaN back wherever the original cell was missing.
    return cleaned.where(col.notna())


string_cols = df.select_dtypes(include="object").columns
df[string_cols] = df[string_cols].apply(_normalize_text)
df.info()

# 4 Normalize Dates
# For each date column that exists, parse it (unparseable entries are coerced
# to NaT) and keep only the calendar-date part.
for col in ("Transaction Date", "Processed Date"):
    if col not in df.columns:
        continue
    parsed = pd.to_datetime(df[col], errors="coerce")
    df[col] = parsed.dt.date

# 5 Drop & Reorder Columns
# Discard the two currency-conversion columns when they are present.
df = df.drop(columns=["Conversion Charge", "Foreign Currency Amount"], errors="ignore")

# Place "To/From Account Number" immediately after "Processed Date".
if {"To/From Account Number", "Processed Date"}.issubset(df.columns):
    cols = df.columns.tolist()
    # Remove the account column *before* locating "Processed Date": computing
    # the anchor position first gives a stale index whenever the account
    # column sits to its left, leaving it one slot too far to the right.
    cols.remove("To/From Account Number")
    cols.insert(cols.index("Processed Date") + 1, "To/From Account Number")
    df = df[cols]

# 6 Remove November Transactions
# Keep every row whose processed month is not 11; rows whose date cannot be
# parsed have no month and are therefore kept.
if "Processed Date" in df.columns:
    processed_month = pd.to_datetime(df["Processed Date"], errors="coerce").dt.month
    df = df[processed_month != 11]

# # 7 Remove Cancelling Failed Payments / Reversals -- Incorrect lines
# mask_failed = df["Type"].str.contains("Failed Payment|Payment Reversal|Unpaid Item Reversal", case=False, na=False)
# failed_df = df[mask_failed].copy()
# indices_to_drop = set()

# for idx, fail_row in failed_df.iterrows():
#     match_mask = (
#         (df["Amount"] == -fail_row["Amount"]) &
#         (df["Details"] == fail_row["Details"]) &
#         (df["Code"] == fail_row["Code"]) &
#         (~mask_failed) &
#         (~df.index.isin(indices_to_drop))
#     )
#     match = df[match_mask]
#     if not match.empty:
#         match_idx = match.index[0]
#         indices_to_drop.update([idx, match_idx])

# df = df.drop(index=indices_to_drop).reset_index(drop=True)
# print(f"{len(indices_to_drop) // 2} matched reversal pairs removed.")

# 8 Group by Code (with Conditional Handling)
def _join_unique(values):
    """Join the distinct non-missing entries of *values* with ", ".

    Entries keep their first-seen order so the result is reproducible from
    run to run (a plain ``set`` has no stable order), and missing entries are
    skipped so they cannot break ``str.join``.  NaN is returned when nothing
    is left to join.
    """
    distinct = dict.fromkeys(values.dropna().astype(str))
    return ", ".join(distinct) if distinct else np.nan


def _aggregate(frame, keys, spec):
    """Collapse *frame* to one row per *keys* combination using *spec*.

    An empty frame is returned untouched.  Rows whose key is missing form
    their own group (``dropna=False``) instead of being silently discarded.
    """
    if frame.empty:
        return frame
    return frame.groupby(keys, as_index=False, dropna=False).agg(spec)


# Rows are merged per Code only when the code is present, non-blank, not one
# of "Billing"/"Transfer" and not purely numeric; every other row is merged
# per (Type, Details) instead.
code_clean = df["Code"].astype(str).str.strip()
mask_valid = (
    df["Code"].notna() &
    (code_clean != "") &
    (~code_clean.isin(["Billing", "Transfer"])) &
    (~code_clean.str.isdigit())
)

df_valid = df[mask_valid].copy()
df_conditional = df[~mask_valid].copy()

agg_common = {
    "Transaction Date": "first",
    "Processed Date": "first",
    "To/From Account Number": "first",
    "Particulars": "first",
    "Balance": "first",
    "Code": "first",
    "Type": _join_unique,
    "Amount": "sum"
}
# Earlier steps treat several of these columns as optional, so aggregate only
# the ones that actually exist rather than failing with a KeyError.
agg_common = {name: how for name, how in agg_common.items() if name in df.columns}

# "Details" (like "Code" and "Type") is required: it is a grouping key below.
agg_valid = agg_common | {"Details": "first"}
df_valid = _aggregate(df_valid, "Code", agg_valid)
df_conditional = _aggregate(df_conditional, ["Type", "Details"], agg_valid)

df = pd.concat([df_valid, df_conditional], ignore_index=True)

# 9 Reorder Columns
# Preferred left-to-right layout; names absent from the frame are skipped.
desired_order = [
    "Transaction Date",
    "Processed Date",
    "Code",
    "Type",
    "To/From Account Number",
    "Details",
    "Particulars",
    "Amount",
    "Balance",
]
df = df.loc[:, [name for name in desired_order if name in df.columns]]

# 10 Final Preview
print("\nFirst 100 rows:")
df.head(100)

# 11 Optional Export
# df.to_excel("Pay_10-25_Cleaned.xlsx", index=False)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49 entries, 0 to 48
Data columns (total 9 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   Transaction Date        49 non-null     datetime64[ns]
 1   Processed Date          49 non-null     datetime64[ns]
 2   Type                    49 non-null     object        
 3   Details                 49 non-null     object        
 4   Particulars             30 non-null     object        
 5   Code                    37 non-null     object        
 6   Amount                  49 non-null     float64       
 7   Balance                 49 non-null     float64       
 8   To/From Account Number  23 non-null     object        
dtypes: datetime64[ns](2), float64(2), object(5)
memory usage: 3.6+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49 entries, 0 to 48
Data columns (total 9 columns):
 #   Column                  Non-Null Count  Dtype         
---  

Unnamed: 0,Transaction Date,Processed Date,Code,Type,To/From Account Number,Details,Particulars,Amount,Balance
0,2025-10-09,2025-10-09,Les Mills,Visa Purchase,,4835-****-****-9991 Df,,-28.5,175.03
1,2025-10-28,2025-10-28,Microsoft*St,Visa Purchase,,4835-****-****-9991 If,,-17.0,0.85
2,2025-10-31,2025-10-31,One Nz,Visa Purchase,,4835-****-****-9991 Df,,-236.63,57.15
3,2025-10-30,2025-10-30,,Automatic Payment,06-0201-0005529-02,Bc,,-75.0,101.5
4,2025-10-29,2025-10-29,,Deposit,,"Yu,Zhuo Cheng",,490.0,270.85
5,2025-10-30,2025-10-30,624529,Direct Debit,,Intercoll Ledger Ltd,,-50.0,181.5
6,2025-10-24,2025-10-24,,Direct Debit,,Kaiz Skin Body,,-228.6,17.85
7,2025-10-23,2025-10-23,Billing,Direct Debit,,Les Mills,Les Mills,-114.0,187.5
8,2025-10-30,2025-10-30,00027926489,Direct Debit,,Mercury Nz Ltd,Mercury Ener,-325.0,116.5
9,2025-10-30,2025-10-31,Transfer,Transfer,06-0237-0783197-00,06-0237-0783197-00,Debit,284.8,101.42
