In [1]:
import pandas as pd
import numpy as np
import pathlib
import datetime
from rich import print
import xarray as xr

In [2]:
# Data directory under the user's home.  pathlib.Path() has no `follow_links`
# option: the stray keyword was silently ignored by older Pythons, is
# deprecated in 3.12 and scheduled for removal in 3.14, so it is dropped here.
# Symlinks are followed when the path is resolved or opened.
pth = pathlib.Path.home() / "als" / "cvsbdata"

In [3]:
# Display the absolute form of the data directory; Path.resolve() also
# resolves any symlinks along the way.
pth.resolve()

PosixPath('/Users/kdavis10/als/cvsbdata')

In [4]:
# Release a ledger left over from a previous run of this cell before loading a
# fresh copy.  On a fresh kernel `cl` does not exist yet, hence the NameError
# guard.
try:
    del cl
except NameError:
    pass

# Build the ledger location from the home directory instead of hardcoding the
# user name.  For the original user (home = /Users/kdavis10) this is the same
# file as before.
LEDGER_CSV = (
    pathlib.Path.home()
    / "iCloud_Documents/Documents/Personal/Records/csvb/data/Combined Ledger.csv"
)

# header=3: the column names are on row 3 (0-indexed) of the file; the rows
# above it are not treated as data.  "Date" is parsed into datetimes.
cl = pd.read_csv(LEDGER_CSV, header=3, parse_dates=["Date"])

In [5]:
# Cast Amount to float; the transfer matching further down negates and
# compares these values.
cl["Amount"] = cl["Amount"].astype(float)

# Index the ledger by LedgerID so it becomes the dimension of the Dataset.
xcl = cl.set_index("LedgerID").to_xarray()

# The dimension deliberately stays named "LedgerID": later cells index with
# isel(LedgerID=...) and .loc[dict(LedgerID=...)].  The earlier
# `xcl.rename_dims(...)` call was never assigned, so it changed nothing except
# displaying a dataset that did not match `xcl`.  Show the real one instead.
xcl

In [6]:
# Add a dummmy dimension, will make transposing easier later.
def identify_possible_transfers(ds):
    ds = ds.expand_dims("dummy")

    # Transfers from one account to another would have opposite signs on the amount and date would be similar.
    amount_matches = ds.Amount.data == -1 * ds.Amount.data.T
    date_matches = np.abs(ds.Date.data - ds.Date.data.T).astype('timedelta64[D]')  < np.timedelta64(5, "D")
    matchix = np.where(np.logical_and(amount_matches, date_matches))

    # Since (i, j) = (j, i) sort all pairs so that they can be compared, drop items that matched themselves, and use a set to drop duplicates.
    pairs = zip(matchix[0], matchix[1])
    pairs = list(set([tuple(np.sort(pair)) for pair in pairs if pair[0] != pair[1]]))
    matcharray = np.array(pairs).T
    # Remove dummy dimension.
    ds = ds.squeeze()

    # Convert match indexes to LedgerIDs and add as dataarray to ds.
    pairda = xr.DataArray(data=matcharray,
                          coords={"matchside": ["left", "right"],
                                  "matchID": np.arange(matcharray.shape[1])
                                 }
                         )
    matchLIDS = ds.LedgerID.isel(LedgerID=pairda)
    ds["matches"]=matchLIDS

    # Add a dataarray to hold confirmation information
    ds["is_match"] = xr.DataArray(data=np.full((len(ds.matchID),),np.NaN),
                                         dims=["matchID"],
                                         attrs={"Note": f"{np.nan} means not reviewed"})

    return ds



In [7]:
# Attach the candidate transfer pairs ("matches") and their review flags
# ("is_match") to the ledger dataset, then display the result.
# The added variables can be stripped again with drop_vars("matches") followed
# by drop_dims(["matchside", "matchID"]), as done for `no_matches` further down.
xcl = identify_possible_transfers(xcl)
xcl

In [8]:
def format_match(ds, matchID):
    """Render the two records of one candidate match side by side.

    Parameters
    ----------
    ds : dataset
        Ledger dataset carrying a ``matches`` variable indexed by
        (matchside, matchID), and selectable by ``LedgerID``.
    matchID : label
        Which candidate pair in ``ds.matches`` to format.

    Returns
    -------
    list of str
        One entry per field (Ledger, Date, Description, Amount, Account).
        Each entry has the left record's value on its first line and the
        right record's value on the second.
    """
    # `fields`, not `vars`, so the builtin vars() is not shadowed.
    fields = ["Ledger", "Date", "Description", "Amount", "Account"]

    # Look the pair up in the dataset that was passed in.  The previous code
    # read the notebook-global `xcl` here, silently ignoring `ds`.
    left_id = ds.matches.loc["left", matchID].data
    right_id = ds.matches.loc["right", matchID].data
    left = ds.loc[dict(LedgerID=left_id)]
    right = ds.loc[dict(LedgerID=right_id)]

    lines = []
    for field in fields:
        # Indent the second value so it sits under the first one.
        pad = (len(field) + 3) * " "
        lines.append(f"{field}: {left[field].data}\n{pad}{right[field].data}.\n")
    return lines
    

In [9]:
# Split the candidate pairs into their two sides.  drop=True discards the
# now-scalar "matchside" coordinate, leaving arrays indexed by matchID only.
left = xcl.matches.sel(matchside="left", drop=True)
right = xcl.matches.sel(matchside="right", drop=True)

# The ledger without any of the match bookkeeping: remove the "matches"
# variable, then everything else that lives on the match dimensions.
no_matches = (
    xcl
    .drop_vars("matches")
    .drop_dims(["matchside", "matchID"])
)