In [1]:
import pandas as pd
import numpy as np
import pathlib
import datetime
from rich import print
from collections import namedtuple
from dataclasses import dataclass
import operator
# TODO: Create a rules that passes a set of conditions to pandas dataframe. For example: description contains and amount equals , not equals, etc.

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [6]:
def sel_factory(r):
    
    def func(df):
        bools = []
        for rule in r.select:
            match rule.op:
                case pd.Series.str.contains | "contains":
                    bools.append(df[rule.column].str.contains(rule.b))
                case _:
                    try:
                        bools.append(rule.op(rule.column, rule.b))
                    except (TypeError, AttributeError):
                        bools.append(rule.op(rule.a, rule.b))
        bool = bools.pop()
        if len(bools) > 0:
            for b in bools:
                bool = bool & b
        return bool
    return func
    

def apply_rule(rule, df):
    for op in rule.apply:
        match op:
            case "=" | "assign":    
                df = df.loc[df[sel_factory(rule)], item["column"]] = item["value"]
            case "_":
                raise NotImplementedError
    return df

class Rule:
    def __init__(self, *args):
        self.select = []
        self.apply = []
        for arg in args:
            if isinstance(arg, SelectOp):
                self.select.append(arg)
            elif isinstance(arg, ApplyOp):
                self.apply.append(arg)

    def __add__(self, other):
        if isinstance(other, ApplyOp):
            self.apply.append(other)
        elif isinstance(other, SelectOp):
            self.select.append(other)
        elif isinstance(other, Rule):
            self.select += other.select
            self.apply += other.apply
        else:
            raise TypeError("Cannot add unknown rule.")

class SelectOp:
    def __init__(self, op, column=None, a=None, b=None):
        if column is None and a is None:
            raise TypeError("Column or a must be set.")
        self.op = op
        self.column = column
        self.b = b
        
    def __add__(self, other):
        return Rule([self, other])

class ApplyOp:
    def __init__(self, op, column,b=None):
        self.op = op
        self.column = column
        self.b = b
    def __add__(self, other):
        return Rule([self, other])

SOP = SelectOp
AOP = ApplyOp

In [7]:
pth = pathlib.Path("/Users/kdavis10").resolve() / "als" / 'csvbdata/bank'

In [8]:
try:
    del apl
except NameError:
    pass

apl = pd.read_csv(pth / pathlib.Path("Jan-2024_Apple_CC.csv"),
                    header=0, parse_dates=["Transaction Date", "Clearing Date"])

apl = apl.rename(columns={"Amount (USD)": "Amount"})
apl["Amount"] = -1 * apl["Amount"]
apl["To"] = ""

In [9]:
df = apl

rules = [Rule(SOP("contains", "Merchant", b="Banana Republic"), AOP("assign", "To", "Expenses:Clothing"))]
#         Rule("Merchant", "Apple Services", "To", "Expenses:Subscriptions"),
#         Rule("Merchant", "Phillips 66", "To", "Expenses:Auto:Gas"),
#         Rule("Merchant", "Ridge Wallet", "To", "Expenses:Gear"),
#         Rule("Merchant", "Martin's Supermarket", "To", "Expenses:Groceries"),
#         Rule("Merchant", "Apple Cash Payment", "To", "Income:Apple Cash"),
#         Rule("Merchant", "Hulu", "To", "Expenses:Entertainment"),
#         Rule("Merchant", "Trade Coffee CO", "To", "Expenses:Groceries"),
#         Rule("Merchant", "Beacon Med", "To", "Expenses:Medical:Body"),
#         Rule("Merchant", "South Bend Clinic", "To", "Expenses:Medical:Body"),
#         Rule("Merchant", "WMMH", "To", "Expenses:Entertainment:Self"),
#         Rule("Merchant", "Account Ending In 8785", "To", "Assets:1st Source:Checking"),
#         Rule("Description", "ABEBOOKS", "To", "Expenses:Study"),
#         Rule("Merchant", "bookshop.org", "To", "Expenses:Study"),
#         Rule("Merchant", "Patreon", "To", "Expenses:Entertainment"),
#         Rule("Merchant", "Monthly Installments", "To", "Liabilities:Apple Installments"),
#         Rule("Category", "Tolls", "To", "Expenses:Auto:Tolls"),
#         Rule("Merchant", "Kobo", "To", "Expenses:Entertainment:Reading"),
#         Rule("Merchant", "Vtsup.com", "To", "Expenses:Entertainment:Self"),
#         Rule("Merchant", "City Of South Bend", "To", "Expenses:Utilities"),
#         Rule("Merchant", "Andrasi", "To", "Expenses:Home:Cleaning"),
#         Rule("Merchant", "Parlevel Texas", "To", "Expenses:Restaurants"),
#         Rule("Merchant", "Gannett Newsprpr", "To", "Expenses:Utilities"),

                                    

#cl.loc[cl["Merchant"].str.contains("Banana Republic"),"To"] = "Expenses:Clothing"
#cl.loc[cl["Merchant"].str.contains("Apple Services"),"To"] = "Expenses:Subscriptions"
#cl.loc[cl["Merchant"].str.contains("Phillips 66"),"To"] = "Expenses:Auto:Gas"
#cl.loc[cl["Merchant"].str.contains("SP Ridge Wallet"),"To"] = "Expenses:Gear"
#cl.loc[cl["Merchant"].str.contains("Martin's Supermarket"),"To"] = "Expenses:Groceries"
#cl.loc[cl["Category"].str.contains("Tolls"),"To"] = "Expenses:Auto:Tolls"
#cl.loc[cl["Merchant"].str.contains("Apple Cash Payment"),"To"] = "Income:Apple Cash"
#cl.loc[cl["Merchant"].str.contains("Hulu"),"To"] = "Expenses:Subscriptions:Entertainment"
#cl.loc[cl["Merchant"].str.contains("Trade Coffee CO"),"To"] = "Expenses:Groceries"
#cl.loc[cl["Merchant"].str.contains("Beacon Med"),"To"] = "Medical:Body"
#cl.loc[cl["Merchant"].str.contains("South Bend Clinic"),"To"] = "Medical:Body"
#cl.loc[cl["Merchant"].str.contains("WMMH"),"To"] = "Expenses:Entertainment:Self"

for r in rules:
    apl = apply_rule(r, df)
apl

Unnamed: 0,Transaction Date,Clearing Date,Description,Merchant,Category,Type,Amount,Purchased By,To
0,2024-01-31,2024-01-31,ACH DEPOSIT INTERNET TRANSFER FROM ACCOUNT END...,Ach Deposit Internet Transfer From Account End...,Payment,Payment,2315.13,Keith Davis,
1,2024-01-31,2024-01-31,APPLE CASH PAYMENT,Apple Cash Payment,Payment,Payment,40.45,Keith Davis,
2,2024-01-29,2024-01-31,APPLE.COM/BILL ONE APPLE PARK CUPERTINO 95014 ...,Apple Services,Other,Purchase,-5.99,Keith Davis,
3,2024-01-29,2024-01-31,TRADE COFFEE CO 268 SUMMER ST. 6TH FLOOR BOSTO...,Trade Coffee CO,Grocery,Purchase,-210.0,Keith Davis,
4,2024-01-27,2024-01-29,MARTIN'S SUPERMARKET 7355 HERITAGE SQUARE D GR...,Martin's Supermarket,Grocery,Purchase,-208.69,Keith Davis,
5,2024-01-28,2024-01-29,APPLE.COM/BILL ONE APPLE PARK WAY 866-712-7753...,Apple Services,Other,Purchase,-3.49,Keith Davis,
6,2024-01-26,2024-01-26,PHILLIPS 66 - 0082 FIRE DAY RD MISHAWAKA 46545...,Phillips 66,Gas,Purchase,-8.43,Keith Davis,
7,2024-01-24,2024-01-26,BANANA REPUBLIC ON-LIN6007 GREEN POINTE DR S G...,Banana Republic,Shopping,Purchase,-107.0,Keith Davis,
8,2024-01-24,2024-01-25,BANANA REPUBLIC ON-LIN6007 GREEN POINTE DR S G...,Banana Republic,Shopping,Purchase,-214.0,Keith Davis,
9,2024-01-24,2024-01-25,BANANA REPUBLIC ON-LIN6007 GREEN POINTE DR S G...,Banana Republic,Shopping,Purchase,-160.5,Keith Davis,


In [32]:
pd.Series.str.contains(apl["Description"], "ABE")

AttributeError: 'Series' object has no attribute '_inferred_dtype'

In [7]:
try:
    del fst
except NameError:
    pass

fst = pd.read_csv(pth / pathlib.Path("Jan-2024_1st.csv"),
                    header=3, parse_dates=["Date"])

fst["Amount"] = np.nansum([fst["Amount Credit"],fst["Amount Debit"]], axis=0)
fst["To"] = ""

In [40]:
fst.loc[fst["Description"].str.contains("crossfit south b") & (fst["Amount"] != -225),"To"] = "Groceries"


In [52]:
fst["Amount"].__ne__(-225)

0      True
1      True
2      True
3     False
4      True
5      True
6      True
7      True
8      True
9      True
10     True
11     True
12     True
13     True
Name: Amount, dtype: bool

In [6]:
# Add a dummmy dimension, will make transposing easier later.
def identify_possible_transfers(ds):
    ds = ds.expand_dims("dummy")

    # Transfers from one account to another would have opposite signs on the amount and date would be similar.
    amount_matches = ds.Amount.data == -1 * ds.Amount.data.T
    date_matches = np.abs(ds.Date.data - ds.Date.data.T).astype('timedelta64[D]')  < np.timedelta64(5, "D")
    matchix = np.where(np.logical_and(amount_matches, date_matches))

    # Since (i, j) = (j, i) sort all pairs so that they can be compared, drop items that matched themselves, and use a set to drop duplicates.
    pairs = zip(matchix[0], matchix[1])
    pairs = list(set([tuple(np.sort(pair)) for pair in pairs if pair[0] != pair[1]]))
    matcharray = np.array(pairs).T
    # Remove dummy dimension.
    ds = ds.squeeze()

    # Convert match indexes to LedgerIDs and add as dataarray to ds.
    pairda = xr.DataArray(data=matcharray,
                          coords={"matchside": ["left", "right"],
                                  "matchID": np.arange(matcharray.shape[1])
                                 }
                         )
    matchLIDS = ds.LedgerID.isel(LedgerID=pairda)
    ds["matches"]=matchLIDS

    # Add a dataarray to hold confirmation information
    ds["is_match"] = xr.DataArray(data=np.full((len(ds.matchID),),np.NaN),
                                         dims=["matchID"],
                                         attrs={"Note": f"{np.nan} means not reviewed"})

    return ds



In [7]:
xcl = identify_possible_transfers(xcl)
#matcharray = xcl.matches.data
#xcl = xcl.drop_vars("matches")
#xcl = xcl.drop_dims(["matchside", "matchID"])
xcl

In [8]:
def format_match(ds, matchID):
    vars = ["Ledger", "Date", "Description", "Amount", "Account"]
    m = (ds.loc[dict(LedgerID=xcl.matches.loc["left", matchID].data)],
         ds.loc[dict(LedgerID=xcl.matches.loc["right", matchID].data)])
    s = []
    for var in vars:
        s.append(f"{var}: {m[0][var].data}\n{(len(var)+3) * ' '}{m[1][var].data}.\n")
    return s
    

In [9]:
left = xcl.matches.loc[dict(matchside="left")].reset_coords("matchside", drop=True)
right = xcl.matches.loc[dict(matchside="right")].reset_coords("matchside", drop=True)
no_matches = xcl.drop_vars("matches").drop_dims(["matchside","matchID"])

In [53]:
type(df["Amount"].__eq__)

method

In [18]:
apl["Merchant"].str

<pandas.core.strings.accessor.StringMethods at 0x106d75c40>

In [21]:
getattr(apl["Merchant"], "str").contains("Banana")

0     False
1     False
2     False
3     False
4     False
5     False
6     False
7      True
8      True
9      True
10    False
11    False
12    False
13    False
14    False
15    False
16    False
17    False
18    False
19    False
20    False
21    False
22    False
23    False
24    False
25    False
26    False
27    False
28    False
29    False
30    False
31    False
32    False
33    False
34    False
35    False
36    False
37    False
38    False
39    False
40    False
Name: Merchant, dtype: bool

In [37]:
from operator import ne, contains
contains(apl["Merchant"].str,"Apple Services")

TypeError: argument of type 'StringMethods' is not iterable

In [39]:
help(apl["Merchant"].str)

Help on StringMethods in module pandas.core.strings.accessor object:

class StringMethods(pandas.core.base.NoNewAttributesMixin)
 |  StringMethods(data) -> 'None'
 |
 |  Vectorized string functions for Series and Index.
 |
 |  NAs stay NA unless handled otherwise by a particular method.
 |  Patterned after Python's string methods, with some inspiration from
 |  R's stringr package.
 |
 |  Examples
 |  --------
 |  >>> s = pd.Series(["A_Str_Series"])
 |  >>> s
 |  0    A_Str_Series
 |  dtype: object
 |
 |  >>> s.str.split("_")
 |  0    [A, Str, Series]
 |  dtype: object
 |
 |  >>> s.str.replace("_", "")
 |  0    AStrSeries
 |  dtype: object
 |
 |  Method resolution order:
 |      StringMethods
 |      pandas.core.base.NoNewAttributesMixin
 |      builtins.object
 |
 |  Methods defined here:
 |
 |  __getitem__(self, key)
 |
 |  __init__(self, data) -> 'None'
 |      Initialize self.  See help(type(self)) for accurate signature.
 |
 |  __iter__(self) -> 'Iterator'
 |
 |  capitalize(self)
