Skip to content

Commit

Permalink
add fix for incorrect years for jan statements
Browse files Browse the repository at this point in the history
  • Loading branch information
Bizzaro committed Jul 15, 2022
1 parent 2a000aa commit 2d826cf
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 6 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,12 @@ tl;dr - this parses PDFs with pdfplumber into text, then runs a bunch of regex o
- Use a venv

```
# linux
# macOS/linux
python3 -m venv venv
source venv/bin/activate
# windows (Git Bash; in cmd or PowerShell run `venv\Scripts\activate` instead of the `source` line)
python3 -m venv venv
source venv/Scripts/activate
```

Expand Down
10 changes: 9 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,9 @@
pdfplumber==0.5.23
chardet==5.0.0
pdfminer.six==20200517
pdfplumber==0.5.23
Pillow==9.2.0
pycryptodome==3.15.0
python-dateutil==2.8.2
six==1.16.0
sortedcontainers==2.4.0
Wand==0.6.7
1 change: 0 additions & 1 deletion teller/model.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from enum import Enum


class AccountType(Enum):
AMEX = 'AMEX'
TD = 'TD'
Expand Down
16 changes: 13 additions & 3 deletions teller/pdf_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from pathlib import Path
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
from teller.model import Transaction, AccountType

TARGET_FI = 'BMO'
Expand All @@ -15,7 +16,7 @@
'txn': (r"^(?P<dates>(?:\w{3}(\.|)+ \d{1,2} ){2})"
r"(?P<description>.+)\s"
r"(?P<amount>-?[\d,]+\.\d{2})(?P<cr>(\-|\s?CR))?"),
'startyear': r'PERIOD COVERED BY THIS STATEMENT\n.+(?P<year>-?\,.[0-9][0-9][0-9][0-9])',
'startyear': r'PERIOD COVERED BY THIS STATEMENT\n\w+\.\s{1}\d+\,\s{1}(?P<year>[0-9]{4})',
'openbal': r'Previous Balance.*(?P<balance>-?\$[\d,]+\.\d{2})(?P<cr>(\-|\s?CR))?',
'closingbal': r'(?:New) Balance,.* (?P<balance>-?\$[\d,]+\.\d{2})(?P<cr>(\-|\s?CR))?'
},
Expand Down Expand Up @@ -81,6 +82,8 @@ def _parse_visa(pdf_path):
opening_bal = _get_opening_bal(text, TARGET_FI)
closing_bal = _get_closing_bal(text, TARGET_FI)
# add_seconds = 0

endOfYearWarning = True

# debugging transaction mapping - the 'txn' pattern is built from three parts (dates, description, amount); all three have to match on a line for it to be considered a transaction
for match in re.finditer(regexes[TARGET_FI]['txn'], text, re.MULTILINE):
Expand All @@ -90,13 +93,20 @@ def _parse_visa(pdf_path):
date[0] = date[0].strip('.') # Aug. -> Aug
date.append(str(year))
date = ' '.join(date) # ['Aug', '10', '2021'] -> Aug 10 2021

try:
date = datetime.strptime(date, '%b %d %Y') # try Aug 10 2021 first
except: # yes I know this is horrible, but this script runs once if you download your .csvs monthly, what do you want from me
date = datetime.strptime(date, '%m %d %Y') # if it fails, 08 10 2021

# checks credit balance regex
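# a statement can span a year boundary: December transactions belong to the previous year and January transactions to the current one, so January dates are shifted forward by one year
# NOTE(review): in the code visible here endOfYearWarning starts as True and is never set to False, so every January transaction gets +1 year even on statements that contain no December transactions - confirm this is intended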
endOfYearCheck = date.strftime("%m")

if (endOfYearCheck == '12' and endOfYearWarning == False):
endOfYearWarning = True
if (endOfYearCheck == '01' and endOfYearWarning):
date = date + relativedelta(years = 1)

if (match_dict['cr']):
print("Credit balance found in transaction: '%s'" % match_dict['amount'])
amount = -float("-" + match_dict['amount'].replace('$', '').replace(',', ''))
Expand Down

0 comments on commit 2d826cf

Please sign in to comment.