From 2d826cf9e6d96f8498411ea945494cabdd0dd7a8 Mon Sep 17 00:00:00 2001
From: Bizzaro <Bizzaro@users.noreply.github.com>
Date: Fri, 15 Jul 2022 19:34:17 -0400
Subject: [PATCH] Fix incorrect year for January transactions in statements

---
 README.md               |  3 ++-
 requirements.txt        | 10 +++++++++-
 teller/model.py         |  1 -
 teller/pdf_processor.py | 16 +++++++++++++---
 4 files changed, 24 insertions(+), 6 deletions(-)
diff --git a/README.md b/README.md
index e4a7257..40e8572 100644
--- a/README.md
+++ b/README.md
@@ -33,11 +33,12 @@ tl;dr - this parses PDFs with pdfplumber into text, then runs a bunch of regex o
 - Use a venv
 
 ```
-# linux
+# macOS/linux
 python3 -m venv venv
 source venv/bin/activate
 
 # windows
+python3 -m venv venv
 source venv/Scripts/activate
 ```
 
diff --git a/requirements.txt b/requirements.txt
index c180914..17d46ad 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,9 @@
-pdfplumber==0.5.23
\ No newline at end of file
+chardet==5.0.0
+pdfminer.six==20200517
+pdfplumber==0.5.23
+Pillow==9.2.0
+pycryptodome==3.15.0
+python-dateutil==2.8.2
+six==1.16.0
+sortedcontainers==2.4.0
+Wand==0.6.7
diff --git a/teller/model.py b/teller/model.py
index 94cefb3..0451ab8 100644
--- a/teller/model.py
+++ b/teller/model.py
@@ -1,6 +1,5 @@
 from enum import Enum
 
-
 class AccountType(Enum):
     AMEX = 'AMEX'
     TD = 'TD'
diff --git a/teller/pdf_processor.py b/teller/pdf_processor.py
index 53bddff..b592624 100644
--- a/teller/pdf_processor.py
+++ b/teller/pdf_processor.py
@@ -3,6 +3,7 @@
 
 from pathlib import Path
 from datetime import datetime, timedelta
+from dateutil.relativedelta import relativedelta
 from teller.model import Transaction, AccountType
 
 TARGET_FI = 'BMO'
@@ -15,7 +16,7 @@
         'txn': (r"^(?P<dates>(?:\w{3}(\.|)+ \d{1,2} ){2})"
             r"(?P<description>.+)\s"
             r"(?P<amount>-?[\d,]+\.\d{2})(?P<cr>(\-|\s?CR))?"),
-        'startyear': r'PERIOD COVERED BY THIS STATEMENT\n.+(?P<year>-?\,.[0-9][0-9][0-9][0-9])',
+        'startyear': r'PERIOD COVERED BY THIS STATEMENT\n\w+\.\s{1}\d+\,\s{1}(?P<year>[0-9]{4})',
         'openbal': r'Previous Balance.*(?P<balance>-?\$[\d,]+\.\d{2})(?P<cr>(\-|\s?CR))?',
         'closingbal': r'(?:New) Balance,.* (?P<balance>-?\$[\d,]+\.\d{2})(?P<cr>(\-|\s?CR))?'
     },
@@ -81,6 +82,8 @@ def _parse_visa(pdf_path):
         opening_bal = _get_opening_bal(text, TARGET_FI)
         closing_bal = _get_closing_bal(text, TARGET_FI)
         # add_seconds = 0
+        
+        endOfYearWarning = True
 
         # debugging transaction mapping - all 3 regex in 'txn' have to find a result in order for it to be considered a 'match'
         for match in re.finditer(regexes[TARGET_FI]['txn'], text, re.MULTILINE):
@@ -90,13 +93,20 @@ def _parse_visa(pdf_path):
             date[0] = date[0].strip('.') # Aug. -> Aug
             date.append(str(year))
             date = ' '.join(date) # ['Aug', '10', '2021'] -> Aug 10 2021
-
+            
             try:
                 date = datetime.strptime(date, '%b %d %Y') # try Aug 10 2021 first
             except: # yes I know this is horrible, but this script runs once if you download your .csvs monthly, what do you want from me
                 date = datetime.strptime(date, '%m %d %Y') # if it fails, 08 10 2021
 
-            # checks credit balance regex
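+            # Statements can span Dec (parsed year) and Jan (next year), so Jan dates are moved forward one year. NOTE(review): endOfYearWarning starts True, so every Jan date is shifted - confirm.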
+            endOfYearCheck = date.strftime("%m")
+
+            if (endOfYearCheck == '12' and endOfYearWarning == False):
+                endOfYearWarning = True
+            if (endOfYearCheck == '01' and endOfYearWarning):
+                date = date + relativedelta(years = 1)
+
             if (match_dict['cr']):
                 print("Credit balance found in transaction: '%s'" % match_dict['amount'])
                 amount = -float("-" + match_dict['amount'].replace('$', '').replace(',', ''))