Merge pull request #5 from tomquirk/master

Major NAB updates
ArtS · Sep 17, 2017 · 9ad0da6 · 9ad0da6
2 parents b21f7e3 + 5518225
commit 9ad0da6
Show file tree

Hide file tree

Showing 9 changed files with 170 additions and 167 deletions.
diff --git a/.gitignore b/.gitignore
@@ -2,3 +2,7 @@ transactions.db
 .credentials
 *.pyc
 .DS_Store
+.venv
+.vscode/
+**/*.qif.*
+**/*.qif
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
-# Smart transactions exporter for NAB #
+# Smart transactions exporter for NAB
 
-### Intro #
+## Introduction
 
 I hated having to log in to NAB's clunky Internet banking and fight with its forms to get my data, so I decided I'd better automate this process.
 
@@ -10,44 +10,39 @@ files. You can use QIF to load in your desktop or online account software, such
 The tool is smart enough not to export pending/clearing transactions, so in theory
 you shouldn't get any duplicates.
 
+### Prerequisites
 
-### Prerequisites ##
-
-You need to have the following:
-
-- An account with NAB
-- Internet banking login details - username and password
 - Python 2.7
-- Python libs mechanize and BeautifulSoup installed
+- An account with NAB, as well as internet banking login details (username and password)
 
-### Usage ##
+### Usage
 
-Simply start *export.py* from command line. It will ask you for username and password. If you are running it for the first time be prepared to wait since it's going to fetch all available transactions for all of your accounts, which may take a while.
-
-After it is done, you'll see folders like that
+1. Install the virtual environment with dependencies
 
+```bash
+virtualenv --python=python2 .venv && source .venv/bin/activate && pip install -r requirements.txt
 ```
-lrwxr-xr-x   1 art  staff      63 12 May 18:06 083081-16xxxxxxx
-lrwxr-xr-x   1 art  staff      63 12 May 18:07 083081-17xxxxxxx
-lrwxr-xr-x   1 art  staff      63 12 May 18:08 083081-17xxxxxxx
+
+2. Run the program
+
+```bash
+python export.py
 ```
 
+3. Enter your username and password
+
 When it finishes, you'll see one folder per account; each of those will contain your QIF files for a given account.
 
-###Privacy and Security###
+### Privacy and Security
 
 The tool does not use your username and password for anything except logging into NAB's website.
 
 It does not store your username and password on disk, so you have to enter them every time you run it.
 
 You can avoid that by creating a file named *.credentials* in the same folder. The file needs to have two lines, with username on the first line and password on the second line.
 
+### License
 
-###Warranty###
-The Software is provided "as is" without warranty of any kind, either express or implied, including without limitation any implied warranties of condition, uninterrupted use, merchantability, fitness for a particular purpose, or non-infringement
-
-
-###License###
 Copyright (C) 2012 Artem Skvira
 
 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

diff --git a/export.py b/export.py
@@ -17,8 +17,10 @@ def get_last_transaction_date(b, account):
 
     if not last_date:
 
-        print('\tWe don\'t seem to have any transactins for account \'%s\' in database.' % account['name'])
-        print('\tThat\'s OK though! Let\'s retrieve transactions for last %s days' % MAX_HISTORY_DAYS)
+        print('\tWe don\'t seem to have any transactions for account \'%s\' in database.' %
+              account['name'])
+        print('\tThat\'s OK though! Let\'s retrieve transactions for last %s days' %
+              MAX_HISTORY_DAYS)
 
         last_date = today - timedelta(days=MAX_HISTORY_DAYS)
 
@@ -28,8 +30,10 @@ def get_last_transaction_date(b, account):
               {'acc': account['acc_no']})
 
         if (today - last_date).days > MAX_HISTORY_DAYS:
-            print('Looks like the oldest transaction in the DB is older than %s days' % MAX_HISTORY_DAYS)
-            print('Retrieving transactions for only last %s days' % MAX_HISTORY_DAYS)
+            print('Looks like the oldest transaction in the DB is older than %s days' %
+                  MAX_HISTORY_DAYS)
+            print('Retrieving transactions for only last %s days' %
+                  MAX_HISTORY_DAYS)
             last_date = today - timedelta(days=MAX_HISTORY_DAYS)
 
     return last_date
@@ -77,7 +81,8 @@ def export():
         return
 
     for account in accounts:
-
+        account['acc_no'] = account['acc_no'].replace('-', '')
+        account['bsb'] = account['bsb'].replace('-', '')
         print('\nProcessing account \'%s\' (BSB: %s Number: %s)' % (
             account['name'], account['bsb'], account['acc_no']))
 

diff --git a/lib/browser.py b/lib/browser.py
@@ -1,25 +1,23 @@
-
 import re
 from datetime import datetime
 
-from mechanize import Browser, ControlNotFoundError
-from mechanize import _http
+from mechanize import Browser, ControlNotFoundError, _http
 from bs4 import BeautifulSoup
 from bs4.element import NavigableString
 
-from lib.tools import make_password, get_credentials, write_step, read_step
-from lib.tools import parse_transaction_date
+from lib.tools import make_password, get_credentials, write_step, read_step, parse_transaction_date
 
 #
 # There could be more than one URL when you log in
 #
 logged_in_urls = ['https://ib.nab.com.au/nabib/acctInfo_acctBal.ctl',
                   'https://ib.nab.com.au/nabib/loginProcess.ctl']
+TRANSACTIONS_PER_PAGE = 200
 
 
 def get_accounts(text):
 
-    soup = BeautifulSoup(text)
+    soup = BeautifulSoup(text, "html.parser")
     account_divs = soup.select('.acctDetails')
     if len(account_divs) == 0:
         print('\tNo accounts found.')
@@ -78,10 +76,15 @@ def login():
 
     b = Browser()
     b.set_handle_robots(False)
-    b.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
+    b.addheaders = [
+        ('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'),
+        ('Connection', 'keep-alive'),
+        ('Cache-Control', 'max-age=0'),
+        ('Accept-Encoding', 'gzip, deflate, br')
+    ]
 
     b.set_handle_equiv(True)
-    #b.set_handle_gzip(True)
+    b.set_handle_gzip(True)
     b.set_handle_redirect(True)
     b.set_handle_referer(True)
     b.set_handle_robots(False)
@@ -90,11 +93,9 @@ def login():
     b.set_handle_refresh(_http.HTTPRefreshProcessor(), max_time=1)
 
     # Want debugging messages?
-    #b.set_debug_http(True)
+    # b.set_debug_http(True)
     b.set_debug_redirects(True)
     b.set_debug_responses(True)
-
-
     print 'Opening main page...'
     b.open('http://www.nab.com.au')
     print 'OK'
@@ -120,15 +121,21 @@ def login():
     newPassword = make_password(creds[1], webKey, webAlpha)
 
     usernameCtrl = b.form.find_control(name='userid')
-    passwordCtrl = b.form.find_control(name='password')
+    passwordCtrl = b.form.find_control(name='encoded-password')
+    passwordCtrl.readonly = False
     usernameCtrl.value = creds[0]
     passwordCtrl.value = newPassword
 
+    rawPassword = b.form.find_control(name='password')
+    rawPassword.value = ''
+
     b_data = b.form.find_control(name='browserData')
     b_data.readonly = False
-    b_data.value = '1332067415674;z=-660*-600;s=1440x900x24;h=325b2e41;l=en-US;p=MacIntel;i=0;j=7;k=0;c=81d6c46c:,799e53ad:,f67180ac:,c801b011:,9ed81ce8:,68bab54a:,3db529ef,97362cfc;'
+    b_data.value = '1496488636702;z=-600*-600;s=1440x900x24;l=en-GB;p=MacIntel;h=1Z3uS;i=33;j=117;k=16;c=d3d3Lm5hYi5jb20uYXUvc3RhdGljL0lCL2xvZ2luQmFubmVyLw;n=bG9naW5Gb3Jt,bG9naW5UaXBz;e=Y3ZpZXcz;b=1JE4yQ,24uNEg,2wDBVE;a=1GeUEa,1TaPsP,1ZO-16,1rEqxh,2.jbKy,21b2P5,2Jrfu6,2LmSef,2TqVCf,2Ubrnm,2dgqqB,3MkcJZ,JIGdn,eqyBa,lTM8m;o=Y29uc29sZQ,Y2hyb21l,YW5ndWxhcg,YXBpTG9nb3V0QXBw,Z2V0QnJvd3Nlcg,alF1ZXJ5MTEwMjA4MzYwNzIxMDQ4NTY0MjY0;t=fo4f0ot8-600.j3h6ekzf.877;d=YWNz,Ym9keWNvbnRhaW5lcg,Ym9keWNvbnRhaW5lcl9pbnNpZGU,YmFubmVy,ZXJyb3JNZXNzYWdl,ZXJyb3JOdW1iZXI,Zm9vdGVyX2xvZ2lu,ZmFuY3ktYmctZQ,ZmFuY3ktYmctbg,ZmFuY3ktYmctbmU,ZmFuY3ktYmctbnc,ZmFuY3ktYmctc2U,ZmFuY3ktYmctc3c,ZmFuY3ktYmctcw,ZmFuY3ktYmctdw,ZmFuY3lib3gtY2xvc2U,ZmFuY3lib3gtaW5uZXI,ZmFuY3lib3gtb3V0ZXI,ZmFuY3lib3gtb3ZlcmxheQ,ZmFuY3lib3gtbG9hZGluZw,ZmFuY3lib3gtbGVmdA,ZmFuY3lib3gtbGVmdC1pY28,ZmFuY3lib3gtcmlnaHQ,ZmFuY3lib3gtcmlnaHQtaWNv,ZmFuY3lib3gtd3JhcA,ZmFuY3lib3gtdG1w,aGVhZGVy,aWItdXNlci10ZXh0,bG9naW5Gb3Jt,bGlua3Mtc29jaWFsLW1lZGlh,bWFpblBhZ2U;u=ZHVtbXk,ZW5jb2RlZC1wYXNzd29yZA,d2ViQWxwaGE,d2ViS2V5;v=bmVlZC1oZWxw;x=1IVClf,1KxWAP,1SURBl,1Wl6jj,1vhE2s,1vstXM,1wlzQT,1yYwT1,2-PmTs,2APt-x,2FOxw2,2Lnxl,2ceYJE,2feZ0x,2g4LgQ,2h079f,2oK-0A,2ueFc7,34liSK,39CTWT,3GxyfT,3T6P3H,3XvqP.,3kcnCG,3ktPLw,3l39dK,660SR,68npD,8Vcav,JOS8B,cTezC,dwOmq,ix9Ek,s-ZAp;q=ZnJhdWQ;w=428866'
 
-    b.form.new_control('text', 'hidden', {'name': 'login', 'value': 'Login'})
+    b.form.new_control('text', 'login', {'value': ''})
+    b.form.fixup()
+    b['login'] = 'Login'
 
     print('Logging in...')
     b.submit()
@@ -143,7 +150,7 @@ def login():
 
 def extract_transactions(content):
 
-    soup = BeautifulSoup(content)
+    soup = BeautifulSoup(content, "html.parser")
     rows = soup.select('#transactionHistoryTable tbody tr')
     transactions = []
 
@@ -162,14 +169,20 @@ def extract_transactions(content):
         payee = details[0] if len(details) > 0 else ''
         memo = details[1] if len(details) > 1 else ''
 
+        def toFloat(text):
+            print(text)
+            if len(text) > 0:
+                return float(text[:-3].strip().replace(',', ''))
+            return None
+
         transactions.append({
             'date': tds[0].text,
             'date_obj': parse_transaction_date(tds[0].text),
             'payee': payee,
             'memo': memo,
-            'debit_amount': tds[2].text,
-            'credit_amount': tds[3].text,
-            'balance': tds[4].text
+            'debit_amount': toFloat(tds[2].text),
+            'credit_amount': toFloat(tds[3].text),
+            'balance': toFloat(tds[4].text)
         })
 
     return transactions
@@ -225,9 +238,12 @@ def query_server_transactions(b, start_date):
 
     end_date = get_servers_today_date(b)
     b.select_form(name='transactionHistoryForm')
+    b.form['periodModeSelect'] = ['Custom']
     b.form['periodFromDate'] = start_date.strftime('%d/%m/%y')
-
-    URL_SUBMIT_HISTORY_FORM = 'https://ib.nab.com.au/nabib/transactionHistoryValidate.ctl'
+    b.form['transactionsPerPage'] = [str(TRANSACTIONS_PER_PAGE)]
+    # https://ib.nab.com.au/nabib/transactionHistoryDisplay.ctl?filterIndicator=true
+    # https://ib.nab.com.au/nabib/transactionHistoryDisplay.ctl?filterIndicator=true
+    URL_SUBMIT_HISTORY_FORM = 'https://ib.nab.com.au/nabib/transactionHistoryDisplay.ctl?filterIndicator=true'
     b.form.action = URL_SUBMIT_HISTORY_FORM
 
     print('\tGetting transactions from %s to %s' % (start_date, end_date))
@@ -237,21 +253,12 @@ def query_server_transactions(b, start_date):
     if not check_url(b, URL_SUBMIT_HISTORY_FORM):
         return
 
-    # Check we actually got what we asked for
-    expr = 'Period:\\r\\n\\s*' + start_date.strftime('%d/%m/%y')
-
-    if not re.findall(expr, response):
-        print('\tIt doesn\'t look like I was able to get transactions')
-        print('\tCannot find string : %s in response' % expr)
-        return None
-
     print('\tOK')
 
     return b
 
 
 def get_all_transactions(b, account, start_date):
-
     b = query_server_transactions(b, start_date)
     if not b:
         return None
@@ -260,39 +267,33 @@ def get_all_transactions(b, account, start_date):
 
     # Extract and store all transactions into db
     while True:
-
         cont = b.response().read()
-        soup = BeautifulSoup(cont)
-
-        input = soup.select('input[name="pageNo"]')
-        if not input:
-            currPage = 1
-        else:
-            currPage = int(input[0].attrs['value'])
-        print('\tGetting transactions from page %s' % currPage)
+        soup = BeautifulSoup(cont, "html.parser")
 
+        currPage = -1
         new_trans = extract_transactions(cont)
+
         if len(new_trans) == 0:
             print('\tNo transactions found on page %s, that\'s strange.')
         else:
             trans.extend(new_trans)
+            print('\t' + str(len(new_trans)) + ' transactions added.')
 
-        # Links to all pages with history are kind of fucked-up
-        # there's no classes on them to identify, hence the need to find
-        # closest unique element and go via siblings
-        currPage += 1
-        transExp = soup.select('#transactionExport')[0]
+        # get transaction count
+        rawTransCount = soup.find_all('td', text=re.compile('Found:'))
+        if rawTransCount is None:
+            print('No transaction count found, must be error')
+            return None
 
-        # :contains does not seem to work, using .find()
-        pageLink = list(transExp.nextSiblingGenerator())[1].find('a', text=str(currPage))
-        if not pageLink:
-            print('\tNo more pages available, finished processing')
+        transactionCount = int(rawTransCount[0].get_text().split(' ')[1])
+        if len(trans) >= transactionCount:
+            print('No more transactions')
             break
 
-        print('\tOpening page #%s...' % currPage)
-        b.open('https://ib.nab.com.au' + pageLink.attrs['href'])
-        print('\tOK')
-
-    return trans
+        currPage += 1
 
+        print('\tOpening page #%d...' % currPage)
+        b.open(
+            'https://ib.nab.com.au/nabib/transactionHistoryGetSettings.ctl#' + str(currPage))
 
+    return trans