diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..1fefb99 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,10 @@ +[run] +include = + scripts/*.py + tests/*.py + www/*.py + +[report] +exclude_lines = + pragma: no cover + if __name__ == '__main__': diff --git a/.gitignore b/.gitignore index fe3cd1e..77f5327 100644 --- a/.gitignore +++ b/.gitignore @@ -3,13 +3,14 @@ backups data logs misc -realestate/static/pictures +www/static/pictures tests/data tests/misc # Files scripts/phantomjs scripts/ngrok +scripts/wires *.Python *.txt @@ -20,5 +21,3 @@ scripts/ngrok *.bmml *.csv *.out - -fabfile.py diff --git a/.travis.yml b/.travis.yml index baaa516..63f296d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,15 +1,14 @@ language: python sudo: false python: -- '2.7' +- '3.5' install: - pip install -r requirements.txt -before_script: -- export PYTHONPATH=$PYTHONPATH:$(pwd) +script: - mkdir logs - touch logs/realestate.log -script: -- coverage run tests/test*.py +- nosetests tests --with-coverage +- tox after_success: - coveralls deploy: @@ -18,23 +17,8 @@ deploy: secret_access_key: secure: Dh/dAklShJFyeh8jMTMe05BM20r///zb66k6f1VbwN26OrbWV1V6p4Ue83N1jTpFvBFc2C/XTHi/r42jRJU5H+zlJBc3Rwsly7aqHPXxee207Zi3Kgo62cqaZSLV4mnNlineFAkdjM82OhV1TKnT843F5L6gaSPJJHVRtz5Vrbk= bucket: lensnola - local-dir: realestate/static + local-dir: www/static upload-dir: realestate acl: public_read on: repo: TheLens/realestate -env: - global: - - secure: B43D3wsqZ/+a/1OA6T0Cqevr96bUs3iqqMilZNyiZnwjdu+j2qKQ0JFMhiaUDvnSW41xhkQR00Yi5q6G1rWJ95eM67k/hFVCabIo0nbqKgHVtTOvofV443al9svZW70gaKFdWfHmwizaGFEshdddoUd/Q4Ic30mNiL3/yiGZ9fk= - - secure: jEGWFRamEnNgYHUFgMXapDq2uyE8oBUrfbo/G/wQlzGAdOrZwdMd2fxdJ9QgNrGT91C1Wn+oYGM1yVUMP1A+u/4znS4h9uvu7XuDpjaJ0DkptcELuFo+LNNM+q8V7E9vu8EnVyNVpZm2nxRXHHE82pObWSCZ8JoQAFUAoru0cso= - - secure: u2u6Acr+P2Acjipu2E6BUP/ApkhEFXOFlxn7L/tbR1U1zh7sdce4FsK8oJM92XQzofe003n+liPiPSMi/gdVoxlW6r8XQ/HVQ/9ghJyZMV/H1M88B8Vn4Ql3THKODyJESITgnHM9gtUbXKdGxEqSLO/EJXaDw2GKKfLdMTb8ENM= - 
- secure: HxaMVE5UJhdLJRMvp0kHq4RTYMv3jLTBqGZZCCUcRfwGKDIo/ReBDyU4BpqMOD0iKkYljT1Z1lfllYq446qHHpTDTK+CgfqFq8Wly9PquaWbR8OxD7LbPs83B5NdyLvViMspheW7mqEyyEttXvoG2JhQjdq7LAUaVvypC1Zuf7I= - - secure: nqHuQttTrZE9tTfMN8kq69YOwDRIWnCspSCcgJshLFnPqKgRb9dKVsh6LbsQBlkyU4KoSBOr5JHKQrd8fZ4MCSKl8fSDHrlw5ras1AaUlFvqRHLW5GHJw/rctY9sLrvNVroa1qV7SUNtLHSWo4FHu97C1OEhDkqQ4vMrqHFoxSc= - - secure: qiAzbRqyI4lx7OaMkAzCLKczJruVnOYqj2t9vbWMojwxWYBXe+WEVknrS50cAq0Cf3gvMrBFL7roNtGmf9Q8YUlyYcMEI2ZyJQWI8JSC7+EztKXKmPGbvUHd144IplT7nQSNLgkxgEfo6zQsrZYloeZZLmo4FTwRRAUpPBJ/Srk= - - secure: EbEABi1NpNV3Uj4wpZ0S//d1n5DFQJQXHp2Po/aqzt5eoFI1Ox0BtSe6RZSaw2DDWbayfMuX61L4kiEX1m9ln9rENCp6xPxOKZOa4pgMjxRXax7m5aWRbXb1DxBUwf7ez+WHP+tnIMZUD9axZlxsfQmyeJowx4lgTzbtuN0qFBM= - - secure: WpmIBdrctRpFXwH5wAeSIt1jjlzGLzQ4xK67P2htdq5oJ318tJIN2nOMCMEkyeB/xiyK95c8C/40IHYNfzg69b68rBiVZwOQ+e6O9fkP0rJlHAQeCg7kAw5IBfvNfVtRHi7q+Yp0MGYXRVZg1x+5g4Z1lGJGst4JtpvXhTGoj+Q= - - secure: peQZ/79gBOnPqG0UZ62enI+Ny823uion0V3bOyGKlqeypXcA7VyMM2AKGr/F5Yz4wyDH1Ayt3eEY7KNm2Ud44R/FpGvJ5h+Wr1GxHnmVFtLewSyLHHB+qS6AHKPsmKzJr48z9OGlDuhYhUACoV3qL61lMRSq/DKGEWvUaSVBrJ0= - - secure: g33NRFXX+QZ+33RJWFhAB5DKrhN0xokH51TS7THYVxdAmXtT9FTnjpg8BWjt8jFslIeFele6yskw1mZ+GaCbCswo/u/Mj6ph+1dNc0uJ81NWL5jTh+vNhkRG/DVuxFMZajHQ+PT+ntYpGsb7hkA0u4bURnakn30NKFUt2fZSEH8= - - secure: TB6eW60s2F4a2HP4rackGgxPUIgdpucSRAJj6c6WERWdUysB2T75/VzKQE3JvYG+L5IKfl60C5hcxdERjPIC5bhIG+Xyo5OV9x5fuYPIgPqsoC0TZqImOn0hUZZAqyrtN8GF91sIIT26UbBbYtYE2RfNJ3nnDB596QSVrKkS8Js= - - secure: IHSrXacYVRnqTqJSKQQIwWfGHD/XYXNH0547V2i/AeM7nK3fdU5xLBoe9+rDWB+tWlB7UiIqqlKUw3erx6VmD8iIxw458mCY/EeYnOJK63AWKldPw41/z0+IUnzyahtOoYNlH9t08x75OVRyCkhrtfAujKUDkO4HSJti56Inbdk= - - secure: sLpFpxbiQAxFwcu/PUoJfYv0qVqHpUHU8KY2qeCfDLbcCSnk5R0u/B0VQKckqplwrd26Myfksqu8OqBuUUk/0fjqFyxplSMpbAPf7fjsCmeIDBM8F1DGANEFP75fhysXwP5cI0N+4mN82pNWuEvXp2OGNF2x/n/AnxghneUO7gk= diff --git a/README.md b/README.md index 7290a02..5312234 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ This app scrapes the latest property 
sales in New Orleans, stores the records in Every day, these two commands are run to scrape and then build, geocode, clean and publish the previous day's sales. These are summarized in `scripts/main.sh`, which is run on a cron job every night. ```bash -python realestate/lib/scrape.py +python scripts/scrape.py python scripts/initialize.py ``` @@ -32,7 +32,7 @@ Occasionally, due to bugs or password expiration, you will need to scrape and bu ```bash # -python realestate/lib/scrape.py 2016-05-01 2016-05-05 +python scripts/scrape.py 2016-05-01 2016-05-05 python scripts/initialize.py 2016-05-01 2016-05-05 ``` @@ -53,3 +53,37 @@ export REAL_ESTATE_LRD_PASSWORD='MyLandRecordsDivisionPassword' export REAL_ESTATE_DATABASE_USERNAME='MyDatabaseUsername' export REAL_ESTATE_DATABASE_PASSWORD='MyDatabasePassword' ``` + +#### Tests + +```bash +nosetests +``` + +```bash +tox +``` + +Run the app using Python 3. Set this up when creating the virtual environment. This is because OpenSSL/urllib3 has security flaws in older versions. + +```bash +mkvirtualenv TODO realestate +``` + +Run `nosetests tests` in the app's top-level directory. To show code coverage as well, run `nosetests tests --with-coverage`. + +__Different Python versions__ + +The app uses `tox` for testing different Python versions. The app is compatible and tested for Python 2.7 and 3.5. The basic usage: + +```bash +tox +``` + +You may need to first run `unset PYTHONPATH`. 
+ +You can also run specific Python version tests like this: + +```bash +tox -e py35 +``` diff --git a/confs/app.ini b/confs/app.ini index c162dd4..aa4b204 100644 --- a/confs/app.ini +++ b/confs/app.ini @@ -1,22 +1,25 @@ [uwsgi] +master = true +workers = 2 +harakiri = 60 +max-requests = 100 +socket = 127.0.0.1:5004 + virtualenv = /home/ubuntu/.virtualenvs/realestate chdir = /home/ubuntu/realestate for-readline = /home/ubuntu/.virtualenvs/realestate/bin/postactivate env = %(_) -endfor = +endfor = for-readline = /home/ubuntu/.virtualenvs/postactivate env = %(_) -endfor = -wsgi-file = realestate/app.py +endfor = +wsgi-file = www/app.py callable = app -touch-reload = /home/ubuntu/realestate/realestate/app.py -socket = 127.0.0.1:5004 +touch-reload = /home/ubuntu/realestate/www/app.py logto = /var/log/uwsgi/realestate.log +log-maxsize = 10500000 uid = ubuntu gid = ubuntu -die-on-term -catch-exceptions -workers = 2 -harakiri = 120 -max-requests = 200 -master +die-on-term = true +catch-exceptions = true +vacuum = true diff --git a/confs/realestate.conf b/confs/realestate.conf index 84612fd..1b67ee1 100644 --- a/confs/realestate.conf +++ b/confs/realestate.conf @@ -1,6 +1,7 @@ +description "uWSGI instance to serve realestate app." + start on runlevel [2345] stop on runlevel [!2345] - respawn script diff --git a/crontab b/crontab new file mode 100644 index 0000000..023c711 --- /dev/null +++ b/crontab @@ -0,0 +1,3 @@ +# Real estate +*/5 * * * * cd /home/ubuntu/realestate && git fetch origin master && git reset --hard FETCH_HEAD # Sync with GitHub master branch +0 3 * * * /home/ubuntu/realestate/scripts/main.sh # Scrapes and builds yesterday's sales diff --git a/docs/contracts.rst b/docs/contracts.rst new file mode 100644 index 0000000..c36fbdd --- /dev/null +++ b/docs/contracts.rst @@ -0,0 +1,16 @@ +.. _contracts: + +Contracts +========= + +.. automodule:: contracts.app + :members: + +.. automodule:: contracts.db + :members: + +.. 
automodule:: contracts.models + :members: + +.. automodule:: contracts.views + :members: diff --git a/docs/misc.rst b/docs/misc.rst index eb3a18a..0900c78 100644 --- a/docs/misc.rst +++ b/docs/misc.rst @@ -12,32 +12,32 @@ Misc. .. automodule:: realestate.views :members: -.. automodule:: realestate.lib.delete_dates +.. automodule:: scripts.delete_dates :members: -.. automodule:: realestate.lib.email_template +.. automodule:: scripts.email_template :members: -.. automodule:: realestate.lib.form_tweet +.. automodule:: scripts.form_tweet :members: -.. automodule:: realestate.lib.get_dates +.. automodule:: scripts.get_dates :members: -.. automodule:: realestate.lib.libraries +.. automodule:: scripts.libraries :members: -.. automodule:: realestate.lib.mail +.. automodule:: scripts.mail :members: -.. automodule:: realestate.lib.results_language +.. automodule:: scripts.results_language :members: -.. automodule:: realestate.lib.stat_analysis +.. automodule:: scripts.stat_analysis :members: -.. automodule:: realestate.lib.twitter +.. automodule:: scripts.twitter :members: -.. automodule:: realestate.lib.utils +.. automodule:: scripts.utils :members: diff --git a/realestate/lib/delete_dates.py b/realestate/lib/delete_dates.py deleted file mode 100644 index cb524bb..0000000 --- a/realestate/lib/delete_dates.py +++ /dev/null @@ -1,110 +0,0 @@ -# -*- coding: utf-8 -*- - -''' -Accpts command line parameters for quick deletion of all records for a given -date or date range. Meant for quicker testing. 
- -```bash -python delete_dates.py '2014-02-18' # Deletes one day -python delete_dates.py '2014-02-18' '2014-02-19' # Deletes range -``` -''' - -import os -import sys -import psycopg2 - -from realestate.db import ( - Cleaned, - Detail -) -from realestate import log, SESSION, DATABASE_NAME - - -class DeleteDates(object): - - '''Deletes certain dates from database.''' - - def __init__(self, initial_date=None, until_date=None): - '''Initialize self variables and establish connection to database.''' - - self.conn = psycopg2.connect( - 'host=localhost dbname=%s user=%s password=%s' % ( - DATABASE_NAME, - os.environ.get('REAL_ESTATE_DATABASE_USERNAME'), - os.environ.get('REAL_ESTATE_DATABASE_PASSWORD') - ) - ) - self.cursor = self.conn.cursor() - - self.initial_date = initial_date - self.until_date = until_date - - log.debug('self.initial_date: %s', self.initial_date) - log.debug('self.until_date: %s', self.until_date) - - def main(self): - '''Run Join() and Clean() scripts.''' - - self.delete_details() - self.delete_cleaned() - self.vacuum() - - def vacuum(self): - '''docstring''' - - old_isolation_level = self.conn.isolation_level - self.conn.set_isolation_level(0) - sql = 'VACUUM;' - self.cursor.execute(sql) - self.conn.commit() - self.conn.set_isolation_level(old_isolation_level) - - def delete_details(self): - SESSION.query( - Detail - ).filter( - Detail.document_recorded >= self.initial_date - ).filter( - Detail.document_recorded <= self.until_date - ).delete() - - SESSION.commit() - - def delete_cleaned(self): - SESSION.query( - Cleaned - ).filter( - Cleaned.document_recorded >= self.initial_date - ).filter( - Cleaned.document_recorded <= self.until_date - ).delete() - - SESSION.commit() - -if __name__ == '__main__': - if len(sys.argv) < 2: - print ( - "No date(s) specified. Enter a single date to delete that " + - "one day or enter two days to delete a range of days. 
" + - "Use the format 'YYYY-MM-DD'.") - elif len(sys.argv) == 2: # One argument - day = sys.argv[1] - - DeleteDates( - initial_date=day, - until_date=day - ).main() - elif len(sys.argv) == 3: # Two arguments - initial_day = sys.argv[1] - until_day = sys.argv[2] - - DeleteDates( - initial_date=initial_day, - until_date=until_day - ).main() - elif len(sys.argv) > 3: - print ( - "Too many arguments. Enter a single date to delete that one " + - "day or enter two days to delete a range of days. " + - "Use the format 'YYYY-MM-DD'.") diff --git a/realestate/lib/form_tweet.py b/realestate/lib/form_tweet.py deleted file mode 100644 index f131944..0000000 --- a/realestate/lib/form_tweet.py +++ /dev/null @@ -1,276 +0,0 @@ -# -*- coding: utf-8 -*- - -''' -Does analysis and uses results to craft language of the tweet. Also takes -screenshot of that particular sale's map using `screen.js` (PhantomJS). -''' - -# todo: run this separate from 3 a.m. scrape/initialize/etc cron job. -# run this on a cron at same time we want to tweet. - -import sys -import os -from datetime import timedelta -from subprocess import call - -from realestate.db import Cleaned -from realestate import log, TODAY_DATE, PROJECT_DIR, DATABASE_NAME -from realestate.lib.twitter import Twitter - - -class AutoTweet(object): - - '''Runs logic to find what to tweet and forms language for tweet.''' - - def __init__(self): - '''Initialize self variables and establish connection to database.''' - pass - - @staticmethod - def figure_out_recorded_date(): - ''' - Treat Tuesday-Saturday like any other day. Don\'t do anything on - Sundays. Mondays tweet about entire previous week. 
- ''' - - document_recorded_early = '' - document_recorded_late = '' - time_period = '' - - if TODAY_DATE.strftime('%A') == 'Sunday': - sys.exit() - return - elif TODAY_DATE.strftime('%A') == 'Monday': - document_recorded_early = ( - TODAY_DATE - timedelta(days=7) - ).strftime( - '%Y-%m-%d' - ) - - document_recorded_late = ( - TODAY_DATE - timedelta(days=3) - ).strftime( - '%Y-%m-%d' - ) - - time_period = 'last week' - else: - document_recorded_early = ( - TODAY_DATE - timedelta(days=1) - ).strftime( - '%Y-%m-%d' - ) - - document_recorded_late = ( - TODAY_DATE - timedelta(days=1) - ).strftime( - '%Y-%m-%d' - ) - - time_period = ( - TODAY_DATE - timedelta(days=1) - ).strftime('%A') - - return_dict = {} - return_dict['document_recorded_early'] = document_recorded_early - return_dict['document_recorded_late'] = document_recorded_late - return_dict['time_period'] = time_period - - return return_dict - - def get_highest_amount_details(self, - document_recorded_early, - document_recorded_late): - '''Get the relevant fields about the sale with the highest amount.''' - - query = SESSION.query( - Cleaned.detail_publish, - Cleaned.document_recorded, - Cleaned.amount, - Cleaned.neighborhood, - Cleaned.instrument_no - ).filter( - Cleaned.detail_publish == '1' - ).filter( - Cleaned.document_recorded >= '%s' % document_recorded_early - ).filter( - Cleaned.document_recorded <= '%s' % document_recorded_late - ).order_by( - Cleaned.amount.desc() - ).limit(1).all() - - log.debug(query) - - # If no record found, terminate this script so it - # doesn't tweet out nonsense. - # Could be because of holidays, no records or old-fashioned bug. - if len(query) == 0: - sys.exit() - - query_dict = {} - - for row in query: - query_dict['amount'] = '$' + format(row.amount, ',') - query_dict['instrument_no'] = row.instrument_no - query_dict['neighborhood'] = row.neighborhood - # todo: neighborhood cleaning in clean.py? 
"Dev" => "Development" - - SESSION.close() - - return query_dict - - @staticmethod - def conversational_neighborhoods(neighborhood): - ''' - Converts neighborhoods to the way you would refer to them in - conversation. Ex. "French Quarter" => "the French Quarter." - ''' - - # todo: needs work - - nbhd_list = [ - 'BLACK PEARL' - 'BYWATER' - 'CENTRAL BUSINESS DISTRICT' - 'DESIRE AREA' - 'FAIRGROUNDS' - 'FISCHER DEV' - 'FLORIDA AREA' - 'FLORIDA DEV' - 'FRENCH QUARTER' - 'GARDEN DISTRICT' - 'IRISH CHANNEL' - 'LOWER GARDEN DISTRICT' - 'LOWER NINTH WARD' - 'MARIGNY' - 'SEVENTH WARD' - 'ST. BERNARD AREA' - 'ST. THOMAS DEV' - 'U.S. NAVAL BASE' - ] - - for nbhd in nbhd_list: - if neighborhood == nbhd: - neighborhood = 'the ' + neighborhood - - @staticmethod - def form_url(instrument_no): - '''Append instrument number to /realestate/sale/''' - - url = 'http://vault.thelensnola.org/realestate/sale/' + \ - instrument_no - - return url - - @staticmethod - def screenshot_name(instrument_no): - '''Form filename for map screenshot.''' - - name = '%s-%s-high-amount.png' % ( - TODAY_DATE, instrument_no) - - return name - - @staticmethod - def get_image(url, name): - '''Take screenshot of map with PhantomJS.''' - - log.debug('get_image') - log.debug('url: %s', url) - - call([ - '%s/scripts/phantomjs' % PROJECT_DIR, - '%s/scripts/screen.js' % PROJECT_DIR, - url, - '%s/realestate/static/pictures/tweets/%s' % ( - PROJECT_DIR, name) - ]) - - def open_image(self, url, name): - '''Get file path to screenshot.''' - - self.get_image(url, name) - - filename = '%s/realestate/' % PROJECT_DIR + \ - 'static/pictures/tweets/%s' % name - - return filename - - # with open(filename, 'rb') as image: - # return image - - @staticmethod - def form_message(time_period, neighborhood, amount, url): - '''Plug variables into mab lib sentences.''' - - log.debug('form_message') - - # options = [] - - message = ( - "Priciest property sale recorded {0} was in {1}: {2}.\n{3}" - ).format( - time_period, - 
neighborhood, - amount, - url - ) - - # option = random.choice(options) - - return message - - def main(self): - '''Runs through all methods.''' - - return_dict = self.figure_out_recorded_date() - - document_recorded_early = return_dict['document_recorded_early'] - log.debug( - 'document_recorded_early: %s', - document_recorded_early) - - document_recorded_late = return_dict['document_recorded_late'] - log.debug( - 'document_recorded_late: %s', - document_recorded_late) - - time_period = return_dict['time_period'] - log.debug('time_period: %s', time_period) - - try: - query_dict = self.get_highest_amount_details( - document_recorded_early, document_recorded_late) - except Exception, error: - log.exception(error, exc_info=True) - - amount = query_dict['amount'] - log.debug('amount: %s', amount) - - instrument_no = query_dict['instrument_no'] - log.debug('instrument_no: %s', instrument_no) - - neighborhood = query_dict['neighborhood'] - log.debug('neighborhood: %s', neighborhood) - - # todo: - # neighborhood = self.conversational_neighborhoods() - - url = self.form_url(instrument_no) - log.debug('url: %s', url) - - name = self.screenshot_name(instrument_no) - log.debug('name: %s', name) - - status = self.form_message(time_period, neighborhood, amount, url) - log.debug('status: %s', status) - - media = self.open_image(url, name) - log.debug('media: %s', media) - - print 'status:', status - print 'media:', media - # Twitter(status=status).send_with_media(media=media) - -if __name__ == '__main__': - AutoTweet().main() diff --git a/realestate/lib/results_language.py b/realestate/lib/results_language.py deleted file mode 100644 index 2112b8a..0000000 --- a/realestate/lib/results_language.py +++ /dev/null @@ -1,161 +0,0 @@ -# -*- coding: utf-8 -*- - -''' -Creates the results language on the /search page, such as, "10 sales found -for keyword 'LLC' in the French Quarter neighborhood where the price was -between $10,000 and $200,000 between Feb. 18, 2014, and Feb. 20, 2014.' 
-''' - -from realestate.lib.utils import Utils - - -class ResultsLanguage(object): - - '''Methods for each page in the app.''' - - def __init__(self, data): - '''Starting things off.''' - - self.data = data - - def plural_or_not(self): - '''Checks if more than one result.''' - - if self.data['number_of_records'] == 1: - plural_or_not = "sale" - else: - plural_or_not = "sales" - - return plural_or_not - - def add_initial_language(self, plural_or_not): - '''Creates initial sentence language.''' - - final_sentence = str(Utils().get_number_with_commas( - self.data['number_of_records'])) + ' ' + plural_or_not + ' found' - - return final_sentence - - def add_keyword_language(self, final_sentence): - '''Adds keyword or key phrase language.''' - - if self.data['name_address'] != '': - if len(self.data['name_address'].split()) > 1: - final_sentence += ' for key phrase "' + \ - self.data['name_address'] + '"' - # for 'keyword' - else: - final_sentence += ' for keyword "' + \ - self.data['name_address'] + '"' - # for 'keyword' - - return final_sentence - - def add_nbhd_zip_language(self, final_sentence): - '''Adds neighborhood or ZIP code language.''' - - if self.data['neighborhood'] != '': - if self.data['zip_code'] != '': - final_sentence += " in the " + self.data['neighborhood'] + \ - " neighborhood and " + self.data['zip_code'] - # in the Mid-City neighborhood and 70119 - else: - final_sentence += " in the " + self.data['neighborhood'] + \ - " neighborhood" - # in the Mid-City neighborhood - elif self.data['zip_code'] != '': - final_sentence += " in ZIP code " + self.data['zip_code'] - # in ZIP code 70119 - - return final_sentence - - def add_amount_language(self, final_sentence): - '''Adds amount language.''' - - if self.data['amount_low'] != '': - if self.data['amount_high'] != '': - final_sentence += " where the price was between " + \ - Utils().get_num_with_curr_sign(self.data['amount_low']) + \ - + " and " + \ - 
Utils().get_num_with_curr_sign(self.data['amount_high']) - # where the amount is between $10 and $20 - else: - final_sentence += " where the price was greater than " + \ - Utils().get_num_with_curr_sign(self.data['amount_low']) - # where the amount is greater than $10 - elif self.data['amount_high'] != '': - final_sentence += " where the price was less than " + \ - Utils().get_num_with_curr_sign(self.data['amount_high']) - # where the amount is less than $20 - - return final_sentence - - def add_date_language(self, final_sentence): - '''Adds date language.''' - - if self.data['begin_date'] != '': - if self.data['end_date'] != '': - final_sentence += " between " + \ - Utils().ymd_to_full_date( - self.data['begin_date'], - no_day=True) + \ - ", and " + \ - Utils().ymd_to_full_date( - self.data['end_date'], - no_day=True) - # between Feb. 10, 2014, and Feb. 12, 2014 - else: - final_sentence += " after " + \ - Utils().ymd_to_full_date( - self.data['begin_date'], - no_day=True) - # after Feb. 10, 2014. - elif self.data['end_date'] != '': - final_sentence += " before " + \ - Utils().ymd_to_full_date( - self.data['end_date'], - no_day=True) - # before Feb. 20, 2014. - - return final_sentence - - def add_map_filtering_language(self, final_sentence): - '''Adds language depending on whether map filtering is turned on.''' - - if self.data['map_button_state'] is True: - final_sentence += ' in the current map view' - - return final_sentence - - @staticmethod - def add_final_sentence_language(final_sentence): - '''Endings for the sentences.''' - - # Punctuation comes before quotation marks - if final_sentence[-1] == "'" or final_sentence[-1] == '"': - last_character = final_sentence[-1] - final_sentence_list = list(final_sentence) - final_sentence_list[-1] = '.' - final_sentence_list.append(last_character) - final_sentence = ''.join(final_sentence_list) - else: - final_sentence += '.' 
- - return final_sentence - - def main(self): - '''Runs through all sentence-building methods.''' - - plural_or_not = self.plural_or_not() - final_sentence = self.add_initial_language(plural_or_not) - final_sentence = self.add_keyword_language(final_sentence) - final_sentence = self.add_nbhd_zip_language(final_sentence) - final_sentence = self.add_amount_language(final_sentence) - final_sentence = self.add_date_language(final_sentence) - final_sentence = self.add_map_filtering_language(final_sentence) - final_sentence = self.add_final_sentence_language(final_sentence) - - return final_sentence - -if __name__ == '__main__': - pass diff --git a/realestate/lib/twitter.py b/realestate/lib/twitter.py deleted file mode 100644 index 2144208..0000000 --- a/realestate/lib/twitter.py +++ /dev/null @@ -1,91 +0,0 @@ -# -*- coding: utf-8 -*- - -''' -Methods for working with Twitter. -Takes message and attachment, forms tweet and sends tweet using Twython. -Performs checks on number of characters before sending to prevent an -#embarrassing moment. -''' - -import re -import os -from twython import Twython - - -class Twitter(object): - - '''Methods for working with the Twitter API.''' - - def __init__(self, status=None): - '''Make connection to Twitter API.''' - - self.status = status - self.twitter = Twython(os.environ.get('TWITTER_APP_KEY'), - os.environ.get('TWITTER_APP_SECRET'), - os.environ.get('TWITTER_OAUTH_TOKEN'), - os.environ.get('TWITTER_OAUTH_TOKEN_SECRET')) - - @staticmethod - def get_attachment(media): - '''Opens a file.''' - - attachment = open(media, 'rb') - - return attachment - - def check_for_urls(self): - '''Scan for URLs. 
Replace URL length if any found.''' - - url_length = 0 - - # URL is either followed by a whitespace or at the end of the line - urls = re.findall(r'(http[^\s|$]+)', self.status) - - for url in urls: - # Subtract length of URL text and replace with length of - # shortened URL (~22 characters) - url_length = url_length + len(url) - 22 - - return url_length - - def check_length(self, media=False): - '''Confirm that the status + attachments is <= 140 chars.''' - - length = 140 - len(self.status) - - # Find how many characters to add back: - # url_length = actual URL length - shortened URL length - url_length = self.check_for_urls() - - length = length + url_length - - # If there is a media attachment, subtract ~23 characters - if media: - length -= 23 - - if length < 0: - return False - else: - return True - - def send_as_text(self): - '''Send plain text tweet.''' - - assert self.check_length() - - self.twitter.update_status(status=self.status) - - def send_with_media(self, media=None): - '''Send tweet with media attachment.''' - - assert self.check_length(media=True) - - attachment = self.get_attachment(media) - - self.twitter.update_status_with_media( - status=self.status, media=attachment) - - attachment.close() - -if __name__ == '__main__': - pass diff --git a/realestate/lib/utils.py b/realestate/lib/utils.py deleted file mode 100644 index 5de4cdb..0000000 --- a/realestate/lib/utils.py +++ /dev/null @@ -1,174 +0,0 @@ -# -*- coding: utf-8 -*- - -''' -A collection of usefull utility functions, such as converting date formats -and converting integer dollar amounts to string currency formats. 
-''' - -import re -from datetime import datetime - - -class Utils(object): - - '''Common utility functions.''' - - zip_codes = [ - 70112, 70113, 70114, 70115, 70116, 70117, 70118, - 70119, 70121, 70122, 70123, 70124, 70125, 70126, - 70127, 70128, 70129, 70130, 70131, 70139, 70140, - 70141, 70142, 70143, 70145, 70146, 70148, 70149, - 70150, 70151, 70152, 70153, 70154, 70156, 70157, - 70158, 70159, 70160, 70161, 70162, 70163, 70164, - 70165, 70166, 70167, 70170, 70172, 70174, 70175, - 70176, 70177, 70178, 70179, 70181, 70182, 70183, - 70184, 70185, 70186, 70187, 70189, 70190, 70195 - ] - - @staticmethod - def convert_amount(amount): - '''Convert amounts to int type.''' - - amount = re.sub(r"\$", r"", amount) - amount = re.sub(r"\,", r"", amount) - - return int(float(amount)) - - @staticmethod - def get_number_with_commas(value): - '''Convert interger to string with commas.''' - - return "{:,}".format(value) - - @staticmethod - def get_num_with_curr_sign(value): - '''Convert integer to string with commas and dollar sign.''' - - value = int(value) - return "${:,}".format(value) - - @staticmethod - def ymd_to_mdy(value): - '''Convert yyyy-mm-dd to mm-dd-yyyy.''' - - if value is not None: - value = datetime.strptime(value, '%Y-%m-%d').date() - return value.strftime("%m-%d-%Y") - else: - return "None" - - @staticmethod - def ymd_to_mdy_slashes(value): - '''Convert yyyy-mm-dd to mm/dd/yyyy.''' - - if value is not None: - value = datetime.strptime(value, '%Y-%m-%d').date() - value = value.strftime("%m/%d/%Y") - return value - else: - return "None" - - @staticmethod - def ymd_to_full_date(value, no_day=False): - '''Convert yyyy-mm-dd to Day, Month Date, Year.''' - - if value is not None: - if isinstance(value, unicode): - # value = urllib.unquote(value).decode('utf8') - readable_date = str(value) - readable_date = datetime.strptime( - readable_date, '%m/%d/%Y').date() - readable_date = readable_date.strftime('%b. 
%-d, %Y') - - else: - # value = str(value) - if no_day is False: - readable_datetime = datetime.strptime( - value, '%Y-%m-%d').date() - readable_date = readable_datetime.strftime( - '%A, %b. %-d, %Y') - else: - readable_datetime = datetime.strptime( - value, '%Y-%m-%d').date() - readable_date = readable_datetime.strftime('%b. %-d, %Y') - - readable_date = readable_date.replace('Mar.', 'March') - readable_date = readable_date.replace('Apr.', 'April') - readable_date = readable_date.replace('May.', 'May') - readable_date = readable_date.replace('Jun.', 'June') - readable_date = readable_date.replace('Jul.', 'July') - - return readable_date # value.strftime('%A, %b. %-d, %Y') - - else: - return "None" - - @staticmethod - def convert_month_to_ap_style(month): - '''Convert month to AP Style. Ex. January => Jan.''' - - if re.match(r"[jJ][aA]", month) is not None: - month = "Jan." - - if re.match(r"[fF]", month) is not None: - month = "Feb." - - if re.match(r"[mM][aA][rR]", month) is not None: - month = "March" - - if re.match(r"[aA][pP]", month) is not None: - month = "April" - - if re.match(r"[mM][aA][yY]", month) is not None: - month = "May" - - if re.match(r"[jJ][uU][nN]", month) is not None: - month = "June" - - if re.match(r"[jJ][uU][lL]", month) is not None: - month = "July" - - if re.match(r"[aA][uU]", month) is not None: - month = "Aug." - - if re.match(r"[sS][eE]", month) is not None: - month = "Sept." - - if re.match(r"[oO][cC]", month) is not None: - month = "Oct." - - if re.match(r"[nN][oO]", month) is not None: - month = "Nov." - - if re.match(r"[dD][eE]", month) is not None: - month = "Dec." - - return month - - @staticmethod - def binary_to_english(bit): - '''Convert 0/1 to No/Yes.''' - - bit = int(bit) - conversion_dict = { - 0: "No", - 1: "Yes" - } - english = conversion_dict[bit] - return english - - @staticmethod - def english_to_binary(english): - '''Convert No/Yes to 0/1.''' - - # Accepts Yes, Y, yeah, yes sir, etc. 
- english = english[0].title() - conversion_dict = { - "N": 0, - "Y": 1 - } - bit = conversion_dict[english] - return bit - -if __name__ == '__main__': - pass diff --git a/realestate/templates/dashboard.html b/realestate/templates/dashboard.html deleted file mode 100644 index 5388793..0000000 --- a/realestate/templates/dashboard.html +++ /dev/null @@ -1,103 +0,0 @@ - - - - {% include 'head.html' %} - - - {% include 'banner.html' %} - -
-
-

Property sales dashboard

-
-
-
-
{{ num_results }} results found (reverse chronological order)...
-
- - {% for newrow in newrows %} -
-
-
-
-
- Instrument #: {{ newrow.instrument_no }} -
-
- -
-
- -
-
- -
-
- -
-
- -
-
- -
-
- -
-
- -
-
- -
-
- -
-
- -
-
- -
-
- -
- -
-
-
-
-
Current sale location:
-
-
-
- {% endfor %} -
- Are you sure you want to update sale #? - - -
- - - {% include 'js.html' %} - - - - - - - diff --git a/realestate/views.py b/realestate/views.py deleted file mode 100644 index d187696..0000000 --- a/realestate/views.py +++ /dev/null @@ -1,220 +0,0 @@ -# -*- coding: utf-8 -*- - -'''Renders the views.''' - -# from flask.ext.cache import Cache -from flask import ( - render_template, - jsonify, - make_response -) -from realestate.lib.utils import Utils -from realestate import ( - log, - LENS_JS, - INDEX_JS, - SEARCH_JS, - SEARCH_AREA_JS, - SALE_JS, - MAP_JS, - LENS_CSS, - REALESTATE_CSS, - TABLE_CSS, - BANNER_CSS, - JS_APP_ROUTING -) - - -class Views(object): - - '''Methods for each page in the app.''' - - def __init__(self): - '''Commonly accessed static files.''' - - # self.home_assets = { - # 'js': LENS_JS, - # 'css': LENS_CSS, - # 'index_js': INDEX_JS, - # 'search_area_js': SEARCH_AREA_JS, - # 'js_app_routing': JS_APP_ROUTING, - # 'zip_codes': Utils().zip_codes - # } - # self.search_assets = { - # 'js': LENS_JS, - # 'search_js': SEARCH_JS, - # 'search_area_js': SEARCH_AREA_JS, - # 'map_js': MAP_JS, - # 'css': LENS_CSS, - # 'js_app_routing': JS_APP_ROUTING, - # 'zip_codes': Utils().zip_codes - # } - # self.sale_assets = { - # 'js': LENS_JS, - # 'css': LENS_CSS, - # 'salejs': SALE_JS - # } - - def get_home(self, data): - '''Return view for /realestate/''' - - log.debug('get_home') - - response = make_response( - render_template( - 'index.html', - data=data, - # home_assets=self.home_assets - lens_js=LENS_JS, - lens_css=LENS_CSS, - realestate_css=REALESTATE_CSS, - banner_css=BANNER_CSS, - table_css=TABLE_CSS, - index_js=INDEX_JS, - search_area_js=SEARCH_AREA_JS, - js_app_routing=JS_APP_ROUTING, - zip_codes=Utils().zip_codes - ) - ) - - return response - - def get_search(self, data, newrows, js_data): - '''Return GET view for /realestate/search''' - - log.debug('get_search') - - response = make_response( - render_template( - 'search.html', - data=data, - newrows=newrows, - js_data=js_data, - # 
search_assets=self.search_assets - lens_js=LENS_JS, - search_js=SEARCH_JS, - search_area_js=SEARCH_AREA_JS, - map_js=MAP_JS, - lens_css=LENS_CSS, - realestate_css=REALESTATE_CSS, - banner_css=BANNER_CSS, - table_css=TABLE_CSS, - js_app_routing=JS_APP_ROUTING, - zip_codes=Utils().zip_codes - ) - ) - - return response - - @staticmethod - def post_search(data, newrows, js_data): - '''Return updated views for /realestate/search''' - - log.debug('post_search') - - log.debug('returned newrows') - log.debug(newrows) - - table_template = render_template( - 'table.html', - newrows=newrows - ) - - log.debug('returned js_data:') - log.debug(js_data) - - log.debug('returned data') - log.debug(data) - - return jsonify( - table_template=table_template, - js_data=js_data, - data=data - ) - - def get_sale(self, data, js_data, newrows): - '''Return GET view for /realestate/sale''' - - log.debug('get_sale') - - response = make_response( - render_template( - 'sale.html', - data=data, - newrows=newrows, - js_data=js_data, - # sale_assets=self.sale_assets - lens_js=LENS_JS, - lens_css=LENS_CSS, - realestate_css=REALESTATE_CSS, - banner_css=BANNER_CSS, - table_css=TABLE_CSS, - sale_js=SALE_JS - ) - ) - - return response - - # todo - def get_dashboard(self, data, newrows, js_data, parameters): - '''Return GET view for /realestate/dashboard''' - - response = make_response( - render_template( - 'dashboard.html', - data=data, - newrows=newrows, - js_data=js_data, - parameters=parameters, - lens_js=LENS_JS, - search_js=SEARCH_JS, - search_area_js=SEARCH_AREA_JS, - map_js=MAP_JS, - lens_css=LENS_CSS, - js_app_routing=JS_APP_ROUTING, - zip_codes=Utils().zip_codes - ) - ) - - return response - - # todo - def post_dashboard(self, data, newrows, js_data, parameters): - '''Return POST view for /realestate/dashboard''' - - response = make_response( - render_template( - 'dashboard.html', - data=data, - newrows=newrows, - js_data=js_data, - parameters=parameters, - lens_js=LENS_JS, - 
search_js=SEARCH_JS, - search_area_js=SEARCH_AREA_JS, - map_js=MAP_JS, - lens_css=LENS_CSS, # todo - js_app_routing=JS_APP_ROUTING, - zip_codes=Utils().zip_codes - ) - ) - - return response - - def get_error_page(self): - '''Return 404 error page.''' - - response = make_response( - render_template( - '404.html', - lens_css=LENS_CSS, # todo - lens_js=LENS_JS, - index_js=INDEX_JS - ) - ) - - return response, 404 - - -if __name__ == '__main__': - pass diff --git a/requirements.txt b/requirements.txt index fe29737..5f60712 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,21 +1,20 @@ alabaster==0.7.9 astroid==1.4.8 -awscli==1.10.59 Babel==2.3.4 backports.functools-lru-cache==1.2.1 bcdoc==0.16.0 beautifulsoup4==4.5.1 -botocore==1.4.49 -cffi==1.7.0 +botocore==1.4.53 +cffi==1.8.2 click==6.6 colorama==0.3.7 configparser==3.5.0 coverage==4.2 cryptography==1.5 +docopt==0.6.2 docutils==0.12 ecdsa==0.13 enum34==1.1.6 -Fabric==1.12.0 Flask==0.11.1 funcsigs==1.0.2 futures==3.0.5 @@ -23,7 +22,7 @@ GeoAlchemy2==0.3.0 googlemaps==2.4.4 idna==2.1 imagesize==0.7.1 -ipaddress==1.0.16 +ipaddress==1.0.17 isort==4.2.5 itsdangerous==0.24 Jinja2==2.8 @@ -34,11 +33,13 @@ MarkupSafe==0.23 mccabe==0.5.2 mock==2.0.0 nose==1.3.7 -oauthlib==1.1.2 +oauthlib==2.0.0 paramiko==2.0.2 pbr==1.10.0 pep8==1.7.0 +pluggy==0.3.1 psycopg2==2.6.2 +py==1.4.31 pyasn1==0.1.9 pycparser==2.14 pycrypto==2.6.1 @@ -51,14 +52,15 @@ PyYAML==3.12 requests==2.11.1 requests-oauthlib==0.6.2 rsa==3.4.2 -s3transfer==0.1.2 +s3transfer==0.1.3 selenium==2.53.6 sh==1.11 six==1.10.0 snowballstemmer==1.2.1 Sphinx==1.4.6 sphinx-rtd-theme==0.1.9 -SQLAlchemy==1.0.14 -twython==3.4.0 -Werkzeug==0.11.10 +SQLAlchemy==1.0.15 +tox==2.3.1 +virtualenv==15.0.3 +Werkzeug==0.11.11 wrapt==1.10.8 diff --git a/realestate/lib/__init__.py b/scripts/__init__.py similarity index 100% rename from realestate/lib/__init__.py rename to scripts/__init__.py diff --git a/realestate/lib/build.py b/scripts/build.py similarity index 61% rename from 
realestate/lib/build.py rename to scripts/build.py index c50c547..143d477 100644 --- a/realestate/lib/build.py +++ b/scripts/build.py @@ -1,79 +1,71 @@ # -*- coding: utf-8 -*- -''' -Receives sale HTML, hands off to parse.py, which returns structured data. +""" +Receive sale HTML, hands off to parse.py, which returns structured data. + This then commits the returned structured data. -''' +""" -import os +# import os import glob from datetime import datetime, timedelta from sqlalchemy import insert -from realestate import db -from realestate.lib import parse -from realestate import log, PROJECT_DIR, SESSION +from www import db +from www import log, PROJECT_DIR, SESSION +from scripts import parse class Build(object): - - '''Take structured data and enter into database.''' + """Take structured data and enter into database.""" def __init__(self, initial_date=None, until_date=None): - ''' - Create self variables for date range and establish connections to - the database. - ''' - + """Create class variables for date range and connect to database.""" self.initial_date = initial_date self.until_date = until_date - log.debug('self.initial_date: %s', self.initial_date) - log.debug('self.until_date: %s', self.until_date) + log.debug('self.initial_date: {}'.format(self.initial_date)) + log.debug('self.until_date: {}'.format(self.until_date)) def build_all(self): - '''Runs through all of the building methods.''' - + """Run through all of the building methods.""" log.debug('Build all') - print 'Building...' 
+ print('Building...') log.debug('Detail') - print '\nAdding to details table for:' + print('\nAdding to details table for:') self.dict_parse('DetailParser', 'Detail') log.debug('Vendor') - print '\nAdding to vendors table for:' + print('\nAdding to vendors table for:') self.list_parse('VendorParser', 'Vendor') log.debug('Vendee') - print '\nAdding to vendees table for:' + print('\nAdding to vendees table for:') self.list_parse('VendeeParser', 'Vendee') log.debug('Location') - print '\nAdding to locations table for:' + print('\nAdding to locations table for:') self.list_parse('LocationParser', 'Location') def dict_parse(self, parser_name, table): - ''' - Parses data structured in a dict, which is how `details` returns. - ''' - + """Parse data structured in a dict, which is how `details` returns.""" initial_datetime = datetime.strptime( self.initial_date, '%Y-%m-%d').date() until_datetime = datetime.strptime(self.until_date, '%Y-%m-%d').date() while initial_datetime != (until_datetime + timedelta(days=1)): current_date = initial_datetime.strftime('%Y-%m-%d') - log.debug('Current date: %s', current_date) - print current_date + log.debug('Current date: {}'.format(current_date)) + print(current_date) - glob_string = '%s/data/raw/%s/form-html/*.html' % ( + glob_string = '{0}/data/raw/{1}/form-html/*.html'.format( PROJECT_DIR, current_date) # Allows for variable calls to a class. 
# Ex module.Class().method -> parse.parser_name(f).list_output for filepath in sorted(glob.glob(glob_string)): - # log.debug('filepath: %s', filepath) + # log.debug('filepath: {}'.format(filepath)) dict_output = getattr(parse, parser_name)(filepath).form_dict() self.commit_to_database(table, dict_output) @@ -81,26 +73,26 @@ def dict_parse(self, parser_name, table): initial_datetime += timedelta(days=1) def commit_to_database(self, table, output): - '''Commits to database using nested transactions and exceptions.''' - + """Commit to database using nested transactions and exceptions.""" try: + # TODO: Is this the correct method for this? with SESSION.begin_nested(): i = insert(getattr(db, table)) vals = i.values(output) - SESSION.execute(vals) + SESSION.execute(vals) # TODO: What is this? SESSION.flush() - except Exception, error: + except Exception as error: log.debug(error, exc_info=True) SESSION.rollback() - SESSION.commit() + SESSION.commit() # TODO: Should this be here? def list_parse(self, parser_name, table): - ''' - Parses data structured as a list of dicts, - which is how `locations`, `vendees` and `vendors` returns. - ''' + """ + Parse data structured as a list of dicts. + This is how `locations`, `vendees` and `vendors` returns. 
+ """ initial_datetime = datetime.strptime( self.initial_date, '%Y-%m-%d').date() until_datetime = datetime.strptime(self.until_date, '%Y-%m-%d').date() @@ -108,10 +100,10 @@ def list_parse(self, parser_name, table): while initial_datetime != (until_datetime + timedelta(days=1)): current_date = initial_datetime.strftime('%Y-%m-%d') - log.debug('Current date: %s', current_date) - print current_date + log.debug('Current date: {}'.format(current_date)) + print(current_date) - glob_string = '%s/data/raw/%s/form-html/*.html' % ( + glob_string = '{0}/data/raw/{1}/form-html/*.html'.format( PROJECT_DIR, current_date) for filepath in sorted(glob.glob(glob_string)): @@ -122,6 +114,3 @@ def list_parse(self, parser_name, table): self.commit_to_database(table, output) initial_datetime += timedelta(days=1) - -if __name__ == '__main__': - pass diff --git a/scripts/check_temp_status.py b/scripts/check_temp_status.py new file mode 100644 index 0000000..e562d8f --- /dev/null +++ b/scripts/check_temp_status.py @@ -0,0 +1,321 @@ +# -*- coding: utf-8 -*- + +""" +Needs more work. + +The Land Records Division's remote subscription service has a permanent date +range and temporary date range for its records. Sales are indexed, marked as +temporary and then reviewed a second time to become permanent. This means +that there is usually a lag of a day or two between its initial, temporary +record and the permanent version. Lately, the records have not been fully +entered on the first pass, so the records really can't be trusted until they +are labeled as permanent. + +When sales first come in, they are assumed to be temporary (`permanent_flag` +is False) in `cleaned` table. `check_temp_status.py` checks when the sales +were scraped compared to the Land Records permanent date range at the time of +the scrape. If the date of a scrape falls within the permanent date range, +those records are updated to `permanent_flag` is True in `cleaned`. Otherwise, +it stays temporary. 
+ +This is checked each day. Once a date eventually falls within the permanent +date range, the day's records are re-scraped, built, geocoded, cleaned and +published, with `permanent_flag` set as True. +""" + +import os +import re +import glob + +from datetime import datetime, timedelta +from sqlalchemy import func + +from www import log, PROJECT_DIR, SESSION +from www.db import Cleaned, Detail +from scripts.delete_dates import DeleteDates +from scripts.scrape import Scrape + + +class CheckTemp(object): + """Check on the temporary/permanent status of sales.""" + + def __init__(self): + """Establish connections to the database.""" + pass + + def earliest_date_no_flag(self): + """Find the earliest date_recorded without permanent_flag set.""" + query = SESSION.query( + func.min(Detail.document_recorded).label('early_date') + ).filter( + Detail.permanent_flag.is_(None) # To satisfy PEP8 + ).all() + + for row in query: + earliest_none_date = row.early_date + + earliest_none_datetime = datetime.combine( + earliest_none_date, datetime.min.time()) + + SESSION.close() + + return earliest_none_datetime + + def latest_date_no_flag(self): + """Finds the latest date_recorded without permanent_flag set.""" + query = SESSION.query( + func.max(Detail.document_recorded).label('late_date') + ).filter( + Detail.permanent_flag.is_(None) + ).all() + + for row in query: + latest_none_date = row.late_date + + latest_none_datetime = datetime.combine( + latest_none_date, datetime.min.time()) + + SESSION.close() + + return latest_none_datetime + + @staticmethod + def find_early_perm_date_when_scraped(current_iteration_date): + """Finds the earliest date_recorded with permanent_flag.""" + pattern = ( + r'%s/data/raw/' % PROJECT_DIR + + r'%s/' % current_iteration_date + + r'permanent-date-range-when-scraped_(\d+)-(\d+).html') + + file_path = glob.glob( + '%s/data/raw/%s/permanent-date-range-when-scraped_*.html' % ( + PROJECT_DIR, current_iteration_date))[0] + + early_permanent_date = 
re.match(pattern, file_path).group(1) + + early_permanent_datetime = datetime.strptime( + early_permanent_date, + '%m%d%Y') # ).strftime('%Y-%m-%d') + + return early_permanent_datetime + + @staticmethod + def find_late_perm_date_when_scraped(current_iteration_date): + """TODO.""" + pattern = ( + r'%s/data/raw/' % PROJECT_DIR + + r'%s/' % current_iteration_date + + r'permanent-date-range-when-scraped_(\d+)-(\d+).html') + + file_path = glob.glob( + '%s/data/raw/%s/permanent-date-range-when-scraped_*.html' % ( + PROJECT_DIR, current_iteration_date))[0] + + late_permanent_date = re.match(pattern, file_path).group(2) + + late_permanent_datetime = datetime.strptime( + late_permanent_date, + '%m%d%Y') # ).strftime('%Y-%m-%d') + + return late_permanent_datetime + + def update_this_dates_permanent_flag(self, + current_datetime, + early_permanent_datetime, + late_permanent_datetime): + """TODO.""" + cond = (early_permanent_datetime <= current_datetime and + current_datetime <= late_permanent_datetime) + + if cond: + SESSION.query( + Detail + ).filter( + Detail.document_recorded == '%s' % current_datetime + ).update({"permanent_flag": True}) + + SESSION.query( + Cleaned + ).filter( + Cleaned.document_recorded == '%s' % current_datetime + ).update({"permanent_flag": True}) + SESSION.commit() + else: + SESSION.query( + Detail + ).filter( + Detail.document_recorded == '%s' % current_datetime + ).update({"permanent_flag": False}) + + SESSION.query( + Cleaned + ).filter( + Cleaned.document_recorded == '%s' % current_datetime + ).update({"permanent_flag": False}) + SESSION.commit() + + def check_permanent_status_of_new_sales(self): + """ + Examine first-time sales and assign True or False for permanent_flag. + """ + log.debug('Check permanent status of new sales') + + # TODO: Is this function called for sales that have already been given + # a False flag? Need to change that if so, because this only looks + # for sales with no flag. Could change to check for None or False. 
+ + # Get dates to inspect + earliest_datetime = self.earliest_date_no_flag() + latest_datetime = self.latest_date_no_flag() + + # For all folders (dates) + while earliest_datetime != (latest_datetime + timedelta(days=1)): + current_iteration_date = earliest_datetime.strftime('%Y-%m-%d') + + early_permanent_datetime = self.find_early_perm_date_when_scraped( + current_iteration_date) + + late_permanent_datetime = self.find_late_perm_date_when_scraped( + current_iteration_date) + + # For this date that is currently considered temporary (whether by + # default or because it was previously confirmed to be temporary), + # check on the permanent date range at the time of the scrape. + + self.update_this_dates_permanent_flag(earliest_datetime, + early_permanent_datetime, + late_permanent_datetime) + + earliest_datetime += timedelta(days=1) + + # Check to see if temporary sales can now be scraped as permanent + def earliest_date_temp_flag(self): + """Find earliest date with permanent_flag = False.""" + query = SESSION.query( + func.min(Detail.document_recorded).label('early_date') + ).filter( + Detail.permanent_flag.is_(False) # To satisfy PEP8 + ).all() + + for row in query: + earliest_temp_date = row.early_date + + if earliest_temp_date is not None: + earliest_temp_datetime = datetime.combine( + earliest_temp_date, datetime.min.time()) + + log.debug(earliest_temp_datetime) + + SESSION.close() + + return earliest_temp_datetime + else: + SESSION.close() + return None + + @staticmethod + def latest_permanent_datetime(): + """TODO.""" + pattern = r'%s/data/' % (PROJECT_DIR) + \ + r'most-recent-permanent-date-range_(\d+)-(\d+).html' + + file_path = glob.glob( + '%s/data/most-recent-permanent-date-range_*.html' % ( + PROJECT_DIR))[0] + + global_permanent_range_last_date = re.match( + pattern, file_path).group(2) + + global_permanent_range_last_datetime = datetime.strptime( + global_permanent_range_last_date, + '%m%d%Y') # ).strftime('%Y-%m-%d') + + return 
global_permanent_range_last_datetime + + @staticmethod + def find_newly_permanent_date_range(start_temp_datetime, + end_permanent_datetime): + """TODO.""" + # Find difference between permanent end date and temp start date. + # If positive or zero, then those two dates form the scrape range. + + date_diff = end_permanent_datetime - start_temp_datetime + + if date_diff.days >= 0: + # There is at least one day currently labeled as "temporary" that + # now falls within "permanent" date range. + dates_to_redo = [ + start_temp_datetime, end_permanent_datetime] + return dates_to_redo + else: + # The "temporary" sales are all still in the temporary date range + return None + + def check_permanent_status_of_temp_sales(self): + """ + Compare latest permanent date range to the range of sales in our + database that are labeled "temporary." If any of those temporary sales + now fall within the permanent range, re-scrape and re-initialize. + """ + log.debug('Check permanent status of temporary sales') + + # Don't need to know temporary end date or permanent start date. + # Only need to know temporary start date and permanent end date + # to determine the dates that were temporary but are now permanent. + # See find_date_range_to_rescrape_and_initialize() for logic. 
+ + earliest_temp_datetime = self.earliest_date_temp_flag() + + if earliest_temp_datetime is None: # No temporary sales + return + + global_permanent_range_last_datetime = self.latest_permanent_datetime() + + dates_to_redo = self.find_newly_permanent_date_range( + earliest_temp_datetime, + global_permanent_range_last_datetime) + + if dates_to_redo is not None: + self.scrape_days(dates_to_redo[0], dates_to_redo[1]) + + @staticmethod + def scrape_days(early_date, late_date): + """docstring""" + + early_datetime = datetime.strptime(early_date, '%Y-%m-%d') + log.debug(early_datetime) + late_datetime = datetime.strptime(late_date, '%Y-%m-%d') + log.debug(early_datetime) + + # Scrape those days over again + log.info('scrape') + try: + Scrape( + initial_date=early_datetime, + until_date=late_datetime + ).main() + except Exception as error: + log.error(error, exc_info=True) + + def delete_existing_records(self, early_date, late_date): + """Delete existing records for this date. Order matters""" + + DeleteDates( + initial_date=early_date, + until_date=late_date + ).main() + + @staticmethod + def rebuild_days(early_date, late_date): + """Scrapes and initializes dates.""" + + print(early_date, late_date) + + # Build those newly scraped records. + # This will set perm_flag = True in + # checkPermanentStatusOfNewSales(). + log.info('doitall') + # initialize.do_it_all(early_date, late_date) # todo: uncomment + +if __name__ == '__main__': + pass diff --git a/realestate/lib/clean.py b/scripts/clean.py similarity index 83% rename from realestate/lib/clean.py rename to scripts/clean.py index d8ec029..8f5c3b5 100644 --- a/realestate/lib/clean.py +++ b/scripts/clean.py @@ -1,7 +1,8 @@ # -*- coding: utf-8 -*- """ -Has two classes: `Join` and `Clean`. +This has two classes: `Join` and `Clean`. + `Join` joins each of the four sale tables (details, vendors, vendees and locations) on their document ID and commits each sale to the `cleaned` table. 
@@ -9,40 +10,38 @@ utilizes `libraries.py`, a collection of items to check for. """ -import os import re -from sqlalchemy import insert, func, cast, Text -from realestate.db import ( +from sqlalchemy import insert, func, cast, Text, create_engine + +from scripts.libraries import Library +from www import log, USER, SESSION, ENGINE_STRING +from www.db import ( Cleaned, Detail, Location, Vendee, - Vendor -) -from realestate.lib.libraries import Library -from realestate import log, USER, DATABASE_NAME + Vendor) class Join(object): - - '''JOIN the four individual tables.''' + """JOIN the four individual tables.""" def __init__(self, initial_date=None, until_date=None): - '''Initialize self variables and establish connection to database.''' - + """Initialize self variables and establish connection to database.""" self.initial_date = initial_date self.until_date = until_date - def get_details(self): - '''Returns SQL query of details table for given date range.''' + self.engine = create_engine(ENGINE_STRING) + def get_details(self): + """Return SQL query of details table for given date range.""" subquery = SESSION.query( Detail ).filter( - Detail.document_recorded >= '%s' % self.initial_date + Detail.document_recorded >= '{}'.format(self.initial_date) ).filter( - Detail.document_recorded <= '%s' % self.until_date + Detail.document_recorded <= '{}'.format(self.until_date) ).subquery() log.debug(subquery) @@ -52,14 +51,13 @@ def get_details(self): return subquery def get_vendees(self): - '''Returns SQL query of vendees table for given date range.''' - + """Return SQL query of vendees table for given date range.""" log.debug('get_vendees') subquery = SESSION.query( Vendee.document_id, func.string_agg( - cast(Vendee.vendee_firstname, Text) + ' ' + + cast(Vendee.vendee_firstname, Text) + " " + cast(Vendee.vendee_lastname, Text), ', ' ).label('buyers') @@ -74,14 +72,13 @@ def get_vendees(self): return subquery def get_vendors(self): - '''Returns SQL query of vendors table for 
given date range.''' - + """Return SQL query of vendors table for given date range.""" log.debug('get_vendors') subquery = SESSION.query( Vendor.document_id, func.string_agg( - cast(Vendor.vendor_firstname, Text) + ' ' + + cast(Vendor.vendor_firstname, Text) + " " + cast(Vendor.vendor_lastname, Text), ', ' ).label('sellers') @@ -96,8 +93,7 @@ def get_vendors(self): return subquery def get_locations(self): - '''Returns SQL query of locations table for given date range.''' - + """Return SQL query of locations table for given date range.""" log.debug('get_locations') subquery = SESSION.query( @@ -136,8 +132,7 @@ def get_locations(self): return subquery def join_subqueries(self): - '''Runs a JOIN on subqueries.''' - + """Run a JOIN on subqueries.""" log.debug('join_subqueries') subq_vendees = self.get_vendees() @@ -159,7 +154,7 @@ def join_subqueries(self): subq_location.c.location_publish, subq_location.c.address, subq_location.c.location_info - # todo: Once SQLAlchemy supports WITHIN GROUP, uncomment these. + # TODO: Once SQLAlchemy supports WITHIN GROUP, uncomment these. # subq_location.c.zip_code, # subq_location.c.latitude, # subq_location.c.longitude, @@ -171,9 +166,9 @@ def join_subqueries(self): ).join( subq_location ).filter( - Detail.document_recorded >= '%s' % self.initial_date + Detail.document_recorded >= '{}'.format(self.initial_date) ).filter( - Detail.document_recorded <= '%s' % self.until_date + Detail.document_recorded <= '{}'.format(self.until_date) ).all() log.debug('len(query): %d', len(query)) @@ -183,33 +178,38 @@ def join_subqueries(self): return query def get_rows_from_query(self): - '''Convert query result to row of dicts.''' - + """Convert query result to row of dicts.""" log.debug('get_rows_from_query') query = self.join_subqueries() rows = [] + for row in query: - dict_val = row.__dict__ - del dict_val['_labels'] # todo: necessary? 
- # del dict_val['document_id'] # leave for now bc of ___ todo + # dict_val = row.__dict__ # Old + dict_val = dict(zip(row.keys(), row)) # New. TODO: Check it works. + rows.append(dict_val) - log.debug('len(rows): %d', len(rows)) + log.debug('len(rows): {}'.format(len(rows))) return rows def add_location_fields_temp_hack(self, incoming_rows): - ''' - SQLAlchemy doesn't yet support WITHIN GROUP, which is necessary for + """ + The SQLAlchemy doesn't yet support WITHIN GROUP. + + This is necessary for using mode() aggregate function in PostgreSQL 9.4 (see get_locations() for normal use). So instead, this hack will temporary do that job. - ''' - - if USER == 'thomasthoren': - sql = """SELECT - document_id, + """ + # TODO: SQLAlchemy 1.1 + # func.mode(zip_code).within_group(zip_code), + + # TODO: Remove hack. + if USER == 'tom': + sql = """ + SELECT document_id, -- mode(zip_code) AS zip_code, -- mode(latitude) AS latitude, -- mode(longitude) AS longitude, @@ -234,6 +234,19 @@ def add_location_fields_temp_hack(self, incoming_rows): FROM locations GROUP BY document_id""" + # sql = """ + # SELECT document_id, + # mode(zip_code) AS zip_code, + # mode(latitude) AS latitude, + # mode(longitude) AS longitude, + # mode(neighborhood) AS neighborhood + # -- mode() WITHIN GROUP (ORDER BY zip_code) AS zip_code, + # -- mode() WITHIN GROUP (ORDER BY latitude) AS latitude, + # -- mode() WITHIN GROUP (ORDER BY longitude) AS longitude, + # -- mode() WITHIN GROUP (ORDERBY neighborhood) AS neighborhood + # FROM locations + # GROUP BY document_id""" + result = self.engine.execute(sql) rows = [] @@ -265,21 +278,20 @@ def add_location_fields_temp_hack(self, incoming_rows): class Clean(object): - - '''Clean the joined tables and commit to cleaned.''' + """Clean the joined tables and commit to cleaned.""" def __init__(self, initial_date=None, until_date=None): - '''Initialize self variables and establish connection to database.''' + """Initialize self variables and establish connection to 
database.""" + self.engine = create_engine(ENGINE_STRING) self.initial_date = initial_date self.until_date = until_date - log.debug('self.initial_date: %s', self.initial_date) - log.debug('self.until_date: %s', self.until_date) + log.debug('self.initial_date: {}'.format(self.initial_date)) + log.debug('self.until_date: {}'.format(self.until_date)) def update_cleaned_geom(self): - '''Update the PostGIS geom field in the cleaned table.''' - + """Update the PostGIS geom field in the cleaned table.""" log.debug('Update Cleaned geometry') sql = """UPDATE cleaned @@ -289,14 +301,12 @@ def update_cleaned_geom(self): @staticmethod def prep_rows(rows): - '''Returns all rows in Titlecase.''' - + """Return all rows in Titlecase.""" # This loop returns text that is not all-caps, but is still flawed: # to standardize upper and lowercases for row in rows: # Capitalizes the first letter in each word. # Results in words like Llc, Xiv, etc - row['sellers'] = row['sellers'].title() row['buyers'] = row['buyers'].title() row['address'] = row['address'].title() @@ -307,8 +317,7 @@ def prep_rows(rows): @staticmethod def check_for_acronyms(rows): - '''Corrects acronyms.''' - + """Correct acronyms.""" # This loop scans for the above problem words and replaces them with # their substitutes: for row in rows: @@ -336,8 +345,7 @@ def check_for_acronyms(rows): @staticmethod def check_for_mcnames(rows): - '''Corrects Mc___ names.''' - + """Correct Mc___ names.""" for row in rows: # Check for occurences of problematic "Mc" names. 
Corrections # assume that the letter after should be capitalized: @@ -354,8 +362,7 @@ def check_for_mcnames(rows): @staticmethod def check_for_abbreviations(rows): - '''Corrects abbreviations.''' - + """Correct abbreviations.""" for row in rows: # Check for problematic abbreviations: for abbreviation in Library().abbreviations: @@ -374,8 +381,7 @@ def check_for_abbreviations(rows): @staticmethod def check_for_adress_abbreviations(rows): - '''Corrects address abbreviations.''' - + """Correct address abbreviations.""" for row in rows: # Fix address abbreviations (for AP style purposes) for address_abbreviation in Library().streets: @@ -393,8 +399,7 @@ def check_for_adress_abbreviations(rows): @staticmethod def check_for_middle_initials(rows): - '''Corrects middle initials.''' - + """Correct middle initials.""" for row in rows: for middle_initial in Library().middle_initials: middle_initial0 = middle_initial[0] @@ -416,8 +421,7 @@ def check_for_middle_initials(rows): @staticmethod def check_for_neighborhood_names(rows): - '''Corrects neighborhood names.''' - + """Correct neighborhood names.""" for row in rows: for neighborhood_name in Library().neighborhood_names: name0 = neighborhood_name[0] @@ -429,8 +433,7 @@ def check_for_neighborhood_names(rows): @staticmethod def regex_subs(rows): - '''More than simple find-and-replace tasks.''' - + """More than simple find-and-replace tasks.""" for row in rows: # Must do regex for "St" and others. Imagine "123 Star St". 
# Scanning for " St" in the above loop would catch the start of @@ -446,8 +449,7 @@ def regex_subs(rows): @staticmethod def convert_amounts(rows): - '''Convert string, with or without $ and commas, to rounded int.''' - + """Convert string, with or without $ and commas, to rounded int.""" for row in rows: row['amount'] = str(row['amount']) row['amount'] = re.sub(r'\$', r'', row['amount']) # remove the $ @@ -459,8 +461,7 @@ def convert_amounts(rows): return rows def clean_rows(self, rows): - '''Run rows through all cleaning methods.''' - + """Run rows through all cleaning methods.""" rows = self.check_for_acronyms(rows) rows = self.check_for_mcnames(rows) rows = self.check_for_abbreviations(rows) @@ -487,8 +488,7 @@ def clean_rows(self, rows): @staticmethod def clean_punctuation(rows): - '''Fix punctuation (leading/trailing spaces or commas).''' - + """Fix punctuation (leading/trailing spaces or commas).""" for row in rows: row['sellers'] = row['sellers'].strip( ' ,' @@ -514,8 +514,7 @@ def clean_punctuation(rows): @staticmethod def other_stuff_addresses(rows): - '''Runs checks for addresses.''' - + """Run checks for addresses.""" # log.debug(rows) for row in rows: @@ -538,7 +537,7 @@ def other_stuff_addresses(rows): individual_address_text + ', ' + j.strip()) - except Exception, error: + except Exception as error: log.exception(error, exc_info=True) continue @@ -559,8 +558,7 @@ def other_stuff_addresses(rows): @staticmethod def other_stuff_location_info(rows): - '''Runs checks for location_info.''' - + """Run checks for location_info.""" for row in rows: # To remove district ordinal row['location_info'] = row['location_info'].replace('1st', '1') @@ -591,7 +589,7 @@ def other_stuff_location_info(rows): individiual_location_text + ', ' + j.strip()) - except Exception, error: + except Exception as error: log.exception(error, exc_info=True) continue @@ -610,8 +608,7 @@ def other_stuff_location_info(rows): return rows def commit_rows(self, rows): - '''Commits JOIN-ed 
rows to the cleaned table.''' - + """Commit JOIN-ed rows to the cleaned table.""" log.debug('Committing %d rows', len(rows)) for count, row in enumerate(rows): @@ -622,7 +619,7 @@ def commit_rows(self, rows): i = i.values(row) SESSION.execute(i) SESSION.flush() - except Exception, error: + except Exception as error: log.debug('count: %s', count) log.exception(error, exc_info=True) SESSION.rollback() @@ -634,10 +631,9 @@ def commit_rows(self, rows): # session.close() def main(self): - '''Run Join() and Clean() scripts.''' - + """Run Join() and Clean() scripts.""" log.info('Clean') - print 'Cleaning...' + print('Cleaning...') log.debug('get_rows_from_query') rows = Join( @@ -661,3 +657,14 @@ def main(self): if __name__ == '__main__': Clean().main() + + # TODO + # rows = Join( + # initial_date="2016-07-13", + # until_date="2016-07-13" + # ).get_rows_from_query() + + # rows2 = Join( + # initial_date="2016-07-13", + # until_date="2016-07-13" + # ).add_location_fields_temp_hack(rows) diff --git a/scripts/delete_dates.py b/scripts/delete_dates.py new file mode 100644 index 0000000..f30b703 --- /dev/null +++ b/scripts/delete_dates.py @@ -0,0 +1,175 @@ +# -*- coding: utf-8 -*- + +""" +Command line tool for deleting all records for a given date or date range. + +Meant for quicker testing. + +Usage: + delete_dates.py + delete_dates.py + +Options: + -h, --help Show help screen. + --version Show version number. + +Dates are in the format YYYY-MM-DD. Ex. 
2016-12-31 +""" + +import os +import psycopg2 + +from datetime import datetime +from docopt import docopt + +from www.db import Cleaned, Detail +from www import log, SESSION, DATABASE_NAME + + +class BadDateRangeError(Exception): + """Error for when date range is backward.""" + + pass + + +class DeleteDates(object): + """Delete certain dates from database.""" + + def __init__(self, initial_date=None, until_date=None): + """Initialize self variables and establish connection to database.""" + engine_string = ( + 'host=localhost dbname={0} user={1} password={2}').format( + DATABASE_NAME, + os.environ.get('REAL_ESTATE_DATABASE_USERNAME'), + os.environ.get('REAL_ESTATE_DATABASE_PASSWORD')) + self.conn = psycopg2.connect(engine_string) + self.cursor = self.conn.cursor() + + self.initial_date = initial_date + self.until_date = until_date + + log.debug('self.initial_date: {}'.format(self.initial_date)) + log.debug('self.until_date: {}'.format(self.until_date)) + + def main(self): + """Run Join() and Clean() scripts.""" + self.delete_details() + self.delete_cleaned() + self.vacuum() + + def vacuum(self): + """Update database.""" + old_isolation_level = self.conn.isolation_level + self.conn.set_isolation_level(0) + sql = 'VACUUM;' + self.cursor.execute(sql) + self.conn.commit() + self.conn.set_isolation_level(old_isolation_level) + + def delete_details(self): + """TODO.""" + SESSION.query( + Detail + ).filter( + Detail.document_recorded >= '{}'.format(self.initial_date) + ).filter( + Detail.document_recorded <= '{}'.format(self.until_date) + ).delete() + + SESSION.commit() + + def delete_cleaned(self): + """TODO.""" + SESSION.query( + Cleaned + ).filter( + Cleaned.document_recorded >= '{}'.format(self.initial_date) + ).filter( + Cleaned.document_recorded <= '{}'.format(self.until_date) + ).delete() + + SESSION.commit() + + +def cli_has_errors(arguments): + """Check for any CLI parsing errors.""" + all_arguments = ( + arguments[''] is not None and + arguments[''] is not None 
and + arguments[''] is not None) + + if all_arguments: + # print("Must use single date or date range, but not both.") + return True + + no_arguments = ( + arguments[''] is not None and + arguments[''] is not None and + arguments[''] is not None) + + if no_arguments: + # print("You must supply at least one date.") + return True + + single_and_other_arguments = ( + ( + arguments[''] is not None and + arguments[''] is not None + ) or + ( + arguments[''] is not None and + arguments[''] is not None + )) + + if single_and_other_arguments: + # print("Cannot use a single date and a date range bound.") + return True + + one_date_bound_only = ( + ( + arguments[''] is not None and + arguments[''] is None + ) or + ( + arguments[''] is None and + arguments[''] is not None + )) + + if one_date_bound_only: + # print("Must pick both ends of a date range bound.") + return True + + # All good + return False + + +def cli(arguments): + """Parse command-line arguments.""" + # Catch any missed errors. + if cli_has_errors(arguments): + return + + if arguments['']: # Single date + early_date = arguments[''] + late_date = arguments[''] + + log.info('Initializing single date: {}.'.format(early_date)) + elif arguments[''] and arguments['']: # Date range + early_date = arguments[''] + late_date = arguments[''] + + log.info('Initializing date range: {0} to {1}.'.format( + early_date, late_date)) + + # Check for errors + early_datetime = datetime.strptime(early_date, "%Y-%m-%d") + late_datetime = datetime.strptime(late_date, "%Y-%m-%d") + + if early_datetime > late_datetime: + raise BadDateRangeError("The date range does not make sense.") + + DeleteDates(initial_date=early_date, until_date=late_date).main() + +if __name__ == '__main__': + arguments = docopt(__doc__, version="0.0.1") + cli(arguments) diff --git a/scripts/delete_db.py b/scripts/delete_db.py index 47e23e6..3eef05f 100644 --- a/scripts/delete_db.py +++ b/scripts/delete_db.py @@ -7,6 +7,7 @@ ''' import os + from subprocess import 
call from sqlalchemy.engine import reflection from sqlalchemy import create_engine @@ -15,10 +16,9 @@ Table, DropTable, ForeignKeyConstraint, - DropConstraint -) + DropConstraint) -from realestate import log, DATABASE_NAME, BACKUP_DIR, TODAY_DATE +from www import log, ENGINE_STRING, BACKUP_DIR, TODAY_DATE class Delete(object): @@ -28,13 +28,7 @@ class Delete(object): def __init__(self): '''Establish connection to the database.''' - engine = create_engine( - 'postgresql://%s:%s@localhost/%s' % ( - os.environ.get('REAL_ESTATE_DATABASE_USERNAME'), - os.environ.get('REAL_ESTATE_DATABASE_PASSWORD'), - DATABASE_NAME - ) - ) + engine = create_engine(ENGINE_STRING) self.conn = engine.connect() self.trans = self.conn.begin() self.inspector = reflection.Inspector.from_engine(engine) @@ -71,7 +65,7 @@ def dump_dashboard_table(): '{0}'.format(BACKUP_DIR) + '/dashboard_table_{0}.sql'.format(TODAY_DATE) ]) - except Exception, error: + except Exception as error: log.debug(error, exc_info=True) @staticmethod @@ -87,7 +81,7 @@ def drop_db(): 'dropdb', '%s' % DATABASE_NAME ]) - except Exception, error: + except Exception as error: log.debug(error, exc_info=True) @staticmethod @@ -104,7 +98,7 @@ def vacuum_database(): '-c', 'VACUUM;' ]) - except Exception, error: + except Exception as error: log.debug(error, exc_info=True) def drop_tables(self): diff --git a/scripts/delete_db.sh b/scripts/delete_db.sh new file mode 100755 index 0000000..e02305d --- /dev/null +++ b/scripts/delete_db.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +# Setup database +echo "Force users to quit realestate database session..." +psql realestate -c " +SELECT pg_terminate_backend(pg_stat_activity.pid) +FROM pg_stat_activity +WHERE datname = current_database() +AND pid <> pg_backend_pid();" + +echo "Backing up database..." +pg_dump realestate > $BACKUP_DIR/db-$(date +%Y-%m-%d-%H-%M-%S).sql + +echo "Drop citysalaries database if it exists..." 
+dropdb --if-exists realestate + +# os.environ.get('REAL_ESTATE_DATABASE_USERNAME'), +# os.environ.get('REAL_ESTATE_DATABASE_PASSWORD'), diff --git a/realestate/lib/email_template.py b/scripts/email_template.py similarity index 52% rename from realestate/lib/email_template.py rename to scripts/email_template.py index 2df4ec0..9f90cae 100644 --- a/realestate/lib/email_template.py +++ b/scripts/email_template.py @@ -1,40 +1,41 @@ # -*- coding: utf-8 -*- -''' -The template for the summary email. Draws on `stat_analysis.py` for common, -interesting queries. -''' +""" +The template for the summary email. -from realestate.lib.stat_analysis import StatAnalysis -from realestate.lib.utils import Utils +Draws on `stat_analysis.py` for common, interesting queries. +""" + +from scripts.stat_analysis import StatAnalysis +from www.utils import ( + ymd_to_full_date, + ymd_to_mdy_slashes) class EmailTemplate(object): - '''Email template class, including subject and body generators.''' + """Email template class, including subject and body generators.""" def __init__(self, initial_date=None, until_date=None): - '''Initialize self variables with date range.''' - + """Initialize self variables with date range.""" self.initial_date = initial_date self.until_date = until_date def generate_subject(self): - '''Generates subject for email.''' - + """Generate subject for email.""" subject = "Real Estate summary for" if self.initial_date == self.until_date: - subject += " %s" % Utils().ymd_to_full_date(self.initial_date) + subject += " {}".format(ymd_to_full_date(self.initial_date)) else: - subject += " %s to %s" % ( - Utils().ymd_to_full_date(self.initial_date, no_day=True), - Utils().ymd_to_full_date(self.until_date, no_day=True)) + subject += " {0} to {1}".format( + ymd_to_full_date(self.initial_date, no_day=True), + ymd_to_full_date(self.until_date, no_day=True) + ) return subject def generate_body(self): - '''Combine the email body parts.''' - + """Combine the email body parts.""" 
email_summary = self.generate_body_summary() email_list = self.generate_body_list() @@ -43,58 +44,37 @@ def generate_body(self): return email_string def generate_body_summary(self): - '''Generates body for email, including statistics.''' - + """Generate body for email, including statistics.""" stat = StatAnalysis( initial_date=self.initial_date, until_date=self.until_date ) email_summary = ( - '

http://vault.thelensnola.org/realestate/search?d1={0}&d2={1}' + - '

' + - '\n' + - '\n' + - '

{2} sales recorded on {3} to {4}.' + - '

' + - '\n' + - '\n' + - '

{5} records not published because of questionable data.' + - '

' + - '\n' + - '\n' + + '

//vault.thelensnola.org/realestate/search?d1={0}&d2={1}' + + '

\n\n' + + '

{2} sales recorded on {3} to {4}.

\n\n' + + '

{5} records not published because of suspect data.

\n\n' + '

{6} records not published because location ' + - 'could not be found.' + - '

' + - '\n' + - '\n' + - '

http://vault.thelensnola.org/realestate/dashboard' + - '

' + - '\n' + - '\n' + - '

High: ${7}

' + - '\n' + - '\n' + - '

Low: ${8}

' + - '\n' + - '\n' + 'could not be found.

\n\n' + + '

High: ${7}

\n\n' + + '

Low: ${8}

\n\n' ).format( - Utils().ymd_to_mdy_slashes(self.initial_date), - Utils().ymd_to_mdy_slashes(self.until_date), - format(stat.count(), ','), - Utils().ymd_to_full_date(self.initial_date), - Utils().ymd_to_full_date(self.until_date), - format(stat.detail_not_published(), ','), - format(stat.location_not_published(), ','), - stat.highest_amount(), - stat.lowest_amount() + ymd_to_mdy_slashes(self.initial_date), # 0 + ymd_to_mdy_slashes(self.until_date), # 1 + format(stat.count(), ','), # 2 + ymd_to_full_date(self.initial_date), # 3 + ymd_to_full_date(self.until_date), # 4 + format(stat.detail_not_published(), ','), # 5 + format(stat.location_not_published(), ','), # 6 + stat.highest_amount(), # 7 + stat.lowest_amount() # 8 ) return email_summary def generate_body_list(self): - '''Generate list of all sales in given date range.''' - + """Generate list of all sales in given date range.""" stat = StatAnalysis(self.initial_date, self.until_date) rows = stat.all_records() @@ -133,6 +113,3 @@ def generate_body_list(self): message = '' return email_list - -if __name__ == '__main__': - pass diff --git a/realestate/lib/geocode.py b/scripts/geocode.py similarity index 66% rename from realestate/lib/geocode.py rename to scripts/geocode.py index cc84880..ddb025c 100644 --- a/realestate/lib/geocode.py +++ b/scripts/geocode.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- """ -Uses the Google Geocoding API (we used to use the PostGIS Geocoder) to -geocode addresses, resulting in ZIP codes, latitude, longitude and an -accuracy rating. A rating of "ROOFTOP" or "RANGE_INTERPOLATED" is good enough -for publication. +Use the Google Geocoding API to geocode addresses. + +Returns ZIP codes, latitude, longitude and an accuracy rating. +A rating of "ROOFTOP" or "RANGE_INTERPOLATED" is good enough for publication. 
This also includes a method that uses PostGIS to find the neighborhood in which each sale occurred, working with a neighborhood shapefile available @@ -13,23 +13,18 @@ import os import googlemaps + from sqlalchemy import func, cast, Float, update -from realestate.db import ( - Detail, - Location, - Neighborhood -) -from realestate import log, SESSION +from www.db import Detail, Location, Neighborhood +from www import log, SESSION class Geocode(object): - - '''Geocode class that needs no input.''' + """Geocode class that needs no input.""" def __init__(self, initial_date=None, until_date=None): - '''Generates connections to PostgreSQL and SQLAlchemy.''' - + """Generate connections to PostgreSQL and SQLAlchemy.""" self.initial_date = initial_date self.until_date = until_date @@ -37,18 +32,15 @@ def __init__(self, initial_date=None, until_date=None): key=os.environ.get('GOOGLE_GEOCODING_API_KEY')) def update_locations_with_neighborhoods(self): - '''Finds neighborhoods and handles if none found.''' - + """Find neighborhoods and handles if none found.""" self.neighborhood_found() self.no_neighborhood_found() def neighborhood_found(self): - '''Use PostGIS to find which neighborhood a long/lat pair is in.''' - + """Use PostGIS to find which neighborhood a long/lat pair is in.""" log.debug('neighborhood_found') SESSION.query( - # Neighborhood, Location ).filter( func.ST_Contains( @@ -69,8 +61,7 @@ def neighborhood_found(self): SESSION.commit() def no_neighborhood_found(self): - '''If no neighborhood is found, update with "None" in nbhd field.''' - + """If no neighborhood is found, update with "None" in nbhd field.""" log.debug('no_neighborhood_found') SESSION.query( @@ -85,13 +76,11 @@ def no_neighborhood_found(self): SESSION.commit() def get_rows_with_null_rating(self): - ''' - Returns query result for locations with rating IS NULL, between dates - defined in self.initial_date and self.until_date. + """ + Return query result for locations with rating IS NULL. 
:returns: SQLAlchemy query result. - ''' - + """ query = SESSION.query( Location.rating, Location.document_id, @@ -102,27 +91,26 @@ def get_rows_with_null_rating(self): ).filter( Location.rating.is_(None) ).filter( - Detail.document_recorded >= '%s' % self.initial_date + Detail.document_recorded >= '{}'.format(self.initial_date) ).filter( - Detail.document_recorded <= '%s' % self.until_date + Detail.document_recorded <= '{}'.format(self.until_date) ).all() - log.debug('Rows with rating is NULL: %d', len(query)) + log.debug('Rows with rating is NULL: {}'.format(len(query))) SESSION.close() return query def process_google_results(self, result): - ''' - Returns a dict of the returned geocoding results. + """ + Return a dict of the returned geocoding results. :param result: The returned results from Google. :type result: JSON :returns: A dict with rating, latitude, longitude and ZIP code. :rtype: dict - ''' - + """ log.debug('process_google_results') dict_output = {} @@ -130,35 +118,29 @@ def process_google_results(self, result): geometry = result[0]['geometry'] address_components = result[0]['address_components'] - # todo: result[1] or more? + # TODO: result[1] or more? dict_output['latitude'] = geometry['location']['lat'] dict_output['longitude'] = geometry['location']['lng'] dict_output['rating'] = geometry['location_type'] try: dict_output['zip_code'] = address_components[7]['short_name'] - except Exception, error: - log.exception(error, exc_info=True) + except Exception: + log.info("No zip code.") dict_output['zip_code'] = "None" - # log.debug(dict_output) - return dict_output def geocode(self): - ''' - Updates latitude, longitude, rating and zip_code fields in the - locations table using the Google Geocoding API. - ''' - + """Update latitude, longitude, rating & zip in the locations table.""" log.debug('Geocode') - print '\nGeocoding...' 
+ print('\nGeocoding...') null_query = self.get_rows_with_null_rating() for row in null_query: - full_address = row.street_number + ' ' + row.address + \ - ', New Orleans, LA' + full_address = "{0} {1}, New Orleans, LA".format( + row.street_number, row.address) # Let it fail on any errors so API doesn't continue to get hit. geocode_result = self.gmaps.geocode(full_address) @@ -171,25 +153,13 @@ def geocode(self): u = u.where(Location.document_id == row.document_id) SESSION.execute(u) SESSION.flush() - except Exception, error: + except Exception as error: log.exception(error, exc_info=True) SESSION.rollback() SESSION.commit() - # break - - # session.close() - log.debug('Done geocoding') if __name__ == '__main__': - try: - Geocode( - initial_date='2014-02-18', - until_date='2014-05-08' - ).get_rows_with_null_rating() - except Exception, error: - log.exception(error, exc_info=True) - pass diff --git a/realestate/lib/get_dates.py b/scripts/get_dates.py similarity index 94% rename from realestate/lib/get_dates.py rename to scripts/get_dates.py index da52ef8..b77c50c 100644 --- a/realestate/lib/get_dates.py +++ b/scripts/get_dates.py @@ -2,11 +2,10 @@ """Get date range for initialize script.""" -import os from datetime import timedelta -from realestate.db import Detail -from realestate import log, YESTERDAY_DATE, OPENING_DATE, SESSION +from www.db import Detail +from www import log, YESTERDAY_DATE, OPENING_DATE, SESSION class GetDates(object): diff --git a/scripts/initialize.py b/scripts/initialize.py index 56a4110..a4d187d 100644 --- a/scripts/initialize.py +++ b/scripts/initialize.py @@ -1,121 +1,166 @@ # -*- coding: utf-8 -*- -''' -Calls on other classes to build, geocode, clean and publish records to the -cleaned table. Can receive a date range or determine the dates on its own. 
-''' - -import sys - -from realestate.lib.build import Build -from realestate.lib.clean import Clean -from realestate.lib.geocode import Geocode -from realestate.lib.get_dates import GetDates -# from realestate.lib.mail import Mail -from realestate.lib.publish import Publish -# from realestate.lib.check_temp_status import CheckTemp -# from realestate.lib.email_template import EmailTemplate -from realestate import log # LOG_DIR, OPENING_DAY - - -class Initialize(object): - - ''' - Calls on other classes to build, geocode, clean and publish - records to the cleaned table. - ''' - - def __init__(self, initial_date=None, until_date=None): - '''Runs through all classes.''' - - if initial_date is not None and until_date is not None: - self.initial_date = initial_date - self.until_date = until_date - else: - date_range = GetDates().get_date_range() - self.initial_date = date_range['initial_date'] - self.until_date = date_range['until_date'] - - log.debug('self.initial_date: %s', self.initial_date) - log.debug('self.until_date: %s', self.until_date) - - try: - Build( - initial_date=self.initial_date, - until_date=self.until_date - ).build_all() - - except Exception, error: - log.exception(error, exc_info=True) - - Geocode( - initial_date=self.initial_date, - until_date=self.until_date - ).geocode() # Geocoding takes over an hour - Geocode().update_locations_with_neighborhoods() - - try: - Publish( - initial_date=self.initial_date, - until_date=self.until_date - ).main() - except Exception, error: - log.exception(error, exc_info=True) - - try: - Clean( - initial_date=self.initial_date, - until_date=self.until_date - ).main() - except Exception, error: - log.exception(error, exc_info=True) - - # dashboard_sync.DashboardSync() # todo - - Clean( - initial_date=self.initial_date, - until_date=self.until_date - ).update_cleaned_geom() - - # CheckTemp( - # initial_date=self.initial_date, - # until_date=self.until_date - # ).check_permanent_status_of_new_sales() - - # CheckTemp( - 
# initial_date=self.initial_date, - # until_date=self.until_date - # ).check_permanent_status_of_temp_sales() - - # check_assessor_urls().check( - # initial_date=initial_date, until_date=until_date) +""" +Build, geocode, clean and publish records to the cleaned table. + +Use command-line arguments to specify a date range or use yesterday as default. + +Usage: + initialize.py + initialize.py + initialize.py + +Options: + -h, --help Show help screen. + --version Show version number. + +Dates are in the format YYYY-MM-DD. Ex. 2016-12-31 +""" + +from datetime import datetime +from docopt import docopt + +from scripts.build import Build +from scripts.clean import Clean +from scripts.geocode import Geocode +from scripts.get_dates import GetDates +# from scripts.mail import Mail +from scripts.publish import Publish +# from scripts.check_temp_status import CheckTemp +# from scripts.email_template import EmailTemplate +from www import log # LOG_DIR, OPENING_DAY + + +class BadDateRangeError(Exception): + """Error for when date range is backward.""" + + pass + + +def initialize(initial_date=None, until_date=None): + """Build, geocode, clean and publish records to the cleaned table.""" + if initial_date is None or until_date is None: + date_range = GetDates().get_date_range() + initial_date = date_range['initial_date'] + until_date = date_range['until_date'] + + log.debug('self.initial_date: {}'.format(initial_date)) + log.debug('self.until_date: {}'.format(until_date)) + + # try: # TODO + Build(initial_date=initial_date, until_date=until_date).build_all() + + # except Exception as error: + # log.exception(error, exc_info=True) + + # Geocoding takes over an hour + Geocode(initial_date=initial_date, until_date=until_date).geocode() + Geocode().update_locations_with_neighborhoods() + + # try: # TODO + Publish(initial_date=initial_date, until_date=until_date).main() + # except Exception as error: + # log.exception(error, exc_info=True) + + # try: # TODO + 
Clean(initial_date=initial_date, until_date=until_date).main() + # except Exception as error: + # log.exception(error, exc_info=True) + + Clean( + initial_date=initial_date, + until_date=until_date + ).update_cleaned_geom() + + # TODO: Send email summary + + # CheckTemp( + # initial_date=self.initial_date, + # until_date=self.until_date + # ).check_permanent_status_of_new_sales() + + # CheckTemp( + # initial_date=self.initial_date, + # until_date=self.until_date + # ).check_permanent_status_of_temp_sales() + + +def cli_has_errors(arguments): + """Check for any CLI parsing errors.""" + all_arguments = ( + arguments[''] is not None and + arguments[''] is not None and + arguments[''] is not None) + + if all_arguments: + # print("Must use single date or date range, but not both.") + return True + + single_and_other_arguments = ( + ( + arguments[''] is not None and + arguments[''] is not None + ) or + ( + arguments[''] is not None and + arguments[''] is not None + )) + + if single_and_other_arguments: + # print("Cannot use a single date and a date range bound.") + return True + + one_date_bound_only = ( + ( + arguments[''] is not None and + arguments[''] is None + ) or + ( + arguments[''] is None and + arguments[''] is not None + )) + + if one_date_bound_only: + # print("Must pick both ends of a date range bound.") + return True + + # All good + return False + + +def cli(arguments): + """Parse command-line arguments.""" + # Catch any missed errors + if cli_has_errors(arguments): + return + + if arguments['']: # Single date + early_date = arguments[''] + late_date = arguments[''] + + log.info('Initializing single date: {}.'.format(early_date)) + elif arguments[''] and arguments['']: # Date range + early_date = arguments[''] + late_date = arguments[''] + + log.info('Initializing date range: {0} to {1}.'.format( + early_date, late_date)) + else: # No dates provided + log.info('Initializing all dates that need it.') + + initialize() # Default: initialize all in need. 
+ return + + # Check for errors + early_datetime = datetime.strptime(early_date, "%Y-%m-%d") + late_datetime = datetime.strptime(late_date, "%Y-%m-%d") + + if early_datetime > late_datetime: + raise BadDateRangeError("The date range does not make sense.") + + initialize(initial_date=early_date, until_date=late_date) if __name__ == '__main__': - try: - if len(sys.argv) < 2: # No arguments - # Default is to build and clean anything that needs it. - # Specify custom date range in 'YYYY-mm-dd' string format - # or use variables such as OPENING_DAY, YESTERDAY_DAY. - Initialize() - if len(sys.argv) == 2: # One argument - day = sys.argv[1] - - Initialize( - initial_date=day, - until_date=day) - elif len(sys.argv) == 3: # Two arguments - initial_day = sys.argv[1] - until_day = sys.argv[2] - - Initialize( - initial_date=initial_day, - until_date=until_day) - elif len(sys.argv) > 3: - print ( - "Too many arguments. Enter a single date to build that one " + - "day, enter two days to build a range of days, or do not " + - "enter any days at all to build all days that need it. " + - "Use the format 'YYYY-MM-DD'.") - except Exception, error: - log.exception(error, exc_info=True) + arguments = docopt(__doc__, version="0.0.1") + cli(arguments) diff --git a/realestate/lib/libraries.py b/scripts/libraries.py similarity index 81% rename from realestate/lib/libraries.py rename to scripts/libraries.py index 5850046..37084fc 100644 --- a/realestate/lib/libraries.py +++ b/scripts/libraries.py @@ -1,18 +1,15 @@ # -*- coding: utf-8 -*- -''' -A collection of useful data, such as abbreviations, acronyms, neighborhood -names and noteworthy names (needs more work). 
-''' +"""A collection of abbreviations, acronyms and neighborhoods.""" class Library(object): - - '''The items.''' + """Common items to translate or clean.""" def __init__(self): + """The library items.""" # http://en.wikipedia.org/wiki/Street_or_road_name - # #Street_type_designations + # Street_type_designations self.assessor_abbreviations = [ # Major roads ['HIGHWAY', 'HW'], @@ -176,12 +173,12 @@ def __init__(self): ['9Th', '9th'], ['0Th', '0th'] ] - ''' + """ Not sure what to do for "St.". This --> [' St', ' St.'] would also pick up something such as 123 Straight Road. The same could conceivably happen with "Ave". "Dr" needs to become "Drive", but have the same problem - ''' + """ self.middle_initials = [ [' A ', ' A. '], [' B ', ' B. '], @@ -224,47 +221,3 @@ def __init__(self): ['6th', '6'], ['7th', '7'], ] - # todo: json with government offices: mayor, council, judges, etc - self.politicians = [ - "Mitch Landrieu", - "Andy Kopplin", - "Andrew Kopplin" - ] - self.council_members = [ - "Nadine Ramsey", - "Jason Williams", - "LaToya Cantrell", - "Stacy Head", - "Susan Guidry", - "Jared Brossett", - "James Gray", - "Jacquelyn Clarkson", - "Jackie Clarkson", - "Kristin Palmer", - "Cynthia Hedge-Morrell", - "Diana Bajoie", - "Ernest Charbonnet", - "Jon Johnson", - "Eric Granderson", - "Arnie Fielkow", - "James Carter", - "Cynthia Willard-Lewis", - "Shelley Midura" - ] - self.athletes = [ - "Drew Brees", - "Anthony Davis", - "Tyreke Evans", - "Eric Gordon" - ] - - # todo: get list from imdb or similar - self.celebrities = [ - "Sandra Bullock", - "Brad Pitt", - "Matthew McCaunaghey", - "David Simon" - ] - -if __name__ == '__main__': - pass diff --git a/scripts/local_backup.sh b/scripts/local_backup.sh new file mode 100755 index 0000000..423cd36 --- /dev/null +++ b/scripts/local_backup.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +source `which virtualenvwrapper.sh` + +workon realestate + +# Copy server backup files to local directory +rsync -avzh 
ubuntu@vault.thelensnola.org:/backups/realestate/ $PYTHONPATH/backups/ +rsync -avzh ubuntu@vault.thelensnola.org:/home/ubuntu/realestate/data/ $PYTHONPATH/data/ + +deactivate diff --git a/realestate/lib/mail.py b/scripts/mail.py similarity index 97% rename from realestate/lib/mail.py rename to scripts/mail.py index e4a4a45..a2361ab 100644 --- a/realestate/lib/mail.py +++ b/scripts/mail.py @@ -9,7 +9,8 @@ from os.path import basename from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText -from realestate import log + +from www import log class Mail(object): @@ -73,6 +74,7 @@ def send_with_attachment(self, files=None): content_type = 'application/octet-stream' main_type, sub_type = content_type.split('/', 1) + # TODO: Problem here for .encode() on bytes obj fp = open(f, 'rb') message = MIMEText(fp.read(), _subtype=sub_type) fp.close() diff --git a/scripts/main.sh b/scripts/main.sh index 3b83c4a..8408c0b 100755 --- a/scripts/main.sh +++ b/scripts/main.sh @@ -3,6 +3,8 @@ source `which virtualenvwrapper.sh` workon realestate -python /home/ubuntu/realestate/realestate/lib/scrape.py + +python /home/ubuntu/realestate/scripts/scrape.py python /home/ubuntu/realestate/scripts/initialize.py + deactivate diff --git a/scripts/make_db.py b/scripts/make_db.py index f2f9a35..3c6f400 100644 --- a/scripts/make_db.py +++ b/scripts/make_db.py @@ -2,165 +2,114 @@ """ Create database and make tables. + Creates the database if not yet created, creates tables, imports geographic data and creates spatial indexes. It makes use of `db.py`. 
""" -import os from sqlalchemy import create_engine +from sqlalchemy.exc import OperationalError from subprocess import call, Popen, PIPE -from realestate import log, DATABASE_NAME, GEO_DIR -from realestate import db +from www.db import Base +from www import log, DATABASE_NAME, ENGINE_STRING, GEO_DIR class MakeDB(object): - - '''Create database and make tables.''' + """Create database and make tables.""" def __init__(self): - '''docstring''' + """Create database.""" + try: + self._database_connection() + except OperationalError as error: + print(error) + log.exception(error, exc_info=True) - self.engine = create_engine( - 'postgresql://%s:%s@localhost/%s' % ( - os.environ.get('REAL_ESTATE_DATABASE_USERNAME'), - os.environ.get('REAL_ESTATE_DATABASE_PASSWORD'), - DATABASE_NAME - ) - ) + db_error = ( + '(psycopg2.OperationalError) FATAL: database "{}" ' + + 'does not exist').format(DATABASE_NAME) - def main(self): - '''Run all methods.''' + if str(error).strip() == db_error: + self.create_db() - self.create_db() - self.create_tables() - self.import_neighorhoods() - self.spatial_index_on_cleaned_geom() + self._create_tables() + self._import_neighorhoods() + self._spatial_index_on_cleaned_geom() - @staticmethod - def create_db(): - """ - Create database if it doesn\'t already exist. 
- """ + self.conn.close() + def create_db(self): + """Create database.""" log.debug('create_db') - try: - engine = create_engine( - 'postgresql://%s:%s@localhost/%s' % ( - os.environ.get('REAL_ESTATE_DATABASE_USERNAME'), - os.environ.get('REAL_ESTATE_DATABASE_PASSWORD'), - DATABASE_NAME - ) - ) - conn = engine.connect() - sql = "SELECT 1;" - conn.execute(sql) - except Exception, error: - log.exception(error, exc_info=True) - call([ - 'createdb', - '%s' % DATABASE_NAME - ]) - - engine = create_engine( - 'postgresql://%s:%s@localhost/%s' % ( - os.environ.get('REAL_ESTATE_DATABASE_USERNAME'), - os.environ.get('REAL_ESTATE_DATABASE_PASSWORD'), - DATABASE_NAME - ) - ) - engine.execute("CREATE EXTENSION POSTGIS;") - - conn.close() - - @staticmethod - def create_tables(): - '''Create tables in SQLAlchemy.''' + call(['createdb', DATABASE_NAME]) # Create database + self._database_connection() # Connect to database + self._add_db_extensions() # Add Postgres extensions + + def _add_db_extensions(self): + """Add Postgres extensions.""" + self.engine.execute("CREATE EXTENSION POSTGIS;") + + def _database_connection(self): + """Create connection to the database.""" + self.engine = create_engine(ENGINE_STRING) + self.conn = self.engine.connect() + def _create_tables(self): + """Create tables in SQLAlchemy.""" log.debug('create_tables') - engine = create_engine( - 'postgresql://%s:%s@localhost/%s' % ( - os.environ.get('REAL_ESTATE_DATABASE_USERNAME'), - os.environ.get('REAL_ESTATE_DATABASE_PASSWORD'), - DATABASE_NAME - ) - ) - db.Base.metadata.create_all(engine) + Base.metadata.create_all(self.engine) - @staticmethod - def import_neighorhoods(): - '''Import neighborhood shapefiles.''' + def _import_neighorhoods(self): + """Import neighborhood shapefiles.""" + # TODO: This causes errors on second run. 
log.debug('import_neighorhoods') - engine = create_engine( - 'postgresql://%s:%s@localhost/%s' % ( - os.environ.get('REAL_ESTATE_DATABASE_USERNAME'), - os.environ.get('REAL_ESTATE_DATABASE_PASSWORD'), - DATABASE_NAME - ) - ) - conn = engine.connect() - p1 = Popen( [ 'shp2pgsql', '-I', - '-a', # appends data to existing table. don't create. - '{0}/neighborhoods/shapefile'.format(GEO_DIR) + - '/Neighborhood_Statistical_Areas', + '-a', # Append data to existing table. Don't create. + ( + '{}/neighborhoods/shapefile/Neighborhood_Statistical_Areas' + ).format(GEO_DIR), 'neighborhoods' ], - stdout=PIPE - ) + stdout=PIPE) p2 = Popen( [ 'psql', '-d', - '%s' % DATABASE_NAME + DATABASE_NAME ], stdin=p1.stdout, - stdout=PIPE - ) + stdout=PIPE) p1.stdout.close() # Allow p1 to receive a SIGPIPE if p2 exits. p2.communicate()[0] # If need to alter geometry's SRID - conn.execute(""" + self.conn.execute(""" SELECT updategeometrysrid('neighborhoods', 'geom', 3452);""") - conn.execute(""" + self.conn.execute(""" ALTER TABLE neighborhoods ALTER COLUMN geom TYPE geometry(MultiPolygon, 4326) USING ST_Transform(geom, 4326);""") - conn.close() - - @staticmethod - def spatial_index_on_cleaned_geom(): - '''Create spatial index on cleaned table.''' - + def _spatial_index_on_cleaned_geom(self): + """Create spatial index on cleaned table.""" log.debug('spatial_index_on_cleaned_geom') - engine = create_engine( - 'postgresql://%s:%s@localhost/%s' % ( - os.environ.get('REAL_ESTATE_DATABASE_USERNAME'), - os.environ.get('REAL_ESTATE_DATABASE_PASSWORD'), - DATABASE_NAME - ) - ) - conn = engine.connect() - sql = """ - CREATE INDEX index_cleaned_geom ON cleaned USING GIST(geom);""" - - conn.execute(sql) + CREATE INDEX index_cleaned_geom + ON cleaned + USING GIST(geom);""" - conn.close() + self.conn.execute(sql) if __name__ == '__main__': - MakeDB().main() - # MakeDB().create_db() + MakeDB() diff --git a/realestate/lib/parse.py b/scripts/parse.py similarity index 84% rename from realestate/lib/parse.py 
rename to scripts/parse.py index ca699db..e7aa0c4 100644 --- a/realestate/lib/parse.py +++ b/scripts/parse.py @@ -1,51 +1,46 @@ # -*- coding: utf-8 -*- """ -This contains all of the logic for parsing the different pages for each sale -and makes use of the [Beautiful Soup] -(http://www.crummy.com/software/BeautifulSoup/bs4/doc/) library. Returns -either a dict of list of dicts. +Parsing logic for various pages for each sale. + +Returns either a dict of list of dicts. """ import os from bs4 import BeautifulSoup -from realestate.lib.utils import Utils -from realestate import log +from www.utils import ( + convert_amount) +from www import log -class AllPurposeParser(object): - '''Parsing that is not specific to any table.''' +class AllPurposeParser(object): + """Parsing that is not specific to any table.""" def __init__(self, html_path): - ''' - Receives path to HTML file. + """ + Receive path to HTML file. :param html_path: A path to a sale file. Ex. '/path/OPR123456789.html' - - ''' - + """ self.document_id = self.get_document_id(html_path) @staticmethod def get_document_id(html_path): """ - Receives the file path to a sale's HTML and returns the sale's - document ID. + Parse HTML and return the document ID. - :param html_path: A path to a sale file. Ex. '/path/OPR123456789.html' + :param html_path: A path to a sale HTML. Ex. '/path/OPR123456789.html' :type html_path: string :returns: A string containing the document ID. Ex. 
'OPR123456789' """ - doc_id = os.path.basename(html_path).split('.')[0] return doc_id class DetailParser(object): - - '''Parses the details section of the HTML.''' + """Parse the details section of the HTML.""" def __init__(self, html_path): """ @@ -59,7 +54,6 @@ def __init__(self, html_path): prior_conveyance_doc_type, cancel_status, remarks, no_pages_in_image, image """ - self.rows = self.get_rows(html_path) self.document_id = AllPurposeParser(html_path).document_id @@ -74,7 +68,7 @@ def __init__(self, html_path): self.page = self.get_field(7) self.document_date = self.get_field(8) self.document_recorded = self.get_field(9) - self.amount = Utils().convert_amount(self.get_field(10)) + self.amount = convert_amount(self.get_field(10)) self.status = self.get_field(11) self.prior_mortgage_doc_type = self.get_field(12) self.prior_conveyance_doc_type = self.get_field(13) @@ -91,9 +85,8 @@ def get_rows(self, html_path): :type html_path: string :returns: A list of the rows in the details table. """ - - html_file = open(html_path, 'r') - soup = BeautifulSoup(html_file.read()) + html_file = open(html_path, 'rb') + soup = BeautifulSoup(html_file.read(), "html.parser") rows = self.parse_rows(soup) @@ -105,14 +98,13 @@ def get_rows(self, html_path): @staticmethod def parse_rows(soup): """ - Receives BeautifulSoup object for details table and returns the + Receive BeautifulSoup object for details table and returns the rows. :param soup: A BeautifulSoup object for the details table. :type soup: object :returns: A list of the details table's rows. """ - rows = soup.find( 'table', id="ctl00_cphNoMargin_f_oprTab_tmpl0_documentInfoList" @@ -128,7 +120,6 @@ def get_field(self, row_id): :type row_id: string :returns: A string containing the field in the row. Ex. 'SALE' """ - cells = self.rows[row_id].find_all('td') field = str(cells[1].string) # 0 is key, 1 is value @@ -150,7 +141,6 @@ def form_dict(self): :returns: A dict containing all of the details table values. 
""" - log.debug('form_dict') dict_output = self.__dict__ @@ -161,21 +151,17 @@ def form_dict(self): class VendorParser(object): - - '''Parses the vendors section of the HTML.''' + """Parse the vendors section of the HTML.""" def __init__(self, html_path): - '''Establish self variables.''' - + """Establish self variables.""" self.rows = self.get_rows(html_path) - self.document_id = AllPurposeParser(html_path).document_id def get_rows(self, html_path): - '''Return rows for vendors.''' - - html_file = open(html_path, 'r') - soup = BeautifulSoup(html_file.read()) + """Return rows for vendors.""" + html_file = open(html_path, 'rb') + soup = BeautifulSoup(html_file.read(), "html.parser") rows = self.parse_rows(soup) @@ -187,14 +173,12 @@ def get_rows(self, html_path): @staticmethod def parse_rows(soup): """ - Receives BeautifulSoup object for vendors table and returns the - rows. + Receive BS object for vendors table and returns the rows. :param soup: A BeautifulSoup object for the vendors table. :type soup: object :returns: A list of the vendors table's rows. 
""" - rows = soup.find( 'table', id="ctl00_cphNoMargin_f_oprTab_tmpl0_DataList11" @@ -203,8 +187,7 @@ def parse_rows(soup): return rows def form_list(self): - '''Form list of dicts for vendors HTML.''' - + """Form list of dicts for vendors HTML.""" list_output = [] for i, row in enumerate(self.rows): @@ -224,34 +207,31 @@ def form_list(self): @staticmethod def get_field(row, cell_id): - '''Make corrections to dict values.''' - + """Make corrections to dict values.""" cells = row.find_all('span') cell = cells[cell_id] - if isinstance(cell, str) == 0: + if not isinstance(cell, str): cell = str(cell.string) + if cell.lower() == "none" or cell == '': cell = "" + return cell class VendeeParser(object): - - '''Parses the vendees section of the HTML.''' + """Parse the vendees section of the HTML.""" def __init__(self, html_path): - '''Establish self variables.''' - + """Establish self variables.""" self.rows = self.get_rows(html_path) - self.document_id = AllPurposeParser(html_path).document_id def get_rows(self, html_path): - '''Return rows for vendees.''' - - html_file = open(html_path, 'r') - soup = BeautifulSoup(html_file.read()) + """Return rows for vendees.""" + html_file = open(html_path, 'rb') + soup = BeautifulSoup(html_file.read(), "html.parser") rows = self.parse_rows(soup) @@ -270,16 +250,15 @@ def parse_rows(soup): :type soup: object :returns: A list of the vendees table's rows. 
""" - rows = soup.find( 'table', id="ctl00_cphNoMargin_f_oprTab_tmpl0_Datalist1" ).find_all('tr') + return rows def form_list(self): - '''Form list of dicts for vendees HTML.''' - + """Form list of dicts for vendees HTML.""" list_output = [] for i, row in enumerate(self.rows): @@ -299,34 +278,31 @@ def form_list(self): @staticmethod def get_field(row, cell_id): - '''Make corrections to dict values.''' - + """Make corrections to dict values.""" cells = row.find_all('span') cell = cells[cell_id] - if isinstance(cell, str) == 0: + if not isinstance(cell, str): cell = str(cell.string) - if cell.lower() == "none" or cell == '': + + if cell.lower() == "none": cell = "" + return cell class LocationParser(object): - - '''Parses the locations section of the HTML.''' + """Parse the locations section of the HTML.""" def __init__(self, html_path): - '''Establish self variables.''' - + """Establish self variables.""" self.rows = self.get_rows(html_path) - self.document_id = AllPurposeParser(html_path).document_id def get_rows(self, html_path): - '''Return rows for locations.''' - - html_file = open(html_path, 'r') - soup = BeautifulSoup(html_file.read()) + """Return rows for locations.""" + html_file = open(html_path, 'rb') + soup = BeautifulSoup(html_file.read(), "html.parser") rows = self.parse_rows(soup) @@ -338,14 +314,13 @@ def get_rows(self, html_path): @staticmethod def parse_rows(soup): """ - Receives BeautifulSoup object for locations table and returns the + Receive BeautifulSoup object for locations table and returns the rows. :param soup: A BeautifulSoup object for the locations table. :type soup: object :returns: A list of the locations table's rows. 
""" - rows = soup.find( 'table', id="ctl00_cphNoMargin_f_oprTab_tmpl1_ComboLegals" @@ -357,15 +332,16 @@ def parse_rows(soup): return rows def form_list(self): - '''Form list of dicts for locations HTML.''' - + """Form list of dicts for locations HTML.""" list_output = [] # Find number of mini tables: # 9 rows per table. A total of 9 rows if one table, but a total of # 19 rows if two, 29 if three, etc. # (Because of border row that only appears once multiple tables - number_of_tables = ((len(self.rows) - 9) / 10) + 1 + number_of_tables = int( + (len(self.rows) - 9) / 10 + ) + 1 for table_no in range(0, number_of_tables): dict_output = { @@ -381,8 +357,7 @@ def form_list(self): 'weeks': self.get_weeks(table_no), 'cancel_status_unit': self.get_cancel_status_unit(table_no), 'freeform_legal': self.get_freeform_legal(table_no), - 'document_id': self.document_id - } + 'document_id': self.document_id} # log.debug('dict_output:') # log.debug(dict_output) list_output.append(dict_output) @@ -391,24 +366,24 @@ def form_list(self): @staticmethod def convert_to_string(value): - ''' - Convert value to string. If value is equal to "None" in the HTML, then - rewrite to "". + """ + Convert value to string. If value is "None," then change to "". :param value: str. The value to convert. :type value: str :returns: str. The value as a string, and maybe an empty string if it matches certain criteria. - ''' - - if isinstance(value, str) == 0: + """ + if not isinstance(value, str): value = str(value.string) + if value.lower() == "none": value = "" + return value def get_field(self, row_index, cell_index, table_no): - ''' + """ Receive the table number and row and cell indeces. Return the field value for that location. @@ -420,8 +395,7 @@ def get_field(self, row_index, cell_index, table_no): :param table_notable_no: Int. Which locations table in the HTML. :type table_no: int :returns: String. The field for this (row, cell) location. 
- ''' - + """ overall_index = table_no * 10 + row_index field = self.rows[overall_index].find_all('span')[cell_index] @@ -429,14 +403,13 @@ def get_field(self, row_index, cell_index, table_no): return self.convert_to_string(field) def get_subdivision(self, table_no): - ''' - Receives the table number and returns the subdivision field value. + """ + Receive the table number and returns the subdivision field value. :param table_no: Int. Which locations table in the HTML. :type table_no: int :returns: String. The subdivision listed in this particular table. - ''' - + """ row_index = 0 cell_index = 1 @@ -445,14 +418,13 @@ def get_subdivision(self, table_no): return subdivision def get_condo(self, table_no): - ''' - Receives the table number and returns the condo field value. + """ + Receive the table number and returns the condo field value. :param table_no: Int. Which locations table in the HTML. :type table_no: int :returns: String. The condo listed in this particular table. - ''' - + """ row_index = 1 cell_index = 1 @@ -461,14 +433,13 @@ def get_condo(self, table_no): return condo def get_district(self, table_no): - ''' + """ Receives the table number and returns the district field value. :param table_no: Int. Which locations table in the HTML. :type table_no: int :returns: String. The district listed in this particular table. - ''' - + """ row_index = 3 cell_index = 1 @@ -477,14 +448,13 @@ def get_district(self, table_no): return district def get_square(self, table_no): - ''' + """ Receives the table number and returns the square field value. :param table_no: Int. Which locations table in the HTML. :type table_no: int :returns: String. The square listed in this particular table. - ''' - + """ row_index = 3 cell_index = 3 @@ -493,14 +463,13 @@ def get_square(self, table_no): return square def get_street_number(self, table_no): - ''' + """ Receives the table number and returns the street number field value. :param table_no: Int. Which locations table in the HTML. 
:type table_no: int :returns: String. The street number listed in this particular table. - ''' - + """ row_index = 5 cell_index = 1 @@ -509,14 +478,13 @@ def get_street_number(self, table_no): return street_number def get_address(self, table_no): - ''' + """ Receives the table number and returns the address field value. :param table_no: Int. Which locations table in the HTML. :type table_no: int :returns: String. The address listed in this particular table. - ''' - + """ row_index = 5 cell_index = 3 @@ -527,14 +495,13 @@ def get_address(self, table_no): return address def get_unit(self, table_no): - ''' + """ Receives the table number and returns the unit field value. :param table_no: Int. Which locations table in the HTML. :type table_no: int :returns: String. The unit listed in this particular table. - ''' - + """ row_index = 6 cell_index = 1 @@ -543,14 +510,13 @@ def get_unit(self, table_no): return unit def get_weeks(self, table_no): - ''' + """ Receives the table number and returns the weeks field value. :param table_no: Int. Which locations table in the HTML. :type table_no: int :returns: String. The weeks listed in this particular table. - ''' - + """ row_index = 6 cell_index = 3 @@ -559,7 +525,7 @@ def get_weeks(self, table_no): return weeks def get_cancel_status_unit(self, table_no): - ''' + """ Receives the table number and returns the first cancel status field value. @@ -567,8 +533,7 @@ def get_cancel_status_unit(self, table_no): :type table_no: int :returns: String. The first cancel status listed in this particular table. - ''' - + """ row_index = 6 cell_index = 5 @@ -577,14 +542,13 @@ def get_cancel_status_unit(self, table_no): return cancel_status_unit def get_freeform_legal(self, table_no): - ''' + """ Receives the table number and returns the freeform legal field value. :param table_no: Int. Which locations table in the HTML. :type table_no: int :returns: String. The freeform legal listed in this particular table. 
- ''' - + """ row_index = 8 cell_index = 1 @@ -593,7 +557,7 @@ def get_freeform_legal(self, table_no): return freeform_legal def get_cancel_status_lot(self, table_no): - ''' + """ Receives the table number and returns the second cancel status field value. @@ -601,8 +565,7 @@ def get_cancel_status_lot(self, table_no): :type table_no: int :returns: String. The second cancel status listed in this particular table. - ''' - + """ row_index = 3 overall_index = table_no * 10 + row_index @@ -618,7 +581,7 @@ def get_cancel_status_lot(self, table_no): return cancel_status_lot def get_lot(self, table_no): - ''' + """ Receives the table number and returns the lot field value. Includes checks for when there are "Lot from" and "Lot to" fields, or just a single "Lot" field. @@ -626,8 +589,7 @@ def get_lot(self, table_no): :param table_no: Int. Which locations table in the HTML. :type table_no: int :returns: String. The lot listed in this particular table. - ''' - + """ row_index = 3 overall_index = table_no * 10 + row_index @@ -644,15 +606,15 @@ def get_lot(self, table_no): elif len(frm) != 0 and len(to) == 0: lot = frm elif len(frm) != 0 and len(to) != 0: - lot = frm + " to " + to + lot = "{0} to {1}".format(frm, to) else: lot = "" return lot -if __name__ == '__main__': - # html_path = ( - # '%s/' % DATA_DIR + - # 'raw/2014-02-18/form-html/OPR288694480.html') - # print LocationParser(html_path).form_list() - pass +# if __name__ == '__main__': +# # html_path = ( +# # '%s/' % DATA_DIR + +# # 'raw/2014-02-18/form-html/OPR288694480.html') +# # print(LocationParser(html_path).form_list()) +# pass diff --git a/realestate/lib/publish.py b/scripts/publish.py similarity index 78% rename from realestate/lib/publish.py rename to scripts/publish.py index ae8e49f..64a8cda 100644 --- a/realestate/lib/publish.py +++ b/scripts/publish.py @@ -1,8 +1,9 @@ # -*- coding: utf-8 -*- -''' -Runs checks against the `cleaned` table to make sure information is suitable -for publication. 
Checks for things such as lat/long coordinates outside of +""" +Run checks on `cleaned` table to make sure data is ready for publication. + +Checks for things such as lat/long coordinates outside of New Orleans, sale amounts that are questionably high or low and whether the sale has a date or not. @@ -11,25 +12,19 @@ will be set as False. Sales will only appear in the table if `detail_publish` is True but `location_publish` is False. If both are False, then sale won't appear at all. -''' +""" -import os from datetime import datetime, timedelta -from realestate.db import ( - Detail, - Location -) -from realestate import log, SESSION +from www.db import Detail, Location +from www import log, SESSION class Publish(object): - - '''Runs checks for data integrity.''' + """Run checks for data integrity.""" def __init__(self, initial_date=None, until_date=None): - '''Initialize self variables and establish connection to database.''' - + """Initialize self variables and establish connection to database.""" self.initial_date = initial_date self.until_date = until_date @@ -37,11 +32,11 @@ def __init__(self, initial_date=None, until_date=None): log.debug('self.until_date: %s', self.until_date) def make_all_locations_publishable(self): - ''' - Assume all sales are publishable. Set location_publish = 1. - Then set to 0 if something questionable is found. - ''' + """ + Assume all sales are publishable. + Set location_publish = 1. Then set to 0 if questionable data is found. + """ # Assume publishable, then check for reasons not to publish. 
SESSION.query( Location.location_publish @@ -52,15 +47,14 @@ def make_all_locations_publishable(self): try: with SESSION.begin_nested(): SESSION.flush() - except Exception, error: + except Exception as error: log.exception(error, exc_info=True) SESSION.rollback() SESSION.commit() def check_geocoder_good_rating(self): - '''Check if PostGIS Geocoder rating scored 3 or lower: good.''' - + """Check if PostGIS Geocoder rating scored 3 or lower: good.""" SESSION.query( Location.rating, Location.location_publish @@ -72,15 +66,14 @@ def check_geocoder_good_rating(self): try: with SESSION.begin_nested(): SESSION.flush() - except Exception, error: + except Exception as error: log.exception(error, exc_info=True) SESSION.rollback() SESSION.commit() def check_geocoder_bad_rating(self): - '''Check if PostGIS Geocoder rating scored higher than 3: bad.''' - + """Check if PostGIS Geocoder rating scored higher than 3: bad.""" SESSION.query( Location.rating, Location.location_publish @@ -93,17 +86,14 @@ def check_geocoder_bad_rating(self): try: with SESSION.begin_nested(): SESSION.flush() - except Exception, error: + except Exception as error: log.exception(error, exc_info=True) SESSION.rollback() SESSION.commit() def check_west_of_new_orleans(self): - ''' - Check if geocoded lat/long found is within west border of New Orleans. - ''' - + """Check if geocoded coords are within west border of New Orleans.""" # Long less than -90.140388 is west of New Orleans: SESSION.query( Location.longitude, @@ -117,17 +107,14 @@ def check_west_of_new_orleans(self): try: with SESSION.begin_nested(): SESSION.flush() - except Exception, error: + except Exception as error: log.exception(error, exc_info=True) SESSION.rollback() SESSION.commit() def check_east_of_new_orleans(self): - ''' - Check if geocoded lat/long found is within east border of New Orleans. 
- ''' - + """Check if geocoded coords are within east border of New Orleans.""" # Long greater than -89 is east of New Orleans: SESSION.query( Location.longitude, @@ -141,17 +128,14 @@ def check_east_of_new_orleans(self): try: with SESSION.begin_nested(): SESSION.flush() - except Exception, error: + except Exception as error: log.exception(error, exc_info=True) SESSION.rollback() SESSION.commit() def check_south_of_new_orleans(self): - ''' - Check if geocoded lat/long found is within south border of New Orleans. - ''' - + """Check if geocoded coords are within south border of New Orleans.""" # Lat less than 29.864543 is south of New Orleans: SESSION.query( Location.latitude, @@ -165,17 +149,14 @@ def check_south_of_new_orleans(self): try: with SESSION.begin_nested(): SESSION.flush() - except Exception, error: + except Exception as error: log.exception(error, exc_info=True) SESSION.rollback() SESSION.commit() def check_north_of_new_orleans(self): - ''' - Check if geocoded lat/long found is within north border of New Orleans. - ''' - + """Check if geocoded coords are within north border of New Orleans.""" # Lat less than 29.864543 is north of New Orleans: SESSION.query( Location.latitude, @@ -189,18 +170,18 @@ def check_north_of_new_orleans(self): try: with SESSION.begin_nested(): SESSION.flush() - except Exception, error: + except Exception as error: log.exception(error, exc_info=True) SESSION.rollback() SESSION.commit() def make_all_details_publishable(self): - ''' - Assume all sales are publishable. Set detail_publishable = 1. - Then set to 0 if something questionable is found. - ''' + """ + Assume all sales are publishable. + Set detail_publishable = 1. Then set to 0 if questionable data found. + """ # Assume publishable, then check for reasons not to publish. 
SESSION.query( Detail.detail_publish @@ -211,15 +192,14 @@ def make_all_details_publishable(self): try: with SESSION.begin_nested(): SESSION.flush() - except Exception, error: + except Exception as error: log.exception(error, exc_info=True) SESSION.rollback() SESSION.commit() def check_if_no_date(self): - '''Check if sale has a date.''' - + """Check if sale has a date.""" SESSION.query( Detail.document_date, Detail.document_recorded, @@ -234,18 +214,14 @@ def check_if_no_date(self): try: with SESSION.begin_nested(): SESSION.flush() - except Exception, error: + except Exception as error: log.exception(error, exc_info=True) SESSION.rollback() SESSION.commit() def check_relative_date(self): - ''' - Check if the sale date is more than 6 months prior to the date the - sale was recorded. - ''' - + """Check if sale date is >6 months prior to the recorded date.""" # Convert date strings to date format new_initial_date = datetime.strptime( self.initial_date, '%Y-%m-%d').date() @@ -274,13 +250,13 @@ def check_relative_date(self): Detail.document_date, Detail.detail_publish ).filter( - Detail.document_recorded == '%s' % current_date_string + Detail.document_recorded == current_date_string ).filter( - Detail.document_date < '%s' % old_date_string + Detail.document_date < old_date_string ).update({"detail_publish": False}) SESSION.flush() - except Exception, error: + except Exception as error: log.exception(error, exc_info=True) SESSION.rollback() @@ -291,15 +267,15 @@ def check_relative_date(self): Detail.document_date, Detail.detail_publish ).filter( - Detail.document_recorded == '%s' % current_date_string + Detail.document_recorded == current_date_string ).filter( - Detail.document_date > '%s' % previous_date_string + Detail.document_date > previous_date_string ).update({ "detail_publish": False }) SESSION.flush() - except Exception, error: + except Exception as error: log.exception(error, exc_info=True) SESSION.rollback() @@ -307,11 +283,8 @@ def check_relative_date(self): 
current_date = current_date + timedelta(days=1) - # session.close() - def check_low_amount(self): - '''Check if sale amount is unreasonably low (<= $0).''' - + """Check if sale amount is unreasonably low (<= $0).""" # Not sure about these, so check them all for now to be safe SESSION.query( Detail.amount, @@ -325,15 +298,14 @@ def check_low_amount(self): try: with SESSION.begin_nested(): SESSION.flush() - except Exception, error: + except Exception as error: log.exception(error, exc_info=True) SESSION.rollback() SESSION.commit() def check_high_amount(self): - '''Check if sale amount is unreasonably high (>= $20,000,000).''' - + """Check if sale amount is unreasonably high (>= $20,000,000).""" # Anything over $20,000,000 wouldn't be impossible, but is rare SESSION.query( Detail.amount, @@ -347,17 +319,16 @@ def check_high_amount(self): try: with SESSION.begin_nested(): SESSION.flush() - except Exception, error: + except Exception as error: log.exception(error, exc_info=True) SESSION.rollback() SESSION.commit() def main(self): - '''Runs through each check method.''' - + """Run through each check method.""" log.debug('Publish') - print 'Publishing...' + print('Publishing...') self.make_all_locations_publishable() self.check_geocoder_bad_rating() diff --git a/scripts/rebuild.sh b/scripts/rebuild.sh new file mode 100755 index 0000000..2330fdd --- /dev/null +++ b/scripts/rebuild.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +source `which virtualenvwrapper.sh` + +workon realestate + +bash $PYTHONPATH/scripts/delete_db.sh +python $PYTHONPATH/scripts/make_db.py +python $PYTHONPATH/scripts/initialize.py + +deactivate diff --git a/realestate/lib/scrape.py b/scripts/scrape.py similarity index 72% rename from realestate/lib/scrape.py rename to scripts/scrape.py index d528168..9c73a49 100644 --- a/realestate/lib/scrape.py +++ b/scripts/scrape.py @@ -1,33 +1,51 @@ # -*- coding: utf-8 -*- """ -The daily scraper that checks for the previous day's sales. +Scrape land record data. 
It uses [Selenium](http://github.com/SeleniumHQ/selenium/tree/master/py) and [PhantomJS](http://phantomjs.org/) to save the HTML. This also makes a note of when each date was scraped and what the Land Records Division's permanent date range was at the time of that scrape (see `check_temp_status.py` for details). + +Usage: + scrape.py + scrape.py + scrape.py + +Options: + -h, --help Show help screen. + --version Show version number. + +Dates are in the format YYYY-MM-DD. Ex. 2016-12-31 """ import os import re -import sys -import time import glob +import time from bs4 import BeautifulSoup +from datetime import timedelta, datetime +from docopt import docopt from selenium import webdriver from selenium.webdriver.common.keys import Keys -from datetime import timedelta, datetime -from realestate.lib.mail import Mail -from realestate import log, YESTERDAY_DAY, PROJECT_DIR, LOG_FILE + +from scripts.mail import Mail +from www import log, YESTERDAY_DAY, PROJECT_DIR, LOG_FILE # Uncomment for local development and testing: # from selenium.webdriver.common.desired_capabilities import ( # DesiredCapabilities) +class BadDateRangeError(Exception): + """Error for when date range is backward.""" + + pass + + class Scrape(object): """ Navigate and scrape the Land Records Division. 
@@ -46,7 +64,7 @@ def __init__(self, # PhantomJS for headless browser in production self.driver = webdriver.PhantomJS( - executable_path='%s/scripts/phantomjs' % PROJECT_DIR, + executable_path='{}/scripts/phantomjs'.format(PROJECT_DIR), service_log_path=LOG_FILE, port=0) @@ -59,7 +77,7 @@ def __init__(self, # firefox_capabilities = DesiredCapabilities.FIREFOX # firefox_capabilities['marionette'] = True # self.driver = webdriver.Firefox( - # executable_path='%s/scripts/wires' % PROJECT_DIR, + # executable_path='{}/scripts/wires'.format(PROJECT_DIR), # capabilities=firefox_capabilities, # timeout=60) @@ -120,7 +138,7 @@ def is_logged_in(self): self.driver.find_element_by_id("Header1_lnkLogout") log.debug("Login successful.") return True - except Exception, error: + except Exception as error: log.debug("Login failed.") log.error(error, exc_info=True) return False @@ -155,18 +173,19 @@ def find_permanent_date_range(self): def get_date_range_html(self): """Get search page HTML.""" - date_range_html = self.driver.page_source - - return date_range_html + return self.driver.page_source @staticmethod def delete_permanent_date_range_when_scraped_file(year, month, day): """Delete old permanent-date-range-when-scraped*.html.""" log.info('Delete old permanent-date-range-when-scraped*.html') - for file_path in glob.glob("%s/data/raw/" % (PROJECT_DIR) + - "%s-%s-%s/" % (year, month, day) + - "permanent-date-range-when-scraped*.html"): + string = ( + "{0}/data/raw/{1}-{2}-{3}/" + + "permanent-date-range-when-scraped*.html").format( + PROJECT_DIR, year, month, day) + + for file_path in glob.glob(string): os.remove(file_path) @staticmethod @@ -177,10 +196,12 @@ def save_permanent_date_range_when_scraped_file(year, month, day, # Save permanent date range for this individual sale. 
log.info('Save new permanent-date-range-when-scraped*.html file') individual_html_out = open( - "%s/data/raw/" % PROJECT_DIR + - "%s-%s-%s/permanent-date-range-when-scraped" % (year, month, day) + - "_%s-%s.html" % (first_date, second_date), - "w") + ( + "{0}/data/raw/{1}-{2}-{3}/" + + "permanent-date-range-when-scraped_{4}-{5}.html").format( + PROJECT_DIR, year, month, day, first_date, second_date + ), + "wb") individual_html_out.write(date_range_html.encode('utf-8')) individual_html_out.close() @@ -191,7 +212,7 @@ def delete_permanent_date_range_file(): log.info( 'Delete old most-recent-permanent-date-range/*.html file') - file_string = "%s/data/most-recent-permanent-date-range/*.html" % ( + file_string = "{}/data/most-recent-permanent-date-range/*.html".format( PROJECT_DIR) for file_path in glob.glob(file_string): os.remove(file_path) @@ -202,10 +223,10 @@ def save_permanent_date_range_file(date_range_html, second_date): """Save new most-recent-permanent-date-range/*.html.""" log.info('Save new most-recent-permanent-date-range/*.html file') - overall_html_out = open("%s/data/" % (PROJECT_DIR) + - "most-recent-permanent-date-range/" + - "%s-%s.html" % (first_date, second_date), - "w") + overall_html_out = open( + "{0}/data/most-recent-permanent-date-range/{1}-{2}.html".format( + PROJECT_DIR, first_date, second_date), + "wb") overall_html_out.write(date_range_html.encode('utf-8')) overall_html_out.close() @@ -300,22 +321,24 @@ def parse_results(self, year, month, day): "cphNoMargin_cphNoMargin_OptionsBar1_ItemList") log.info('Find option') options = item_list_elem.find_elements_by_tag_name("option") - except Exception, error: + except Exception as error: # Save table page log.error(error, exc_info=True) log.info('No sales for this day') - html_out = open("%s/data/raw/%s-%s-%s/page-html/page1.html" - % (PROJECT_DIR, year, month, day), "w") + html_out = open( + "{0}/data/raw/{1}-{2}-{3}/page-html/page1.html".format( + PROJECT_DIR, year, month, day + ), "wb") 
html_out.write((self.driver.page_source).encode('utf-8')) html_out.close() return total_pages = int(options[-1].get_attribute('value')) - log.debug('%d pages to parse for %s-%s-%s', - total_pages, year, month, day) + log.debug('{0} pages to parse for {1}-{2}-{3}'.format( + total_pages, year, month, day)) for i in range(1, total_pages + 1): - log.debug('Page: %d', i) + log.debug('Page: {}'.format(i)) self.parse_page(i, year, month, day) time.sleep(15.0) @@ -324,12 +347,14 @@ def parse_page(self, i, year, month, day): """Parse results page for sale document IDs.""" # Save table page log.info('Parse results page table HTML') - html_out = open("%s/data/raw/%s-%s-%s/page-html/page%d.html" - % (PROJECT_DIR, year, month, day, i), "w") + html_out = open(( + "{0}/data/raw/{1}-{2}-{3}/page-html/page{4}.html").format( + PROJECT_DIR, year, month, day, i), + "wb") html_out.write((self.driver.page_source).encode('utf-8')) html_out.close() - bs_file = "%s/data/raw/%s-%s-%s/page-html/page%d.html" % ( + bs_file = "{0}/data/raw/{1}-{2}-{3}/page-html/page{4}.html".format( PROJECT_DIR, year, month, day, i) soup = BeautifulSoup(open(bs_file), "html.parser") @@ -338,12 +363,13 @@ def parse_page(self, i, year, month, day): # For this one page rows = soup.find_all('td', class_="igede12b9e") # List of Object IDs - log.debug('There are %d rows for this page', len(rows)) + log.debug('There are {} rows for this page'.format(len(rows))) for j in range(1, len(rows)): + overall_row = (i - 1) * 20 + j log.debug( - 'Analyzing overall row %d for %s-%s-%s', - (i - 1) * 20 + j, year, month, day) + 'Analyzing overall row {0} for {1}-{2}-{3}'.format( + overall_row, year, month, day)) self.parse_sale(j, rows, year, month, day) time.sleep(5.0) @@ -372,37 +398,37 @@ def parse_sale(self, j, rows, year, month, day): document_id = rows[j].string log.debug( - 'Saving HTML for %s on %s-%s-%s', document_id, year, month, day) + 'Saving HTML for {0} on {1}-{2}-{3}'.format( + document_id, year, month, day)) 
single_sale_url = ( "http://onlinerecords.orleanscivilclerk.com/" + "RealEstate/SearchResults.aspx?" + "global_id={}" + "&type=dtl").format(document_id) - log.debug('Sale URL: %s', single_sale_url) + log.debug('Sale URL: {}'.format(single_sale_url)) try: - log.info('Loading %s', single_sale_url) + log.info('Loading {}'.format(single_sale_url)) self.load_url(single_sale_url) - except Exception, error: + except Exception as error: log.error(error, exc_info=True) html = self.driver.page_source log.info('Save this sale HTML') - html_file = "%s/data/raw/" % (PROJECT_DIR) + \ - "%s-%s-%s/" % (year, month, day) + \ - "form-html/%s.html" % (document_id) + html_file = "{0}/data/raw/{1}-{2}-{3}/form-html/{4}.html".format( + PROJECT_DIR, year, month, day, document_id) - html_out = open(html_file, "w") + html_out = open(html_file, "wb") html_out.write(html.encode('utf-8')) html_out.close() try: - assert not self.check_if_error(html_file) - except Exception, error: - log.debug('Deleting error page: %s', html_file) + assert not self.is_error_page(html_file) + except Exception as error: + log.debug('Deleting error page: {}'.format(html_file)) log.error(error, exc_info=True) os.remove(html_file) @@ -410,17 +436,13 @@ def parse_sale(self, j, rows, year, month, day): return @staticmethod - def check_if_error(html): + def is_error_page(html): """Check if the downloaded single sale HTML is an error page.""" soup = BeautifulSoup(open(html), "html.parser") - title = soup.find_all('title')[0].string - if title == 'Error': - log.debug('Error page was downloaded.') - return True - else: - return False + # log.debug('Error page was downloaded.') + return title == 'Error' # Logout def logout(self): @@ -455,19 +477,19 @@ def cycle_through_dates(self): log.debug(year + '-' + month + '-' + day) # Check if folder for this day exists. If not, then make one. 
- pagedir = "%s/data/raw/%s-%s-%s/page-html" % ( + pagedir = "{0}/data/raw/{1}-{2}-{3}/page-html".format( PROJECT_DIR, year, month, day) log.debug(pagedir) - formdir = "%s/data/raw/%s-%s-%s/form-html" % ( + formdir = "{0}/data/raw/{1}-{2}-{3}/form-html".format( PROJECT_DIR, year, month, day) log.debug(formdir) if not os.path.exists(pagedir): - log.info('Making %s', pagedir) + log.info('Making {}'.format(pagedir)) os.makedirs(pagedir) if not os.path.exists(formdir): - log.info('Making %s', formdir) + log.info('Making {}'.format(formdir)) os.makedirs(formdir) search_date = month + day + year @@ -489,7 +511,7 @@ def main(self): try: self.cycle_through_dates() - except Exception, error: + except Exception as error: log.error(error, exc_info=True) m = Mail( @@ -505,23 +527,82 @@ def main(self): log.info('Done!') + +def cli_has_errors(arguments): + """Check for any CLI parsing errors.""" + all_arguments = ( + arguments[''] is not None and + arguments[''] is not None and + arguments[''] is not None) + + if all_arguments: + # print("Must use single date or date range, but not both.") + return True + + single_and_other_arguments = ( + ( + arguments[''] is not None and + arguments[''] is not None + ) or + ( + arguments[''] is not None and + arguments[''] is not None + )) + + if single_and_other_arguments: + # print("Cannot use a single date and a date range bound.") + return True + + one_date_bound_only = ( + ( + arguments[''] is not None and + arguments[''] is None + ) or + ( + arguments[''] is None and + arguments[''] is not None + )) + + if one_date_bound_only: + # print("Must pick both ends of a date range bound.") + return True + + # All good + return False + + +def cli(arguments): + """Parse command-line arguments.""" + # Catch any missed errors + if cli_has_errors(arguments): + return + + if arguments['']: # Single date + early_date = arguments[''] + late_date = arguments[''] + + log.info('Scraping single date: {}.'.format(early_date)) + elif arguments[''] and 
arguments['']: # Date range + early_date = arguments[''] + late_date = arguments[''] + + log.info('Scraping date range: {0} to {1}.'.format( + early_date, late_date)) + else: # No dates provided + log.info('Scraping yesterday.') + + Scrape().main() # Default: scrape yesterday. + return + + # Check for errors + early_datetime = datetime.strptime(early_date, "%Y-%m-%d") + late_datetime = datetime.strptime(late_date, "%Y-%m-%d") + + if early_datetime > late_datetime: + raise BadDateRangeError("The date range does not make sense.") + + Scrape(initial_date=early_date, until_date=late_date).main() + if __name__ == '__main__': - if len(sys.argv) < 2: # No arguments, default to yesterday date. - Scrape().main() - elif len(sys.argv) == 2: # One argument - day = sys.argv[1] - - s = Scrape(initial_date=day, until_date=day) - s.main() - elif len(sys.argv) == 3: # Two arguments - initial_day = sys.argv[1] - until_day = sys.argv[2] - - s = Scrape(initial_date=initial_day, until_date=until_day) - s.main() - elif len(sys.argv) > 3: - print( - "Too many arguments. Enter a single date to scrape that one " + - "day, enter two days to scrape a range of days, or do not " + - "enter any days at all to scrape yesterday. " + - "Use the format 'YYYY-MM-DD'.") + arguments = docopt(__doc__, version="0.0.1") + cli(arguments) diff --git a/scripts/screen.js b/scripts/screen.js deleted file mode 100644 index e71e969..0000000 --- a/scripts/screen.js +++ /dev/null @@ -1,55 +0,0 @@ -// https://github.com/ariya/phantomjs/blob/master/examples/rasterize.js -// http://stackoverflow.com/questions/11917042/how-to-render-part-of-a-page-with-phantomjs - -var page = require('webpage').create(), - system = require('system'), - address, output, size; - -address = system.args[1]; -output = system.args[2]; -page.viewportSize = { width: 600, height: 600 }; -if (system.args.length > 3 && system.args[2].substr(-4) === ".pdf") { - size = system.args[3].split('*'); - page.paperSize = size.length === 2 ? 
{ width: size[0], height: size[1], margin: '0px' } - : { format: system.args[3], orientation: 'portrait', margin: '1cm' }; -} else if (system.args.length > 3 && system.args[3].substr(-2) === "px") { - size = system.args[3].split('*'); - if (size.length === 2) { - pageWidth = parseInt(size[0], 10); - pageHeight = parseInt(size[1], 10); - page.viewportSize = { width: pageWidth, height: pageHeight }; - page.clipRect = { top: 0, left: 0, width: pageWidth, height: pageHeight }; - } else { - console.log("size:", system.args[3]); - pageWidth = parseInt(system.args[3], 10); - pageHeight = parseInt(pageWidth * 3/4, 10); - console.log ("pageHeight:",pageHeight); - page.viewportSize = { width: pageWidth, height: pageHeight }; - } -} -if (system.args.length > 4) { - page.zoomFactor = system.args[4]; -} -page.open(address, function (status) { - if (status !== 'success') { - console.log('Unable to load the address!'); - phantom.exit(1); - } else { - window.setTimeout(function () { - var clipRect = page.evaluate(function() { - document.querySelector('.leaflet-top').style.display = 'none'; - document.querySelector('.leaflet-right').style.display = 'none'; - document.querySelector('.leaflet-control-attribution').style.display = 'none'; - return document.querySelector('#map').getBoundingClientRect(); - }); - page.clipRect = { - top: clipRect.top, - left: clipRect.left, - width: clipRect.width, - height: clipRect.height - }; - page.render(output); - phantom.exit(); - }, 5200); - } -}); diff --git a/scripts/screen.py b/scripts/screen.py deleted file mode 100644 index f1f67a0..0000000 --- a/scripts/screen.py +++ /dev/null @@ -1,13 +0,0 @@ -# -*- coding: utf-8 -*- - -from subprocess import call -from realestate import PROJECT_DIR - -name = 'test' - -PICTURES_DIR = '%s/realestate/static/pictures' % PROJECT_DIR - -call(['%s/bin/phantomjs' % PROJECT_DIR, - '%s/bin/screen.js' % PROJECT_DIR, - 'http://vault.thelensnola.org/realestate/sale/2015-13092', - '%s/tweets/%s.png' % (PICTURES_DIR, 
name)]) diff --git a/realestate/lib/stat_analysis.py b/scripts/stat_analysis.py similarity index 53% rename from realestate/lib/stat_analysis.py rename to scripts/stat_analysis.py index ec7c764..b76b658 100644 --- a/realestate/lib/stat_analysis.py +++ b/scripts/stat_analysis.py @@ -1,39 +1,32 @@ # -*- coding: utf-8 -*- -'''Commmon statistical analyses, like high amounts, sales per day, etc.''' +"""Commmon statistical analyses, like high amounts, sales per day, etc.""" + +# import os -import os from sqlalchemy import create_engine -from realestate import DATABASE_NAME +from www import ENGINE_STRING -class StatAnalysis(object): - '''Commmon statistical analyses.''' +class StatAnalysis(object): + """Commmon statistical analyses.""" def __init__(self, initial_date=None, until_date=None): - '''Initialize self variables and establish connection to database.''' - + """Initialize self variables and establish connection to database.""" self.table = 'cleaned' self.initial_date = initial_date self.until_date = until_date - self.engine = create_engine( - 'postgresql://%s:%s@localhost/%s' % ( - os.environ.get('REAL_ESTATE_DATABASE_USERNAME'), - os.environ.get('REAL_ESTATE_DATABASE_PASSWORD'), - DATABASE_NAME - ) - ) + self.engine = create_engine(ENGINE_STRING) def count(self): - '''Get number of records.''' - + """Get number of records.""" sql = """ SELECT COUNT(*) - FROM %s - WHERE document_recorded >= '%s' AND document_recorded <= '%s'; - """ % (self.table, self.initial_date, self.until_date) + FROM {0} + WHERE document_recorded >= '{1}' AND document_recorded <= '{2}'; + """.format(self.table, self.initial_date, self.until_date) result = self.engine.execute(sql) @@ -42,15 +35,14 @@ def count(self): return count def detail_not_published(self): - '''Get rows that have unpublishable detail data.''' - + """Get rows that have unpublishable detail data.""" sql = """ SELECT COUNT(*) - FROM %s + FROM {0} WHERE detail_publish = False - AND document_recorded >= '%s' - AND 
document_recorded <= '%s'; - """ % (self.table, self.initial_date, self.until_date) + AND document_recorded >= '{1}' + AND document_recorded <= '{2}'; + """.format(self.table, self.initial_date, self.until_date) result = self.engine.execute(sql) for row in result: @@ -58,15 +50,14 @@ def detail_not_published(self): return count def detail_published(self): - '''Get rows that have publishable detail data.''' - + """Get rows that have publishable detail data.""" sql = """ SELECT COUNT(*) - FROM %s + FROM {0} WHERE detail_publish IS True - AND document_recorded >= '%s' - AND document_recorded <= '%s'; - """ % (self.table, self.initial_date, self.until_date) + AND document_recorded >= '{1}' + AND document_recorded <= '{2}'; + """.format(self.table, self.initial_date, self.until_date) result = self.engine.execute(sql) for row in result: @@ -74,15 +65,14 @@ def detail_published(self): return count def location_not_published(self): - '''Get rows that have unpublishable location data.''' - + """Get rows that have unpublishable location data.""" sql = """ SELECT COUNT(*) - FROM %s + FROM {0} WHERE location_publish IS False - AND document_recorded >= '%s' - AND document_recorded <= '%s'; - """ % (self.table, self.initial_date, self.until_date) + AND document_recorded >= '{1}' + AND document_recorded <= '{2}'; + """.format(self.table, self.initial_date, self.until_date) result = self.engine.execute(sql) for row in result: @@ -90,15 +80,14 @@ def location_not_published(self): return count def location_published(self): - '''Get rows that have publishable location data.''' - + """Get rows that have publishable location data.""" sql = """ SELECT COUNT(*) - FROM %s + FROM {0} WHERE location_publish IS True - AND document_recorded >= '%s' - AND document_recorded <= '%s'; - """ % (self.table, self.initial_date, self.until_date) + AND document_recorded >= '{1}' + AND document_recorded <= '{2}'; + """.format(self.table, self.initial_date, self.until_date) result = 
self.engine.execute(sql) for row in result: @@ -106,16 +95,15 @@ def location_published(self): return count def highest_amount(self): - '''Find the highest sale amount for a given date range.''' - + """Find the highest sale amount for a given date range.""" sql = """ SELECT amount - FROM %s - WHERE document_recorded >= '%s' - AND document_recorded <= '%s' + FROM {0} + WHERE document_recorded >= '{1}' + AND document_recorded <= '{2}' ORDER BY amount DESC LIMIT 1; - """ % (self.table, self.initial_date, self.until_date) + """.format(self.table, self.initial_date, self.until_date) result = self.engine.execute(sql) @@ -124,16 +112,15 @@ def highest_amount(self): return high_amount def lowest_amount(self): - '''Find the lowest sale amount for a given date range.''' - + """Find the lowest sale amount for a given date range.""" sql = """ SELECT amount - FROM %s - WHERE document_recorded >= '%s' - AND document_recorded <= '%s' + FROM {0} + WHERE document_recorded >= '{1}' + AND document_recorded <= '{2}' ORDER BY amount ASC LIMIT 1; - """ % (self.table, self.initial_date, self.until_date) + """.format(self.table, self.initial_date, self.until_date) result = self.engine.execute(sql) @@ -142,8 +129,7 @@ def lowest_amount(self): return low_amount def all_records(self): - '''Get all rows for the given date range.''' - + """Get all rows for the given date range.""" sql = """ SELECT amount, document_date, @@ -160,10 +146,10 @@ def all_records(self): permanent_flag, location_publish, neighborhood - FROM %s - WHERE document_recorded >= '%s' - AND document_recorded <= '%s'; - """ % (self.table, self.initial_date, self.until_date) + FROM {0} + WHERE document_recorded >= '{1}' + AND document_recorded <= '{2}'; + """.format(self.table, self.initial_date, self.until_date) result = self.engine.execute(sql) diff --git a/tests/test_clean.py b/tests/test_clean.py deleted file mode 100644 index 500c9e2..0000000 --- a/tests/test_clean.py +++ /dev/null @@ -1,241 +0,0 @@ -# -*- coding: utf-8 
-*- - -import os -# import mock -from unittest import TestCase -from sqlalchemy import create_engine -from sqlalchemy.orm import sessionmaker -from realestate.db import ( - Detail - # Cleaned -) -from realestate.lib.clean import ( - Clean, - Join -) -from realestate import DATABASE_NAME -# from datetime import date - -# global application scope. create Session class, engine -Session = sessionmaker() - -engine = create_engine( - 'postgresql://%s:%s@localhost/%s' % ( - os.environ.get('REAL_ESTATE_DATABASE_USERNAME'), - os.environ.get('REAL_ESTATE_DATABASE_PASSWORD'), - DATABASE_NAME - ) -) - - -class TestJoin(TestCase): - "Test the Join class in `clean.py`." - - def setup(self): - ''' - Connect to the database, begin a non-ORM transaction and bind an - individual Session to the connection. - ''' - - # connect to the database - self.connection = engine.connect() - - # begin a non-ORM transaction - self.trans = self.connection.begin() - - # bind an individual Session to the connection - self.session = Session(bind=self.connection) - - def teardown(self): - ''' - rollback - everything that happened with the Session above (including - calls to commit()) is rolled back. - ''' - - self.trans.rollback() - self.session.close() - - # return connection to the Engine - self.connection.close() - - def test_join_subqueries_length(self): - ''' - Test that JOIN with each subquery results in same number of rows as - just the details table for the same date range. 
- ''' - - self.setup() - - d1 = '2014-02-18' - d2 = '2014-11-20' - - details_query = self.session.query( - Detail - ).filter( - Detail.document_recorded >= d1 - ).filter( - Detail.document_recorded <= d2 - ).all() - - subquery = Join(initial_date=d1, until_date=d2).join_subqueries() - - self.assertEqual(len(details_query), len(subquery)) - - self.teardown() - - -class TestClean(TestCase): - - '''Test the Clean class in `clean.py`.''' - - def setup(self): - ''' - Connect to the database, begin a non-ORM transaction and bind an - individual Session to the connection. - ''' - - # connect to the database - self.connection = engine.connect() - - # begin a non-ORM transaction - self.trans = self.connection.begin() - - # bind an individual Session to the connection - self.session = Session(bind=self.connection) - - def teardown(self): - ''' - rollback - everything that happened with the Session above (including - calls to commit()) is rolled back. - ''' - - # rollback - everything that happened with the - # Session above (including calls to commit()) - # is rolled back. - self.trans.rollback() - self.session.close() - - # return connection to the Engine - self.connection.close() - - # def test_commit_rows(self): - # ''' - # Test that committing cleaned rows handles any problems. - # ''' - - # self.setup() - - # initial_query = self.session.query(Cleaned).all() - - # initial_length = len(initial_query) - - # # Input with two new unique instrument numbers, - # # but also one duplicate. 
- # rows = [ - # { - # 'location_publish': '0', - # 'buyers': 'Jean Tran, Trang Thi Phuong Tran', - # 'document_recorded': date(2015, 4, 16), - # 'neighborhood': 'None', - # 'detail_publish': '1', - # 'longitude': -90.17215, - # 'sellers': 'Thap Van Nguyen', - # 'location_info': '', - # 'amount': 250000, - # 'instrument_no': '1234-56789', # New instrument number - # 'address': '', - # 'latitude': 29.96238, - # 'zip_code': '70121', - # 'document_date': None - # }, - # { - # 'location_publish': '0', - # 'buyers': 'Jean Tran, Trang Thi Phuong Tran', - # 'document_recorded': date(2015, 4, 16), - # 'neighborhood': 'None', - # 'detail_publish': '1', - # 'longitude': -90.17215, - # 'sellers': 'Thap Van Nguyen', - # 'location_info': '', - # 'amount': 250000, - # 'instrument_no': '1234-56789', # Duplicate instrument number - # 'address': '', - # 'latitude': 29.96238, - # 'zip_code': '70121', - # 'document_date': None - # }, - # { - # 'location_publish': '0', - # 'buyers': 'Jean Tran, Trang Thi Phuong Tran', - # 'document_recorded': date(2015, 4, 16), - # 'neighborhood': 'None', - # 'detail_publish': '1', - # 'longitude': -90.17215, - # 'sellers': 'Thap Van Nguyen', - # 'location_info': '', - # 'amount': 250000, - # 'instrument_no': '1234-56790', # New instrument number - # 'address': '', - # 'latitude': 29.96238, - # 'zip_code': '70121', - # 'document_date': None - # } - # ] - - # Clean().commit_rows(rows) - - # final_query = self.session.query(Cleaned).all() - - # final_length = len(final_query) - - # self.assertEqual(initial_length + 2, final_length) - - # self.teardown() - - def test_prep_rows(self): - ''' - Test that JOIN with each subquery results in same number of rows as - just the details table for the same date range. 
"""Unit tests for scripts/initialize.py."""

# NOTE(review): the argument dicts below use docopt-style keys. The original
# key names were stripped during extraction (they appeared as empty strings);
# "<single_date>", "<early_date>" and "<late_date>" are reconstructed --
# confirm against the docopt usage string in scripts/initialize.py.

import unittest

from unittest import mock  # stdlib since Python 3.3; replaces `import mock`

from scripts.initialize import (
    BadDateRangeError, initialize, cli, cli_has_errors)

# Valid CLI argument combinations.
single_date_arguments = {
    "<single_date>": "2016-12-11",
    "<early_date>": None,
    "<late_date>": None}

date_range_arguments = {
    "<single_date>": None,
    "<early_date>": "2016-12-21",
    "<late_date>": "2016-12-31"}

no_arguments = {
    "<single_date>": None,
    "<early_date>": None,
    "<late_date>": None}

# Invalid CLI argument combinations.
all_arguments = {
    "<single_date>": "2016-12-11",
    "<early_date>": "2016-12-21",
    "<late_date>": "2016-12-31"}

single_and_early_arguments = {
    "<single_date>": "2016-12-11",
    "<early_date>": "2016-12-21",
    "<late_date>": None}

single_and_late_arguments = {
    "<single_date>": "2016-12-11",
    "<early_date>": None,
    "<late_date>": "2016-12-31"}

early_only_arguments = {
    "<single_date>": None,
    "<early_date>": "2016-12-21",
    "<late_date>": None}

late_only_arguments = {
    "<single_date>": None,
    "<early_date>": None,
    "<late_date>": "2016-12-31"}

backward_date_range_arguments = {
    "<single_date>": None,
    "<early_date>": "2016-12-31",
    "<late_date>": "2016-12-21"}


class TestInitialize(unittest.TestCase):
    """Test cases for scripts/initialize.py."""

    def test_cli_errors_all(self):
        """Test cli_has_errors() when all arguments supplied."""
        output = cli_has_errors(all_arguments)
        self.assertEqual(output, True)

    def test_cli_errors_single_and_early(self):
        """Test cli_has_errors() with single and early bound date arguments."""
        output = cli_has_errors(single_and_early_arguments)
        self.assertEqual(output, True)

    def test_cli_errors_single_and_late(self):
        """Test cli_has_errors() with single and late bound date arguments."""
        output = cli_has_errors(single_and_late_arguments)
        self.assertEqual(output, True)

    def test_cli_errors_early_only(self):
        """Test cli_has_errors() with early bound date only."""
        output = cli_has_errors(early_only_arguments)
        self.assertEqual(output, True)

    def test_cli_errors_late_only(self):
        """Test cli_has_errors() with late bound date only."""
        output = cli_has_errors(late_only_arguments)
        self.assertEqual(output, True)

    def test_cli_errors_single_date(self):
        """Test cli_has_errors() with single date only."""
        output = cli_has_errors(single_date_arguments)
        self.assertEqual(output, False)

    def test_cli_errors_date_range(self):
        """Test cli_has_errors() with date range only."""
        output = cli_has_errors(date_range_arguments)
        self.assertEqual(output, False)

    def test_cli_errors_no_date(self):
        """Test cli_has_errors() with no supplied dates."""
        output = cli_has_errors(no_arguments)
        self.assertEqual(output, False)

    @mock.patch('scripts.initialize.cli_has_errors')
    def test_cli_with_parsing_errors(self, mock_cli_has_errors):
        """Test cli() when there are CLI parsing errors."""
        mock_cli_has_errors.return_value = True
        output = cli(no_arguments)
        self.assertEqual(output, None)

    @mock.patch('scripts.initialize.initialize')
    @mock.patch('scripts.initialize.cli_has_errors')
    def test_cli_no_date(self, mock_cli_has_errors, mock_initialize):
        """Test cli() without specified date."""
        mock_cli_has_errors.return_value = False
        cli(no_arguments)
        mock_initialize.assert_called_with()

    @mock.patch('scripts.initialize.initialize')
    @mock.patch('scripts.initialize.cli_has_errors')
    def test_cli_single_date(self, mock_cli_has_errors, mock_initialize):
        """Test cli() with single date only."""
        mock_cli_has_errors.return_value = False
        cli(single_date_arguments)
        # A single date is both the start and the end of the range.
        mock_initialize.assert_called_with(
            initial_date=single_date_arguments['<single_date>'],
            until_date=single_date_arguments['<single_date>'])

    @mock.patch('scripts.initialize.initialize')
    @mock.patch('scripts.initialize.cli_has_errors')
    def test_cli_date_range(self, mock_cli_has_errors, mock_initialize):
        """Test cli() with date range."""
        mock_cli_has_errors.return_value = False
        cli(date_range_arguments)
        mock_initialize.assert_called_with(
            initial_date=date_range_arguments['<early_date>'],
            until_date=date_range_arguments['<late_date>'])

    @mock.patch('scripts.initialize.initialize')
    @mock.patch('scripts.initialize.cli_has_errors')
    def test_cli_backward_date_range(self, mock_cli_has_errors, mock_init):
        """Test cli() with backward date range."""
        mock_cli_has_errors.return_value = False

        with self.assertRaises(BadDateRangeError):
            cli(backward_date_range_arguments)

    # initialize()
    @mock.patch('scripts.initialize.Publish')
    @mock.patch('scripts.initialize.GetDates')
    @mock.patch('scripts.initialize.Geocode')
    @mock.patch('scripts.initialize.Clean')
    @mock.patch('scripts.initialize.Build')
    def test_initialize_no_dates(self, mock_build, mock_clean, mock_geocode,
                                 mock_getdates, mock_publish):
        """Test initialize() with no dates supplied."""
        mock_getdates.return_value.get_date_range.return_value = {
            "initial_date": "2016-12-11",
            "until_date": "2016-12-21"}

        initialize()

        # Get dates
        mock_getdates.assert_called_with()
        mock_getdates.return_value.get_date_range.assert_called_with()

        # Build
        mock_build.assert_called_with(
            initial_date="2016-12-11", until_date="2016-12-21")
        mock_build.return_value.build_all.assert_called_with()

        # Geocode: constructed once with the date range and once bare.
        expected_call_list = [
            mock.call(
                initial_date="2016-12-11",
                until_date="2016-12-21"),
            mock.call()]

        self.assertEqual(len(mock_geocode.call_args_list), 2)
        self.assertEqual(mock_geocode.call_args_list, expected_call_list)

        geocode_instance = mock_geocode.return_value
        geocode_instance.geocode.assert_called_with()
        geocode_instance.update_locations_with_neighborhoods \
            .assert_called_with()

        # Publish
        mock_publish.assert_called_with(
            initial_date="2016-12-11", until_date="2016-12-21")
        mock_publish.return_value.main.assert_called_with()

        # Clean: constructed twice with the same date range.
        expected_call_list = [
            mock.call(initial_date="2016-12-11", until_date="2016-12-21"),
            mock.call(initial_date="2016-12-11", until_date="2016-12-21")]

        self.assertEqual(len(mock_clean.call_args_list), 2)
        self.assertEqual(mock_clean.call_args_list, expected_call_list)

        mock_clean.return_value.main.assert_called_with()
        mock_clean.return_value.update_cleaned_geom.assert_called_with()
details table.''' - - html_path = ( - '%s/tests/data/' % PROJECT_DIR + - 'raw/2014-02-18/form-html/OPR288694480.html') - - # todo: rows, parse_rows - - def test_get_field_document_type_var(self): - "test_get_field_document_type_var" - - document_type = DetailParser(self.html_path).document_type - - self.assertEqual(document_type, "SALE") - - def test_get_field_instrument_no_var(self): - "test_get_field_instrument_no_var" - - instrument_no = DetailParser(self.html_path).instrument_no - - self.assertEqual(instrument_no, "2014-06269") - - def test_get_field_multi_seq_var(self): - "test_get_field_multi_seq_var" - - multi_seq = DetailParser(self.html_path).multi_seq - - self.assertEqual(multi_seq, "0") - - def test_get_field_min__var(self): - "test_get_field_min__var" - - min_ = DetailParser(self.html_path).min_ - - self.assertEqual(min_, "") - - def test_get_field_cin_var(self): - "test_get_field_cin_var" - - cin = DetailParser(self.html_path).cin - - self.assertEqual(cin, "549928") - - def test_get_field_book_type_var(self): - "test_get_field_book_type_var" - - book_type = DetailParser(self.html_path).book_type - - self.assertEqual(book_type, "") - - def test_get_field_book_var(self): - "test_get_field_book_var" - - book = DetailParser(self.html_path).book - - self.assertEqual(book, "") - - def test_get_field_page_var(self): - "test_get_field_page_var" - - page = DetailParser(self.html_path).page - - self.assertEqual(page, "") - - def test_get_field_document_date_var_none(self): - ''' - Test that if no document date is entered, - then a value of None/NULL is returned. 
- ''' - - html_no_document_date = ( - '%s/tests/data/' % PROJECT_DIR + - 'raw/2014-05-01/form-html/OPR291526640.html') - - document_date = DetailParser(html_no_document_date).document_date - - self.assertEqual(document_date, None) - - def test_get_field_document_date_var(self): - "test_get_field_document_date_var" - - document_date = DetailParser(self.html_path).document_date - - self.assertEqual(document_date, "02/10/2014") - - def test_get_field_document_recorded_var(self): - "test_get_field_document_recorded_var" - - document_recorded = DetailParser(self.html_path).document_recorded - - self.assertEqual(document_recorded, "02/18/2014 10:35:37 AM") - - def test_get_field_amount_var(self): - "test_get_field_amount_var" - - amount = DetailParser(self.html_path).amount - - self.assertEqual(amount, 41000) - - def test_get_field_status_var(self): - "test_get_field_status_var" - - status = DetailParser(self.html_path).status - - self.assertEqual(status, "Verified") - - def test_get_field_prior_mortgage_doc_type_var(self): - "test_get_field_prior_mortgage_doc_type_var" - - prior_mortgage_doc_type = DetailParser( - self.html_path - ).prior_mortgage_doc_type - - self.assertEqual(prior_mortgage_doc_type, "") - - def test_get_field_prior_conveyance_doc_type_var(self): - "test_get_field_prior_conveyance_doc_type_var" - - output = DetailParser(self.html_path).prior_conveyance_doc_type - - self.assertEqual(output, "") - - def test_get_field_cancel_status_var(self): - "test_get_field_cancel_status_var" - - cancel_status = DetailParser(self.html_path).cancel_status - - self.assertEqual(cancel_status, "") - - def test_get_field_remarks_var(self): - "test_get_field_remarks_var" - - remarks = DetailParser(self.html_path).remarks - - self.assertEqual(remarks, "") - - def test_get_field_no_pages_in_image_var(self): - "test_get_field_no_pages_in_image_var" - - no_pages_in_image = DetailParser(self.html_path).no_pages_in_image - - self.assertEqual(no_pages_in_image, "8") - - def 
test_get_field_image_var(self): - "test_get_field_image_var" - - image = DetailParser(self.html_path).image - - self.assertEqual(image, "") - - # def test_form_dict(self): - # "test_form_dict" - - # output = DetailParser(self.html_path).form_dict() - - # self.assertEqual(type(output), dict) - - def test_form_dict(self): - '''docstring''' - - details_dict = { - 'status': 'Verified', - 'document_recorded': '02/18/2014 10:35:37 AM', - 'prior_conveyance_doc_type': '', - 'no_pages_in_image': '8', - 'prior_mortgage_doc_type': '', - 'cin': '549928', - 'instrument_no': '2014-06269', - 'page': '', - 'amount': 41000, - 'book_type': '', - 'document_id': 'OPR288694480', - 'cancel_status': '', - 'min_': '', - 'remarks': '', - 'document_type': 'SALE', - 'image': '', - 'book': '', - 'multi_seq': '0', - 'document_date': '02/10/2014' - } - - test_details_dict = DetailParser(self.html_path).form_dict() - - self.assertEqual(test_details_dict, details_dict) - - -class TestVendorParser(TestCase): - - '''TestVendorParser''' - - html_path = ( - '%s/tests/data/' % PROJECT_DIR + - 'raw/2014-02-18/form-html/OPR288694480.html') - - def test_get_vendor_blank(self): - "test_get_vendor_blank" - - l = VendorParser(self.html_path).form_list() - - vendor_blank1 = l[0]['vendor_blank'] - vendor_blank2 = l[1]['vendor_blank'] - - self.assertEqual(vendor_blank1, "1") - self.assertEqual(vendor_blank2, "2") - - def test_get_vendor_p_c(self): - "test_get_vendor_p_c" - - l = VendorParser(self.html_path).form_list() - - vendor_p_c1 = l[0]['vendor_p_c'] - vendor_p_c2 = l[1]['vendor_p_c'] - - self.assertEqual(vendor_p_c1, "C") - self.assertEqual(vendor_p_c2, "C") - - def test_get_vendor_lastname(self): - "test_get_vendor_lastname" - - l = VendorParser(self.html_path).form_list() - - vendor_lastname1 = l[0]['vendor_lastname'] - vendor_lastname2 = l[1]['vendor_lastname'] - - self.assertEqual( - vendor_lastname1, - "NEW ORLEANS REDEVELOPMENT AUTHORITY" - ) - self.assertEqual( - vendor_lastname2, - "COMMUNITY 
IMPROVEMENT AGENCY" - ) - - def test_get_vendor_firstname(self): - "test_get_vendor_firstname" - - l = VendorParser(self.html_path).form_list() - - vendor_firstname1 = l[0]['vendor_firstname'] - vendor_firstname2 = l[1]['vendor_firstname'] - - self.assertEqual(vendor_firstname1, "") - self.assertEqual(vendor_firstname2, "") - - def test_get_vendor_relator(self): - "test_get_vendor_relator" - - l = VendorParser(self.html_path).form_list() - - vendor_relator1 = l[0]['vendor_relator'] - vendor_relator2 = l[1]['vendor_relator'] - - self.assertEqual(vendor_relator1, "FKA") - self.assertEqual(vendor_relator2, "") - - def test_get_vendor_cancel_status(self): - "test_get_vendor_cancel_status" - - l = VendorParser(self.html_path).form_list() - - vendor_cancel_status1 = l[0]['vendor_cancel_status'] - vendor_cancel_status2 = l[1]['vendor_cancel_status'] - - self.assertEqual(vendor_cancel_status1, "") - self.assertEqual(vendor_cancel_status2, "") - - def test_form_list(self): - '''docstring''' - - vendors_list = [{ - 'vendor_blank': '1', - 'vendor_p_c': 'C', - 'vendor_relator': 'FKA', - 'vendor_cancel_status': '', - 'vendor_firstname': '', - 'document_id': 'OPR288694480', - 'vendor_lastname': 'NEW ORLEANS REDEVELOPMENT AUTHORITY' - }, { - 'vendor_blank': '2', - 'vendor_p_c': 'C', - 'vendor_relator': '', - 'vendor_cancel_status': '', - 'vendor_firstname': '', - 'document_id': 'OPR288694480', - 'vendor_lastname': 'COMMUNITY IMPROVEMENT AGENCY' - }] - - test_vendors_list = VendorParser(self.html_path).form_list() - - self.assertEqual(vendors_list, test_vendors_list) - - -class TestVendeeParser(TestCase): - - '''TestVendeeParser''' - - html_path = ( - '%s/tests/data/' % PROJECT_DIR + - 'raw/2014-02-18/form-html/OPR288694480.html') - - def test_get_vendee_blank(self): - "test_get_vendee_blank" - - l = VendeeParser(self.html_path).form_list() - - vendee_blank = l[0]['vendee_blank'] - - self.assertEqual(vendee_blank, "1") - - def test_get_vendee_p_c(self): - "test_get_vendee_p_c" - - 
l = VendeeParser(self.html_path).form_list() - - vendee_p_c = l[0]['vendee_p_c'] - - self.assertEqual(vendee_p_c, "C") - - def test_get_vendee_lastname(self): - "test_get_vendee_lastname" - - l = VendeeParser(self.html_path).form_list() - - vendee_lastname = l[0]['vendee_lastname'] - - self.assertEqual(vendee_lastname, "UV SOLO TRUST") - - def test_get_vendee_firstname(self): - "test_get_vendee_firstname" - - l = VendeeParser(self.html_path).form_list() - - vendee_firstname = l[0]['vendee_firstname'] - - self.assertEqual(vendee_firstname, "") - - def test_get_vendee_relator(self): - "test_get_vendee_relator" - - l = VendeeParser(self.html_path).form_list() - - vendee_relator = l[0]['vendee_relator'] - - self.assertEqual(vendee_relator, "") - - def test_get_vendee_cancel_status(self): - "test_get_vendee_cancel_status" - - l = VendeeParser(self.html_path).form_list() - - vendee_cancel_status = l[0]['vendee_cancel_status'] - - self.assertEqual(vendee_cancel_status, "") - - def test_form_list(self): - '''docstring''' - - vendees_list = [{ - 'vendee_lastname': 'UV SOLO TRUST', - 'vendee_cancel_status': '', - 'vendee_p_c': 'C', - 'vendee_firstname': '', - 'vendee_relator': '', - 'vendee_blank': '1', - 'document_id': 'OPR288694480' - }] - - test_vendees_list = VendeeParser(self.html_path).form_list() - - self.assertEqual(vendees_list, test_vendees_list) - - -class TestLocationParser(TestCase): - - "TestLocationParser" - - html_path = ( - '%s/tests/data/' % PROJECT_DIR + - 'raw/2014-02-18/form-html/OPR288694480.html') - - def test_get_field_subdivision(self): - "test_get_field_subdivision" - - subdivision = LocationParser(self.html_path).get_subdivision(0) - - self.assertEqual(subdivision, "ARDYN PARK") - - def test_get_field_condo(self): - "test_get_field_condo" - - condo = LocationParser(self.html_path).get_condo(0) - - self.assertEqual(condo, "") - - def test_get_field_district(self): - "test_get_field_district" - - district = 
LocationParser(self.html_path).get_district(0) - - self.assertEqual(district, "3RD") - - def test_get_field_square(self): - "test_get_field_square" - - square = LocationParser(self.html_path).get_square(0) - - self.assertEqual(square, "4-A") - - def test_get_field_street_number(self): - "test_get_field_street_number" - - street_number = LocationParser( - self.html_path).get_street_number(0) - - self.assertEqual(street_number, "7532") - - def test_get_field_address(self): - "test_get_field_address" - - address = LocationParser(self.html_path).get_address(0) - - self.assertEqual(address, "PRIMROSE DR") - - def test_get_field_unit(self): - "test_get_field_unit" - - unit = LocationParser(self.html_path).get_unit(0) - - self.assertEqual(unit, "") - - def test_get_field_weeks(self): - "test_get_field_weeks" - - weeks = LocationParser(self.html_path).get_weeks(0) - - self.assertEqual(weeks, "") - - def test_get_field_cancel_status_unit(self): - "test_get_field_cancel_status_unit" - - cancel_status_unit = LocationParser( - self.html_path).get_cancel_status_unit(0) - - self.assertEqual(cancel_status_unit, " ") - - def test_get_field_freeform_legal(self): - "test_get_field_freeform_legal" - - freeform_legal = LocationParser( - self.html_path).get_freeform_legal(0) - - self.assertEqual(freeform_legal, "") - - # todo: closer look with various versions - def test_get_field_cancel_status_lot(self): - "test_get_field_cancel_status_lot" - - cancel_status_lot = LocationParser( - self.html_path).get_cancel_status_lot(0) - - self.assertEqual(cancel_status_lot, " ") - - # todo: closer look with various HTML, for "xx to xx" - def test_get_field_lot(self): - "test_get_field_lot" - - lot = LocationParser(self.html_path).get_lot(0) - - self.assertEqual(lot, "17-A") - - def test_form_list(self): - '''docstring''' - - locations_list = [{ - 'square': '4-A', - 'address': 'PRIMROSE DR', - 'condo': '', - 'cancel_status_unit': ' ', - 'unit': '', - 'freeform_legal': '', - 'subdivision': 'ARDYN 
# -*- coding: utf-8 -*-

"""Test that all Python files in project meet PEP8 style."""

import os
import pep8
import fnmatch

import unittest

from www import PROJECT_DIR

# Ignore stuff in virtualenvs or version control directories.
ignore_patterns = [
    '.egg-info', '.git', '.tox',
    'backups', 'confs', 'data', 'docs', 'logs', 'misc']


def ignore(directory):
    """Check if this directory should be ignored."""
    for pattern in ignore_patterns:
        if pattern in directory:
            return True

    return False


class TestPep8(unittest.TestCase):
    """Test that all Python files conform to PEP8 standards."""

    def test_pep8(self):
        """Test that all Python files conform to PEP8 standards."""
        pep8style = pep8.StyleGuide(quiet=False)  # TODO

        # Collect every .py file in the project, skipping ignored dirs.
        # NOTE(review): the collection loop's context lines were elided in
        # the patch; this mirrors the identical loop in tests/test_pylint.py
        # -- confirm against the repository.
        files_list = []
        for root, dirnames, filenames in os.walk(PROJECT_DIR):
            if ignore(root):
                continue

            for filename in fnmatch.filter(filenames, '*.py'):
                files_list.append(os.path.join(root, filename))

        errors = pep8style.check_files(files_list).total_errors

        self.assertEqual(
            errors,
            0,
            'Found {} PEP8 errors (and warnings).'.format(errors))
# -*- coding: utf-8 -*-

"""Test that all Python files in project pass pylint tests."""

import os
import fnmatch

from subprocess import call

from www import PROJECT_DIR

# Ignore stuff in virtualenvs or version control directories.
ignore_patterns = [
    '.egg-info', '.git', '.tox',
    'backups', 'confs', 'data', 'docs', 'logs', 'misc']


def ignore(directory):
    """Check if the directory should be ignored."""
    for pattern in ignore_patterns:
        if pattern in directory:
            return True

    return False


class RunPylint(object):
    """Run pylint on all Python files."""

    def test_pylint(self):
        """Run pylint (errors only) on every .py file in the project."""
        files_list = []

        for root, dirnames, filenames in os.walk(PROJECT_DIR):
            if ignore(root):
                continue

            for filename in fnmatch.filter(filenames, '*.py'):
                files_list.append(os.path.join(root, filename))

        # `path` (not `file`) to avoid shadowing the builtin. The argv is a
        # list, so no shell is involved.
        for path in files_list:
            call([
                'pylint',
                '--errors-only',
                path])

if __name__ == '__main__':
    RunPylint().test_pylint()
"""Unit tests for www/results_language.py."""

import copy
import unittest

from unittest import mock  # stdlib since Python 3.3; replaces `import mock`

from www.results_language import ResultsLanguage

standard_data = {
    "name_address": "Keyword",
    "number_of_records": 4344,
    "neighborhood": "Gert Town",
    "zip_code": "70125",
    "amount_low": 10,
    "amount_high": 1000,
    "begin_date": "2016-05-01",
    "end_date": "2016-12-31",
    "map_button_state": True}


class TestResultsLanguage(unittest.TestCase):
    """Test cases for www/results_language.py."""

    def test_plural_or_not(self):
        """Test plural_or_not() for multiple sales."""
        expected = "sales"

        rl = ResultsLanguage(standard_data)
        actual = rl.plural_or_not()

        self.assertEqual(actual, expected)

    def test_plural_or_not_single(self):
        """Test plural_or_not() for a single sale."""
        expected = "sale"

        new_data = copy.deepcopy(standard_data)
        new_data["number_of_records"] = 1
        rl = ResultsLanguage(new_data)

        actual = rl.plural_or_not()

        self.assertEqual(actual, expected)

    @mock.patch('www.results_language.get_number_with_commas')
    def test_add_initial_language(self, mock_commas):
        """Test add_initial_language()."""
        mock_commas.return_value = "One"

        text = "sale"
        expected = "One sale found"

        rl = ResultsLanguage(standard_data)
        actual = rl.add_initial_language(text)

        self.assertEqual(actual, expected)

    def test_add_keyword_language_keyword(self):
        """Test add_keyword_language() for single word."""
        text = "Current language"
        expected = 'Current language for keyword "Keyword"'

        rl = ResultsLanguage(standard_data)
        actual = rl.add_keyword_language(text)

        self.assertEqual(actual, expected)

    def test_add_keyword_language_key_phrase(self):
        """Test add_keyword_language() for multiple words."""
        text = "Current language"
        expected = 'Current language for key phrase "Key phrase"'

        new_data = copy.deepcopy(standard_data)
        new_data["name_address"] = "Key phrase"
        rl = ResultsLanguage(new_data)

        actual = rl.add_keyword_language(text)

        self.assertEqual(actual, expected)

    def test_add_keyword_language_key_empty(self):
        """Test add_keyword_language() for empty search."""
        text = "Current language"
        expected = 'Current language'

        new_data = copy.deepcopy(standard_data)
        new_data["name_address"] = ""
        rl = ResultsLanguage(new_data)

        actual = rl.add_keyword_language(text)

        self.assertEqual(actual, expected)

    def test_add_nbhd_zip_language_nothing_new(self):
        """Test add_nbhd_zip_language() nothing new."""
        text = "Current sentence"
        expected = "Current sentence"

        new_data = copy.deepcopy(standard_data)
        new_data["neighborhood"] = ""
        new_data["zip_code"] = ""
        rl = ResultsLanguage(new_data)

        actual = rl.add_nbhd_zip_language(text)

        self.assertEqual(actual, expected)

    def test_add_nbhd_zip_language_zip_only(self):
        """Test add_nbhd_zip_language() with zip only."""
        text = "Current sentence"
        expected = "Current sentence in ZIP code 70125"

        new_data = copy.deepcopy(standard_data)
        new_data["neighborhood"] = ""
        rl = ResultsLanguage(new_data)

        actual = rl.add_nbhd_zip_language(text)

        self.assertEqual(actual, expected)

    def test_add_nbhd_zip_language_nbhd_only(self):
        """Test add_nbhd_zip_language() with neighborhood only."""
        text = "Current sentence"
        expected = "Current sentence in the Gert Town neighborhood"

        new_data = copy.deepcopy(standard_data)
        new_data["zip_code"] = ""
        rl = ResultsLanguage(new_data)

        actual = rl.add_nbhd_zip_language(text)

        self.assertEqual(actual, expected)

    def test_add_nbhd_zip_language_nbhd_and_zip(self):
        """Test add_nbhd_zip_language() with zip and neighborhood."""
        text = "Current sentence"
        expected = "Current sentence in the Gert Town neighborhood and 70125"

        rl = ResultsLanguage(standard_data)
        actual = rl.add_nbhd_zip_language(text)

        self.assertEqual(actual, expected)

    def test_add_amount_language_none(self):
        """Test add_amount_language() without anything new."""
        text = "Current sentence"
        expected = "Current sentence"

        new_data = copy.deepcopy(standard_data)
        new_data["amount_low"] = ""
        new_data["amount_high"] = ""
        rl = ResultsLanguage(new_data)

        actual = rl.add_amount_language(text)

        self.assertEqual(actual, expected)

    def test_add_amount_language_both(self):
        """Test add_amount_language() with both."""
        text = "Current sentence"
        expected = ("Current sentence where the price was between "
                    "$10 and $1,000")

        rl = ResultsLanguage(standard_data)
        actual = rl.add_amount_language(text)

        self.assertEqual(actual, expected)

    def test_add_amount_language_low_only(self):
        """Test add_amount_language() with low amount only."""
        text = "Current sentence"
        expected = "Current sentence where the price was greater than $10"

        new_data = copy.deepcopy(standard_data)
        new_data["amount_high"] = ""
        rl = ResultsLanguage(new_data)

        actual = rl.add_amount_language(text)

        self.assertEqual(actual, expected)

    def test_add_amount_language_high_only(self):
        """Test add_amount_language() with high amount only."""
        text = "Current sentence"
        expected = "Current sentence where the price was less than $1,000"

        new_data = copy.deepcopy(standard_data)
        new_data["amount_low"] = ""
        rl = ResultsLanguage(new_data)

        actual = rl.add_amount_language(text)

        self.assertEqual(actual, expected)

    def test_add_date_language_none(self):
        """Test add_date_language() without any new data."""
        text = "Current sentence"
        expected = "Current sentence"

        new_data = copy.deepcopy(standard_data)
        new_data["begin_date"] = ""
        new_data["end_date"] = ""
        rl = ResultsLanguage(new_data)

        actual = rl.add_date_language(text)

        self.assertEqual(actual, expected)

    def test_add_date_language_both(self):
        """Test add_date_language() with both dates."""
        text = "Current sentence"
        expected = "Current sentence between May 1, 2016, and Dec. 31, 2016,"

        rl = ResultsLanguage(standard_data)
        actual = rl.add_date_language(text)

        self.assertEqual(actual, expected)

    def test_add_date_language_begin_only(self):
        """Test add_date_language() with beginning date only."""
        text = "Current sentence"
        expected = "Current sentence after May 1, 2016,"

        new_data = copy.deepcopy(standard_data)
        new_data["end_date"] = ""
        rl = ResultsLanguage(new_data)

        actual = rl.add_date_language(text)

        self.assertEqual(actual, expected)

    def test_add_date_language_end_only(self):
        """Test add_date_language() with end date only."""
        text = "Current sentence"
        expected = "Current sentence before Dec. 31, 2016,"

        new_data = copy.deepcopy(standard_data)
        new_data["begin_date"] = ""
        rl = ResultsLanguage(new_data)

        actual = rl.add_date_language(text)

        self.assertEqual(actual, expected)

    def test_add_map_filtering_language(self):
        """Test add_map_filtering_language()."""
        text = "Current sentence"
        expected = "Current sentence in the current map view"

        rl = ResultsLanguage(standard_data)  # map_button_state is True
        actual = rl.add_map_filtering_language(text)

        self.assertEqual(actual, expected)

    def test_add_final_sentence_language_no_quotes(self):
        """Test add_final_sentence_language() without quotes."""
        text = "Current sentence"
        expected = "Current sentence."

        rl = ResultsLanguage(standard_data)
        actual = rl.add_final_sentence_language(text)

        self.assertEqual(actual, expected)

    def test_add_final_sentence_language_end_on_date(self):
        """Test add_final_sentence_language() ending on a date."""
        text = "Current sentence Dec. 31, 2016,"
        expected = "Current sentence Dec. 31, 2016."

        rl = ResultsLanguage(standard_data)
        actual = rl.add_final_sentence_language(text)

        self.assertEqual(actual, expected)

    def test_add_final_sentence_language_single_quotes(self):
        """Test add_final_sentence_language() with single quotes at end."""
        text = "Current 'sentence'"
        expected = "Current 'sentence.'"

        rl = ResultsLanguage(standard_data)
        actual = rl.add_final_sentence_language(text)

        self.assertEqual(actual, expected)

    def test_add_final_sentence_language_double_quotes(self):
        """Test add_final_sentence_language() with double quotes at end."""
        text = 'Current "sentence"'
        expected = 'Current "sentence."'

        rl = ResultsLanguage(standard_data)
        actual = rl.add_final_sentence_language(text)

        self.assertEqual(actual, expected)

    def test_main_all(self):
        """Test main() with all components."""
        expected = (
            '4,344 sales found for keyword "Keyword" in the Gert Town '
            "neighborhood and 70125 where the price was between $10 and "
            "$1,000 between May 1, 2016, and Dec. 31, 2016, in the "
            "current map view.")

        rl = ResultsLanguage(standard_data)
        actual = rl.main()

        self.assertEqual(actual, expected)
"""Unit tests for www/utils.py."""

import unittest

from www.utils import (
    convert_amount,
    get_number_with_commas,
    get_num_with_curr_sign,
    ymd_to_mdy,
    ymd_to_mdy_slashes,
    ymd_to_full_date,
    convert_month_to_ap_style,
    binary_to_english,
    english_to_binary)


class TestUtils(unittest.TestCase):
    """Test cases for www/utils.py."""

    # Locals are named `value` rather than `input` to avoid shadowing
    # the builtin.

    def test_convert_amount(self):
        """Test convert_amount()."""
        value = "$4,000"
        expected = 4000
        actual = convert_amount(value)

        self.assertEqual(actual, expected)

    def test_get_number_with_commas(self):
        """Test get_number_with_commas()."""
        value = 5000
        expected = '5,000'
        actual = get_number_with_commas(value)

        self.assertEqual(actual, expected)

    def test_get_num_with_curr_sign(self):
        """Test get_num_with_curr_sign()."""
        value = 5000
        expected = '$5,000'
        actual = get_num_with_curr_sign(value)

        self.assertEqual(actual, expected)

    def test_ymd_to_mdy(self):
        """Test ymd_to_mdy()."""
        value = "2016-12-31"
        expected = "12-31-2016"
        actual = ymd_to_mdy(value)

        self.assertEqual(actual, expected)

    def test_ymd_to_mdy_none(self):
        """Test ymd_to_mdy() when input is None."""
        value = None
        expected = "None"
        actual = ymd_to_mdy(value)

        self.assertEqual(actual, expected)

    def test_ymd_to_mdy_slashes(self):
        """Test ymd_to_mdy_slashes()."""
        value = "2016-12-31"
        expected = "12/31/2016"
        actual = ymd_to_mdy_slashes(value)

        self.assertEqual(actual, expected)

    def test_ymd_to_mdy_slashes_none(self):
        """Test ymd_to_mdy_slashes() when input is None."""
        value = None
        expected = "None"
        actual = ymd_to_mdy_slashes(value)

        self.assertEqual(actual, expected)

    def test_ymd_to_full_date(self):
        """Test ymd_to_full_date()."""
        value = "2016-12-31"
        expected = "Saturday, Dec. 31, 2016"
        actual = ymd_to_full_date(value)

        self.assertEqual(actual, expected)

    def test_ymd_to_full_date_no_leading_zero(self):
        """Test ymd_to_full_date() with one-digit date."""
        value = "2016-12-01"
        expected = "Thursday, Dec. 1, 2016"
        actual = ymd_to_full_date(value)

        self.assertEqual(actual, expected)

    def test_ymd_to_full_date_no_ap_month_abbr(self):
        """Test ymd_to_full_date() with unabbreviated month."""
        value = "2016-07-21"
        expected = "Thursday, July 21, 2016"
        actual = ymd_to_full_date(value)

        self.assertEqual(actual, expected)

    def test_ymd_to_full_date_no_day(self):
        """Test ymd_to_full_date() without day."""
        value = "2016-12-31"
        expected = "Dec. 31, 2016"
        actual = ymd_to_full_date(value, no_day=True)

        self.assertEqual(actual, expected)

    def test_ymd_to_full_date_slashes(self):
        """Test ymd_to_full_date() when input has slashes."""
        value = "12/31/2016"
        expected = "Dec. 31, 2016"
        actual = ymd_to_full_date(value)

        self.assertEqual(actual, expected)

    def test_ymd_to_full_date_none(self):
        """Test ymd_to_full_date() when input is None."""
        value = None
        expected = "None"
        actual = ymd_to_full_date(value)

        self.assertEqual(actual, expected)

    def test_convert_month_to_ap_style_january(self):
        """Test convert_month_to_ap_style() for January."""
        value = "January"
        expected = "Jan."
        actual = convert_month_to_ap_style(value)

        self.assertEqual(actual, expected)

    def test_convert_month_to_ap_style_february(self):
        """Test convert_month_to_ap_style() for February."""
        value = "February"
        expected = "Feb."
        actual = convert_month_to_ap_style(value)

        self.assertEqual(actual, expected)

    def test_convert_month_to_ap_style_march(self):
        """Test convert_month_to_ap_style() for March."""
        value = "March"
        expected = "March"
        actual = convert_month_to_ap_style(value)

        self.assertEqual(actual, expected)

    def test_convert_month_to_ap_style_april(self):
        """Test convert_month_to_ap_style() for April."""
        value = "April"
        expected = "April"
        actual = convert_month_to_ap_style(value)

        self.assertEqual(actual, expected)

    def test_convert_month_to_ap_style_no_abbr(self):
        """Test convert_month_to_ap_style() for May."""
        value = "May"
        expected = "May"
        actual = convert_month_to_ap_style(value)

        self.assertEqual(actual, expected)

    def test_convert_month_to_ap_style_june(self):
        """Test convert_month_to_ap_style() for June."""
        value = "June"
        expected = "June"
        actual = convert_month_to_ap_style(value)

        self.assertEqual(actual, expected)

    def test_convert_month_to_ap_style_july(self):
        """Test convert_month_to_ap_style() for July."""
        value = "July"
        expected = "July"
        actual = convert_month_to_ap_style(value)

        self.assertEqual(actual, expected)

    def test_convert_month_to_ap_style_august(self):
        """Test convert_month_to_ap_style() for August."""
        value = "August"
        expected = "Aug."
        actual = convert_month_to_ap_style(value)

        self.assertEqual(actual, expected)

    def test_convert_month_to_ap_style_september(self):
        """Test convert_month_to_ap_style() for September."""
        value = "September"
        expected = "Sept."
        actual = convert_month_to_ap_style(value)

        self.assertEqual(actual, expected)

    def test_convert_month_to_ap_style_october(self):
        """Test convert_month_to_ap_style() for October."""
        value = "October"
        expected = "Oct."
        actual = convert_month_to_ap_style(value)

        self.assertEqual(actual, expected)

    def test_convert_month_to_ap_style_november(self):
        """Test convert_month_to_ap_style() for November."""
        value = "November"
        expected = "Nov."
        actual = convert_month_to_ap_style(value)

        self.assertEqual(actual, expected)

    def test_convert_month_to_ap_style_december(self):
        """Test convert_month_to_ap_style() for December."""
        value = "December"
        expected = "Dec."
        actual = convert_month_to_ap_style(value)

        self.assertEqual(actual, expected)

    def test_binary_to_english_1(self):
        """Test binary_to_english() when input is 1."""
        value = 1
        expected = "Yes"
        actual = binary_to_english(value)

        self.assertEqual(actual, expected)

    def test_binary_to_english_0(self):
        """Test binary_to_english() when input is 0."""
        value = 0
        expected = "No"
        actual = binary_to_english(value)

        self.assertEqual(actual, expected)

    def test_english_to_binary_yes(self):
        """Test english_to_binary() for "Yes"."""
        value = "Yes"
        expected = 1
        actual = english_to_binary(value)

        self.assertEqual(actual, expected)

    def test_english_to_binary_ya(self):
        """Test english_to_binary() for "ya"."""
        value = "ya"
        expected = 1
        actual = english_to_binary(value)

        self.assertEqual(actual, expected)

    def test_english_to_binary_no(self):
        """Test english_to_binary() for "No"."""
        value = "No"
        expected = 0
        actual = english_to_binary(value)

        self.assertEqual(actual, expected)

    def test_english_to_binary_nah(self):
        """Test english_to_binary() for "nah"."""
        value = "nah"
        expected = 0
        actual = english_to_binary(value)

        self.assertEqual(actual, expected)
= + {[testenv]deps} diff --git a/realestate/__init__.py b/www/__init__.py similarity index 62% rename from realestate/__init__.py rename to www/__init__.py index fd74ae7..06c8da7 100644 --- a/realestate/__init__.py +++ b/www/__init__.py @@ -1,68 +1,52 @@ # -*- coding: utf-8 -*- -""" -Package-wide script that is always run. +"""App-wide common variables, such as file names, file paths and dates.""" -Includes common variables, such as file names, file paths and dates. -Also includes logging, which can be accessed by any module like so: - -`log.debug('Description')` - -`log.info('Description')` - -`log.error('Description')` - -`log.exception(error, exc_info=True)` - -You can change the logging level to your choosing. The default is DEBUG. -""" - -import logging -import logging.handlers import os import getpass +import logging +import logging.handlers from datetime import date, timedelta -from sqlalchemy.orm import sessionmaker from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker USER = getpass.getuser() PROJECT_DIR = os.path.abspath( os.path.join(os.path.dirname(__file__), '..')) +GEO_DIR = "{}/data/geo".format(PROJECT_DIR) + if USER == 'ubuntu': # Server BACKUP_DIR = '/backups/realestate' - GEO_DIR = '/apps/geographic-data/repo' - PROJECT_URL = 'http://vault.thelensnola.org/realestate' - - # Static assets S3_URL = "https://s3-us-west-2.amazonaws.com/lensnola/realestate" - LENS_JS = "%s/js/lens.js" % S3_URL - INDEX_JS = "%s/js/index.js" % S3_URL - SEARCH_AREA_JS = "%s/js/search-area.js" % S3_URL - SEARCH_JS = "%s/js/search.js" % S3_URL - MAP_JS = "%s/js/map.js" % S3_URL - SALE_JS = "%s/js/sale.js" % S3_URL - DASHBOARD_JS = "%s/js/dashboard.js" % S3_URL - NEIGHBORHOODS_TOPO = "%s/js/neighborhoods-topo.js" % S3_URL - - LENS_CSS = "%s/css/lens.css" % S3_URL - REALESTATE_CSS = "%s/css/realestate.css" % S3_URL - BANNER_CSS = "%s/css/banner.css" % S3_URL - TABLE_CSS = "%s/css/table.css" % S3_URL + # JavaScript + LENS_JS = 
"{}/js/lens.js".format(S3_URL) + INDEX_JS = "{}/js/index.js".format(S3_URL) + SEARCH_AREA_JS = "{}/js/search-area.js".format(S3_URL) + SEARCH_JS = "{}/js/search.js".format(S3_URL) + MAP_JS = "{}/js/map.js".format(S3_URL) + SALE_JS = "{}/js/sale.js".format(S3_URL) + DASHBOARD_JS = "{}/js/dashboard.js".format(S3_URL) + NEIGHBORHOODS_TOPO = "{}/js/neighborhoods-topo.js".format(S3_URL) + + # CSS + LENS_CSS = "{}/css/lens.css".format(S3_URL) + REALESTATE_CSS = "{}/css/realestate.css".format(S3_URL) + BANNER_CSS = "{}/css/banner.css".format(S3_URL) + TABLE_CSS = "{}/css/table.css".format(S3_URL) RELOADER = False DEBUG = False PORT = 5004 -else: # Local - BACKUP_DIR = '%s/backups' % PROJECT_DIR - GEO_DIR = '/Users/%s/projects/geographic-data/repo' % USER - PROJECT_URL = 'http://localhost:5000/realestate' + LOGGING_LEVEL = logging.INFO +else: # Local + BACKUP_DIR = '{}/backups'.format(PROJECT_DIR) - # Static assets + # JavaScript LENS_JS = '/static/js/lens.js' INDEX_JS = "/static/js/index.js" SEARCH_AREA_JS = '/static/js/search-area.js' @@ -72,6 +56,7 @@ DASHBOARD_JS = "/static/js/dashboard.js" NEIGHBORHOODS_TOPO = "/static/js/neighborhoods-topo.min.js" + # CSS LENS_CSS = "/static/css/lens.css" REALESTATE_CSS = "/static/css/realestate.css" BANNER_CSS = "/static/css/banner.css" @@ -81,11 +66,18 @@ DEBUG = True PORT = 5000 + LOGGING_LEVEL = logging.DEBUG + APP_ROUTING = '/realestate' JS_APP_ROUTING = '/realestate' DATABASE_NAME = 'realestate' +ENGINE_STRING = 'postgresql://{0}:{1}@localhost/{2}'.format( + os.environ.get('REAL_ESTATE_DATABASE_USERNAME'), + os.environ.get('REAL_ESTATE_DATABASE_PASSWORD'), + DATABASE_NAME) + OPENING_DATE = date(2014, 2, 18) OPENING_DAY = OPENING_DATE.strftime('%Y-%m-%d') @@ -102,19 +94,17 @@ os.environ.get('REAL_ESTATE_DATABASE_USERNAME'), os.environ.get('REAL_ESTATE_DATABASE_PASSWORD'), DATABASE_NAME) + engine = create_engine(ENGINE_STRING) sn = sessionmaker(bind=engine) SESSION = sn() # Import this to any files that need database # Logging 
-LOG_DIR = '%s/logs' % PROJECT_DIR +LOG_DIR = '{}/logs'.format(PROJECT_DIR) LOG_FILE = "{}/realestate.log".format(LOG_DIR) -# if os.path.isfile('{0}/{1}'.format(LOG_DIR, LOG)): -# os.remove('{0}/{1}'.format(LOG_DIR, LOG)) - log = logging.getLogger(__name__) -log.setLevel(logging.DEBUG) +log.setLevel(LOGGING_LEVEL) # Create file handler which logs debug messages or higher filehandler = logging.handlers.RotatingFileHandler( @@ -122,7 +112,7 @@ maxBytes=(5 * 1024 * 1024), # 5 MB backupCount=5) -filehandler.setLevel(logging.DEBUG) +filehandler.setLevel(LOGGING_LEVEL) # Create formatter and add it to the handlers formatter = logging.Formatter( diff --git a/realestate/app.py b/www/app.py similarity index 62% rename from realestate/app.py rename to www/app.py index 146ebe6..d7c32d3 100644 --- a/realestate/app.py +++ b/www/app.py @@ -1,16 +1,15 @@ # -*- coding: utf-8 -*- -'''The controller that routes requests and returns responses.''' +"""The controller that routes requests and returns responses.""" import os import urllib # from flask.ext.cache import Cache from flask import Flask, request, Response -from functools import wraps -from realestate import log, APP_ROUTING, DEBUG, RELOADER, PORT -from realestate.models import Models -from realestate.views import Views +from www import log, APP_ROUTING, DEBUG, RELOADER, PORT +from www.models import Models +from www.views import Views app = Flask(__name__) @@ -20,35 +19,31 @@ # @cache.memoize(timeout=5000) @app.route("%s/" % (APP_ROUTING), methods=['GET']) def home(): - '''Receives a GET call for the homepage (/) and returns the view.''' - + """Receive a GET call for the homepage (/) and returns the view.""" log.debug('home') data = Models().get_home() - view = Views().get_home(data) - return view # @cache.memoize(timeout=5000) @app.route("%s/input" % (APP_ROUTING), methods=['POST']) def searchbar_input(): - ''' - Receives a ___ call from the autocomplete dropdown and returns a dict - of suggestions. 
+ """ + Receive a POST call from the autocomplete dropdown. + + Return a dict of suggestions. :param query: The search bar input. :type query: string :returns: A dict of matching suggestions. - ''' - + """ term = request.args.get('q') log.debug('term: %s', term) data = Models().searchbar_input(term) - return data @@ -56,30 +51,27 @@ def searchbar_input(): @app.route("%s/search/" % (APP_ROUTING), methods=['GET', 'POST']) @app.route("%s/search" % (APP_ROUTING), methods=['GET', 'POST']) def search(): - ''' - Receives a request (GET or POST) for the /search page and returns a view - of some or all of the /search page, depending on whether GET or POST. + """ + Receive a request (GET or POST) for the /search page. + + Return a view some or all of the /search page, depending on whether GET + or POST. - todo: :param request: Incoming data :type request: dict? :returns: View of search page. - ''' - + """ if request.method == 'GET': log.debug('search GET') data, newrows, jsdata = Models().get_search(request) - view = Views().get_search(data, newrows, jsdata) if request.method == 'POST': log.debug('search POST') data = request.get_json() - data, newrows, jsdata = Models().post_search(data) - view = Views().post_search(data, newrows, jsdata) return view @@ -89,16 +81,15 @@ def search(): @app.route("%s/sale/" % (APP_ROUTING), methods=['GET']) def sale(instrument_no=None): """ - Receives a GET request for a particular sale's individual page. + Receive a GET request for a particular sale's individual page. :param instrument_no: The sale's instrument number, determined via the URL. :type instrument_no: string :returns: The sale's page or an error page if no sale found. 
""" - log.debug('sale') - instrument_no = urllib.unquote(instrument_no).decode('utf8') + instrument_no = urllib.parse.unquote(instrument_no) # .decode('utf8') data, jsdata, newrows = Models().get_sale(instrument_no) @@ -110,7 +101,7 @@ def sale(instrument_no=None): def check_auth(username, password): """ - Checks if given username and password match correct credentials. + Check if given username and password match correct credentials. :param username: The entered username. :type username: string @@ -118,14 +109,12 @@ def check_auth(username, password): :type password: string :returns: bool. True if username and password are correct, False otherwise. """ - return (username == os.environ.get('REAL_ESTATE_DASHBOARD_USERNAME') and password == os.environ.get('REAL_ESTATE_DASHBOARD_PASSWORD')) def authenticate(): """Return error message.""" - return Response( 'Could not verify your access level for that URL.\n' 'You have to login with proper credentials', @@ -133,66 +122,19 @@ def authenticate(): {'WWW-Authenticate': 'Basic realm="Login Required"'}) -def requires_auth(f): - """Authorization process.""" - - @wraps(f) - def decorated(*args, **kwargs): - '''docstring''' - - auth = request.authorization - - if not auth or not check_auth(auth.username, auth.password): - return authenticate() - - return f(*args, **kwargs) - - return decorated - - -# todo -# @cache.memoize(timeout=5000) -@app.route("%s/dashboard/" % (APP_ROUTING), methods=['GET', 'POST']) -@requires_auth -def dashboard(): - """The admin dashboard page for making adjustments to the database.""" - - if request.method == 'GET': - log.debug('GET dashboard') - - # data = Models().get_dashboard() - - # view = Views().get_dashboard(data) - - # return view - - if request.method == 'POST': - log.debug('POST dashboard') - - # data = request.get_json() - - # data = Models().post_dashboard(data) - - # view = Views().post_dashboard(data) - - # return view - - # @cache.memoize(timeout=5000) @app.errorhandler(404) def 
page_not_found(error): """ - Returns an error page. + Return an error page. :param error: The error message(?). :type error: not sure :returns: The view. """ - log.debug(error) view = Views().get_error_page() - return view @@ -200,5 +142,4 @@ def page_not_found(error): app.run( port=PORT, use_reloader=RELOADER, - debug=DEBUG - ) + debug=DEBUG) diff --git a/realestate/db.py b/www/db.py similarity index 99% rename from realestate/db.py rename to www/db.py index ae2e78d..fa228a5 100644 --- a/realestate/db.py +++ b/www/db.py @@ -21,8 +21,7 @@ Date, Float, ForeignKey, - Boolean -) + Boolean) from geoalchemy2 import Geometry Base = declarative_base() @@ -68,8 +67,7 @@ class Location(Base): document_id = Column( String, ForeignKey("details.document_id", ondelete="CASCADE"), - nullable=False - ) + nullable=False) street_number = Column(String) address = Column(String) district = Column(String) @@ -175,8 +173,7 @@ def __repr__(self): self.rating, self.zip_code, self.neighborhood, - self.location_publish - ) + self.location_publish) return representation @@ -305,8 +302,7 @@ class Dashboard(Base): instrument_no = Column( String, # ForeignKey("cleaned.instrument_no"), - nullable=False - ) + nullable=False) latitude = Column(Float) longitude = Column(Float) zip_code = Column(String) @@ -578,7 +574,7 @@ def __repr__(self): class Neighborhood(Base): """ - Fields for the `neighborhoods` table. + Table fields for the `neighborhoods` table. :param gid: Integer. Primary key ID. :param objectid: Integer. 
Not sure diff --git a/realestate/models.py b/www/models.py similarity index 81% rename from realestate/models.py rename to www/models.py index f9dcf04..edf1080 100644 --- a/realestate/models.py +++ b/www/models.py @@ -1,52 +1,42 @@ # -*- coding: utf-8 -*- -'''Gets the data.''' +"""Get the data.""" -import os import math import urllib # from flask.ext.cache import Cache -from flask import ( - # request, - jsonify -) + +from flask import jsonify from sqlalchemy import desc -from realestate.db import ( - Cleaned, - Neighborhood -) -# from realestate.lib.check_assessor_urls import Assessor -from realestate.lib.results_language import ResultsLanguage -from realestate.lib.utils import Utils -from realestate import log, TODAY_DAY, SESSION +from www.db import Cleaned, Neighborhood +from www.results_language import ResultsLanguage +from www.utils import get_num_with_curr_sign, ymd_to_full_date +from www import log, TODAY_DAY, SESSION class Models(object): - - '''Gathers data from particular requests.''' + """Gather data from particular requests.""" def __init__(self, initial_date=None, until_date=None): - ''' - Initializes self variables and establishes connection to database. + """ + Initialize self variables and establishes connection to database. :param initial_date: string. YYYY-MM-DD. Default is None. :type initial_date: string :param until_date: string. YYYY-MM-DD. Default is None. :type until_date: string - ''' - + """ self.initial_date = initial_date self.until_date = until_date def get_home(self): - ''' - Gets data for the homepage (/realestate/). + """ + Get data for the homepage (/realestate/). :returns: Data for the homepage, such as date the app was last updated and a list of neighborhoods for the dropdown. - ''' - + """ log.debug('get_home') update_date = self.get_last_updated_date() @@ -60,29 +50,29 @@ def get_home(self): return data def query_search_term_limit_3(self, table, term): - ''' - Gets the top three results for autocomplete dropdown. 
+ """ + Get the top three results for autocomplete dropdown. :param table: string. The database to query. :type table: string :param term: string. The autocomplete term entered in search box. :type term: string :returns: A SQLAlchemy query result for three matches, at most. - ''' - + """ query = SESSION.query( getattr(Cleaned, table) ).filter( - getattr(Cleaned, table).ilike('%%%s%%' % term) + getattr(Cleaned, table).ilike('%%{}%%'.format(term)) ).distinct().limit(3).all() SESSION.close() return query def searchbar_input(self, term): - ''' - Receives the autocomplete term from the search input and returns - a JSON with three suggestions for each of the following + """ + Receive the autocomplete term from the search input. + + Return a JSON with three suggestions for each of the following categories: neighborhoods, ZIP codes, locations, buyers and sellers. @@ -90,11 +80,10 @@ def searchbar_input(self, term): :type term: string :returns: A JSON with at most three suggestions for each category. - ''' - + """ log.debug('searchbar_input') - term = urllib.unquote(term).decode('utf8') + term = urllib.parse.unquote(term) # .decode('utf8') query_neighborhoods = self.query_search_term_limit_3( 'neighborhood', term) @@ -132,19 +121,16 @@ def searchbar_input(self, term): log.debug(response) - return jsonify( - response=response - ) + return jsonify(response=response) @staticmethod def parse_query_string(request): - ''' - Receives URL query string parameters and returns as dict. + """ + Receive URL query string parameters and returns as dict. :param request: A (Flask object?) containing query string. :returns: A dict with the query string parameters. 
- ''' - + """ data = {} data['name_address'] = request.args.get('q') data['amount_low'] = request.args.get('a1') @@ -162,17 +148,17 @@ def parse_query_string(request): return data def determine_pages(self, data): - ''' - Receives data dict and returns with additional - information about pager (number of records, page length, number - of pages, current page and page offset) and URL query string + """ + Receive data dict and return with additional information about pager. + + Includes number of records, page length, number + of pages, current page and page offset and URL query string parameters and returns as dict. :param data: The response's data dict. :type data: dict :returns: The dict with additional pager information. - ''' - + """ query = self.find_all_publishable_rows_fitting_criteria(data) data['number_of_records'] = len(query) @@ -185,13 +171,12 @@ def determine_pages(self, data): return data def get_search(self, request): - ''' + """ GET call for /realestate/search. :param request: The request object(?). :returns: A data dict, SQL query result and JS data. - ''' - + """ data = self.parse_query_string(request) data = self.decode_data(data) data = self.convert_entries_to_db_friendly(data) @@ -204,18 +189,16 @@ def get_search(self, request): query = self.find_page_of_publishable_rows_fitting_criteria(data) for row in query: - row.amount = Utils().get_num_with_curr_sign(row.amount) - row.document_date = Utils().ymd_to_full_date( - (row.document_date).strftime('%Y-%m-%d'), no_day=True) + row.amount = get_num_with_curr_sign(row.amount) + row.document_date = ymd_to_full_date( + (row.document_date).strftime('%Y-%m-%d'), + no_day=True) features = self.build_features_json(query) - # newrows = query # todo: remove? 
- jsdata = { "type": "FeatureCollection", - "features": features - } + "features": features} data['results_css_display'] = 'none' @@ -230,13 +213,11 @@ def get_search(self, request): data['results_language'] = ResultsLanguage(data).main() log.debug('data') - # log.debug(data) return data, query, jsdata def post_search(self, data): - '''Process incoming POST data.''' - + """Process incoming POST data.""" log.debug('post_search') data = self.decode_data(data) @@ -254,8 +235,7 @@ def post_search(self, data): @staticmethod def update_pager(data): - '''docstring''' - + """TODO.""" cond = (data['direction'] == 'back' or data['direction'] == 'forward') @@ -282,8 +262,7 @@ def update_pager(data): return data def filter_by_map(self, data): - '''docstring''' - + """Use map bounds to filter results.""" query = self.map_query_length(data) data['number_of_records'] = len(query) # number of records # total number of pages: @@ -297,8 +276,7 @@ def filter_by_map(self, data): return query def do_not_filter_by_map(self, data): - '''docstring''' - + """TODO.""" query = self.find_all_publishable_rows_fitting_criteria(data) # data['page_length'] = self.PAGE_LENGTH data['number_of_records'] = len(query) # number of records @@ -315,8 +293,7 @@ def do_not_filter_by_map(self, data): return query def mapquery_db(self, data): - '''docstring''' - + """TODO.""" data['bounds'] = [ data['bounds']['_northEast']['lat'], data['bounds']['_northEast']['lng'], @@ -328,23 +305,21 @@ def mapquery_db(self, data): log.debug('map_button_state') - if data['map_button_state'] is True: # map filtering is on - query = self.filter_by_map(data) # todo: was defined elsewhere - - if data['map_button_state'] is False: # map filtering is off + if data['map_button_state']: # map filtering is on + query = self.filter_by_map(data) # TODO: was defined elsewhere + else: # map filtering is off query = self.do_not_filter_by_map(data) for row in query: - row.amount = Utils().get_num_with_curr_sign(row.amount) - 
row.document_date = Utils().ymd_to_full_date( + row.amount = get_num_with_curr_sign(row.amount) + row.document_date = ymd_to_full_date( (row.document_date).strftime('%Y-%m-%d'), no_day=True) features = self.build_features_json(query) jsdata = { "type": "FeatureCollection", - "features": features - } + "features": features} if data['number_of_records'] == 0: data['current_page'] = 0 @@ -359,15 +334,10 @@ def mapquery_db(self, data): log.debug('data returned:') log.debug(data) - # newrows = q - # todo: remove? - # Or necessary because it might change when the session is closed - return data, query, jsdata def get_sale(self, instrument_no): - '''docstring''' - + """TODO.""" data = {} data['update_date'] = self.get_last_updated_date() @@ -380,8 +350,8 @@ def get_sale(self, instrument_no): ).all() for row in query: - row.amount = Utils().get_num_with_curr_sign(row.amount) - row.document_date = Utils().ymd_to_full_date( + row.amount = get_num_with_curr_sign(row.amount) + row.document_date = ymd_to_full_date( (row.document_date).strftime('%Y-%m-%d'), no_day=True) # address = row.address # location_info = row.location_info @@ -396,36 +366,15 @@ def get_sale(self, instrument_no): "features": features } - # conds = (data['assessor_publish'] is False or - # data['assessor_publish'] is None or - # data['assessor_publish'] == '') - - # if conds: - # data['assessor'] = ( - # "Could not find this property on the Orleans Parish" + - # "Assessor's Office site. " + - # "Search based on other criteria.") - # else: - # url_param = Assessor().form_assessor_url( - # address, location_info) - # data['assessor_url'] = "http://qpublic9.qpublic.net/" + \ - # "la_orleans_display" + \ - # ".php?KEY=%s" % (url_param) - # data['assessor'] = "Read more " + \ - # "about this property on the Assessor's Office's" + \ - # "website." 
% (data['assessor_url']) + SESSION.close() if len(query) == 0: - SESSION.close() return None, None, None else: - SESSION.close() return data, jsdata, query def map_query_length(self, data): - '''docstring''' - + """TODO.""" query = SESSION.query( Cleaned ).filter( @@ -460,8 +409,7 @@ def map_query_length(self, data): # For when map filtering is turned on def query_with_map_boundaries(self, data): - '''docstring''' - + """TODO.""" query = SESSION.query( Cleaned ).filter( @@ -501,8 +449,7 @@ def query_with_map_boundaries(self, data): return query def find_all_publishable_rows_fitting_criteria(self, data): - '''docstring''' - + """TODO.""" # log.debug(data) query = SESSION.query( @@ -535,8 +482,7 @@ def find_all_publishable_rows_fitting_criteria(self, data): return query def find_page_of_publishable_rows_fitting_criteria(self, data): - '''docstring''' - + """TODO.""" # log.debug(data) query = SESSION.query( @@ -576,8 +522,7 @@ def find_page_of_publishable_rows_fitting_criteria(self, data): @staticmethod def convert_entries_to_db_friendly(data): - '''docstring''' - + """Convert front-end format to database format.""" if data['amount_low'] == '': data['amount_low'] = 0 if data['amount_high'] == '': @@ -591,8 +536,7 @@ def convert_entries_to_db_friendly(data): @staticmethod def revert_entries(data): - '''docstring''' - + """Convert database-friendly data back to front-end.""" if data['amount_low'] == 0: data['amount_low'] = '' if data['amount_high'] == 9999999999999: @@ -606,17 +550,18 @@ def revert_entries(data): @staticmethod def build_features_json(query): - '''docstring''' - + """TODO.""" log.debug(len(query)) features = [] features_dict = {} for row in query: # log.debug(row.buyers) - if row.location_publish is False: + if not row.location_publish: row.document_date = row.document_date + "*" - if row.permanent_flag is False: + + if not row.permanent_flag: row.document_date = row.document_date + u"\u2020" + features_dict = { "type": "Feature", "properties": { @@ 
-641,19 +586,19 @@ def build_features_json(query): @staticmethod def decode_data(data): - '''docstring''' - + """TODO.""" search_term = data['name_address'] - data['name_address'] = urllib.unquote(search_term).decode('utf8') + data['name_address'] = urllib.parse.unquote(search_term) + # .decode('utf8') neighborhood = data['neighborhood'] - data['neighborhood'] = urllib.unquote(neighborhood).decode('utf8') + data['neighborhood'] = urllib.parse.unquote(neighborhood) + # .decode('utf8') return data def get_last_updated_date(self): - '''docstring''' - + """TODO.""" log.debug('get_last_updated_date') query = SESSION.query( @@ -667,7 +612,7 @@ def get_last_updated_date(self): updated_date = '' for row in query: - updated_date = Utils().ymd_to_full_date( + updated_date = ymd_to_full_date( (row.document_recorded).strftime('%Y-%m-%d'), no_day=True) log.debug(updated_date) @@ -677,22 +622,17 @@ def get_last_updated_date(self): return updated_date def get_neighborhoods(self): - '''docstring''' - + """TODO.""" query = SESSION.query(Neighborhood.gnocdc_lab).all() neighborhoods = [] - for hood in query: + for neighborhood in query: neighborhoods.append( - (hood.gnocdc_lab).title().replace('Mcd', 'McD')) + (neighborhood.gnocdc_lab).title().replace('Mcd', 'McD')) neighborhoods.sort() SESSION.close() return neighborhoods - - -if __name__ == '__main__': - pass diff --git a/www/results_language.py b/www/results_language.py new file mode 100644 index 0000000..e87be81 --- /dev/null +++ b/www/results_language.py @@ -0,0 +1,146 @@ +# -*- coding: utf-8 -*- + +""" +Create the results language on the /search page. + +Ex. "10 sales found for keyword 'LLC' in the French Quarter neighborhood where +the price was between $10,000 and $200,000 between Feb. 18, 2014, and +Feb. 20, 2014.' 
+""" + +from www.utils import ( + get_number_with_commas, + get_num_with_curr_sign, + ymd_to_full_date) + + +class ResultsLanguage(object): + """Convert data to readable format for front-end.""" + + def __init__(self, data): + """Define class variables.""" + self.data = data + + def plural_or_not(self): + """Check if more than one result.""" + if self.data['number_of_records'] == 1: + plural_or_not = "sale" + else: + plural_or_not = "sales" + + return plural_or_not + + def add_initial_language(self, plural_or_not): + """Create initial sentence language.""" + number_of_sales = get_number_with_commas( + self.data['number_of_records']) + + final_sentence = "{0} {1} found".format( + str(number_of_sales), + plural_or_not) + + return final_sentence + + def add_keyword_language(self, final_sentence): + """Add keyword or key phrase language.""" + if self.data['name_address'] != '': + if len(self.data['name_address'].split(' ')) > 1: # Multiple words + term = "key phrase" + else: # One word + term = "keyword" + + final_sentence += ' for {0} "{1}"'.format( + term, + self.data['name_address']) + + return final_sentence + + def add_nbhd_zip_language(self, final_sentence): + """Add neighborhood or ZIP code language.""" + if self.data['neighborhood'] != '': + if self.data['zip_code'] != '': + final_sentence += " in the {0} neighborhood and {1}".format( + self.data['neighborhood'], + self.data['zip_code']) + else: + final_sentence += " in the {} neighborhood".format( + self.data['neighborhood']) + elif self.data['zip_code'] != '': + final_sentence += " in ZIP code {}".format(self.data['zip_code']) + + return final_sentence + + def add_amount_language(self, final_sentence): + """Add amount language.""" + if self.data['amount_low'] != '': + if self.data['amount_high'] != '': + final_sentence += ( + " where the price was between {0} and {1}").format( + get_num_with_curr_sign(self.data['amount_low']), + get_num_with_curr_sign(self.data['amount_high'])) + else: + final_sentence += ( + 
" where the price was greater than {}").format( + get_num_with_curr_sign(self.data['amount_low'])) + elif self.data['amount_high'] != '': + final_sentence += " where the price was less than {}".format( + get_num_with_curr_sign(self.data['amount_high'])) + + return final_sentence + + def add_date_language(self, final_sentence): + """ + Add date language. + + Ex. ...between Feb. 10, 2014, and Feb. 12, 2014. + """ + if self.data['begin_date'] != '': + if self.data['end_date'] != '': + final_sentence += " between {0}, and {1},".format( + ymd_to_full_date(self.data['begin_date'], no_day=True), + ymd_to_full_date(self.data['end_date'], no_day=True)) + else: + final_sentence += " after {},".format( + ymd_to_full_date(self.data['begin_date'], no_day=True)) + elif self.data['end_date'] != '': + final_sentence += " before {},".format( + ymd_to_full_date(self.data['end_date'], no_day=True)) + + return final_sentence + + def add_map_filtering_language(self, final_sentence): + """Add language depending on whether map filtering is turned on.""" + if self.data['map_button_state']: + final_sentence += ' in the current map view' + + return final_sentence + + @staticmethod + def add_final_sentence_language(final_sentence): + """End sentences.""" + # Punctuation comes before quotation marks + if final_sentence[-1] == "'" or final_sentence[-1] == '"': + last_character = final_sentence[-1] + final_sentence_list = list(final_sentence) + final_sentence_list[-1] = '.' + final_sentence_list.append(last_character) + final_sentence = ''.join(final_sentence_list) + elif final_sentence[-1] == ",": # Ending on date + final_sentence = final_sentence[:-1] + final_sentence += '.' + else: + final_sentence += '.' 
+ + return final_sentence + + def main(self): + """Run through all sentence-building methods.""" + plural_or_not = self.plural_or_not() + final_sentence = self.add_initial_language(plural_or_not) + final_sentence = self.add_keyword_language(final_sentence) + final_sentence = self.add_nbhd_zip_language(final_sentence) + final_sentence = self.add_amount_language(final_sentence) + final_sentence = self.add_date_language(final_sentence) + final_sentence = self.add_map_filtering_language(final_sentence) + final_sentence = self.add_final_sentence_language(final_sentence) + return final_sentence diff --git a/realestate/static/css/banner.css b/www/static/css/banner.css similarity index 100% rename from realestate/static/css/banner.css rename to www/static/css/banner.css diff --git a/realestate/static/css/font-awesome.css b/www/static/css/font-awesome.css similarity index 100% rename from realestate/static/css/font-awesome.css rename to www/static/css/font-awesome.css diff --git a/realestate/static/css/foundation.min.css b/www/static/css/foundation.min.css similarity index 100% rename from realestate/static/css/foundation.min.css rename to www/static/css/foundation.min.css diff --git a/realestate/static/css/images/corporate-realty-large.jpg b/www/static/css/images/corporate-realty-large.jpg similarity index 100% rename from realestate/static/css/images/corporate-realty-large.jpg rename to www/static/css/images/corporate-realty-large.jpg diff --git a/realestate/static/css/images/corporate-realty-large.png b/www/static/css/images/corporate-realty-large.png similarity index 100% rename from realestate/static/css/images/corporate-realty-large.png rename to www/static/css/images/corporate-realty-large.png diff --git a/realestate/static/css/images/corporate-realty-original.jpg b/www/static/css/images/corporate-realty-original.jpg similarity index 100% rename from realestate/static/css/images/corporate-realty-original.jpg rename to 
www/static/css/images/corporate-realty-original.jpg diff --git a/realestate/static/css/images/corporate-realty.jpg b/www/static/css/images/corporate-realty.jpg similarity index 100% rename from realestate/static/css/images/corporate-realty.jpg rename to www/static/css/images/corporate-realty.jpg diff --git a/realestate/static/css/images/corporate-realty.png b/www/static/css/images/corporate-realty.png similarity index 100% rename from realestate/static/css/images/corporate-realty.png rename to www/static/css/images/corporate-realty.png diff --git a/realestate/static/css/images/favicon.ico b/www/static/css/images/favicon.ico similarity index 100% rename from realestate/static/css/images/favicon.ico rename to www/static/css/images/favicon.ico diff --git a/realestate/static/css/images/icons-000000.png b/www/static/css/images/icons-000000.png similarity index 100% rename from realestate/static/css/images/icons-000000.png rename to www/static/css/images/icons-000000.png diff --git a/realestate/static/css/images/icons-000000@2x.png b/www/static/css/images/icons-000000@2x.png similarity index 100% rename from realestate/static/css/images/icons-000000@2x.png rename to www/static/css/images/icons-000000@2x.png diff --git a/realestate/static/css/images/lens-logo-magnifying-glass-only.png b/www/static/css/images/lens-logo-magnifying-glass-only.png similarity index 100% rename from realestate/static/css/images/lens-logo-magnifying-glass-only.png rename to www/static/css/images/lens-logo-magnifying-glass-only.png diff --git a/realestate/static/css/images/lens-logo-retina.png b/www/static/css/images/lens-logo-retina.png similarity index 100% rename from realestate/static/css/images/lens-logo-retina.png rename to www/static/css/images/lens-logo-retina.png diff --git a/realestate/static/css/images/ui-bg_flat_75_ffffff_40x100.png b/www/static/css/images/ui-bg_flat_75_ffffff_40x100.png similarity index 100% rename from realestate/static/css/images/ui-bg_flat_75_ffffff_40x100.png 
rename to www/static/css/images/ui-bg_flat_75_ffffff_40x100.png diff --git a/realestate/static/css/images/ui-bg_glass_55_fbf9ee_1x400.png b/www/static/css/images/ui-bg_glass_55_fbf9ee_1x400.png similarity index 100% rename from realestate/static/css/images/ui-bg_glass_55_fbf9ee_1x400.png rename to www/static/css/images/ui-bg_glass_55_fbf9ee_1x400.png diff --git a/realestate/static/css/images/ui-bg_glass_65_ffffff_1x400.png b/www/static/css/images/ui-bg_glass_65_ffffff_1x400.png similarity index 100% rename from realestate/static/css/images/ui-bg_glass_65_ffffff_1x400.png rename to www/static/css/images/ui-bg_glass_65_ffffff_1x400.png diff --git a/realestate/static/css/images/ui-bg_glass_75_dadada_1x400.png b/www/static/css/images/ui-bg_glass_75_dadada_1x400.png similarity index 100% rename from realestate/static/css/images/ui-bg_glass_75_dadada_1x400.png rename to www/static/css/images/ui-bg_glass_75_dadada_1x400.png diff --git a/realestate/static/css/images/ui-bg_glass_75_e6e6e6_1x400.png b/www/static/css/images/ui-bg_glass_75_e6e6e6_1x400.png similarity index 100% rename from realestate/static/css/images/ui-bg_glass_75_e6e6e6_1x400.png rename to www/static/css/images/ui-bg_glass_75_e6e6e6_1x400.png diff --git a/realestate/static/css/images/ui-bg_highlight-soft_75_cccccc_1x100.png b/www/static/css/images/ui-bg_highlight-soft_75_cccccc_1x100.png similarity index 100% rename from realestate/static/css/images/ui-bg_highlight-soft_75_cccccc_1x100.png rename to www/static/css/images/ui-bg_highlight-soft_75_cccccc_1x100.png diff --git a/realestate/static/css/images/ui-icons_454545_256x240.png b/www/static/css/images/ui-icons_454545_256x240.png similarity index 100% rename from realestate/static/css/images/ui-icons_454545_256x240.png rename to www/static/css/images/ui-icons_454545_256x240.png diff --git a/realestate/static/css/jquery-ui.css b/www/static/css/jquery-ui.css similarity index 100% rename from realestate/static/css/jquery-ui.css rename to 
www/static/css/jquery-ui.css diff --git a/realestate/static/css/jquery.tablesorter.pager.css b/www/static/css/jquery.tablesorter.pager.css similarity index 100% rename from realestate/static/css/jquery.tablesorter.pager.css rename to www/static/css/jquery.tablesorter.pager.css diff --git a/realestate/static/css/lens.css b/www/static/css/lens.css similarity index 100% rename from realestate/static/css/lens.css rename to www/static/css/lens.css diff --git a/realestate/static/css/lenstablesorter.css b/www/static/css/lenstablesorter.css similarity index 100% rename from realestate/static/css/lenstablesorter.css rename to www/static/css/lenstablesorter.css diff --git a/realestate/static/css/mapbox.css b/www/static/css/mapbox.css similarity index 100% rename from realestate/static/css/mapbox.css rename to www/static/css/mapbox.css diff --git a/realestate/static/css/realestate.css b/www/static/css/realestate.css similarity index 100% rename from realestate/static/css/realestate.css rename to www/static/css/realestate.css diff --git a/realestate/static/css/table.css b/www/static/css/table.css similarity index 100% rename from realestate/static/css/table.css rename to www/static/css/table.css diff --git a/realestate/static/fonts/FontAwesome.otf b/www/static/fonts/FontAwesome.otf similarity index 100% rename from realestate/static/fonts/FontAwesome.otf rename to www/static/fonts/FontAwesome.otf diff --git a/realestate/static/fonts/fontawesome-webfont.eot b/www/static/fonts/fontawesome-webfont.eot similarity index 100% rename from realestate/static/fonts/fontawesome-webfont.eot rename to www/static/fonts/fontawesome-webfont.eot diff --git a/realestate/static/fonts/fontawesome-webfont.svg b/www/static/fonts/fontawesome-webfont.svg similarity index 100% rename from realestate/static/fonts/fontawesome-webfont.svg rename to www/static/fonts/fontawesome-webfont.svg diff --git a/realestate/static/fonts/fontawesome-webfont.ttf b/www/static/fonts/fontawesome-webfont.ttf similarity 
index 100% rename from realestate/static/fonts/fontawesome-webfont.ttf rename to www/static/fonts/fontawesome-webfont.ttf diff --git a/realestate/static/fonts/fontawesome-webfont.woff b/www/static/fonts/fontawesome-webfont.woff similarity index 100% rename from realestate/static/fonts/fontawesome-webfont.woff rename to www/static/fonts/fontawesome-webfont.woff diff --git a/realestate/static/js/dashboard.js b/www/static/js/dashboard.js similarity index 100% rename from realestate/static/js/dashboard.js rename to www/static/js/dashboard.js diff --git a/realestate/static/js/foundation.min.js b/www/static/js/foundation.min.js similarity index 100% rename from realestate/static/js/foundation.min.js rename to www/static/js/foundation.min.js diff --git a/realestate/static/js/foundation.tooltip.js b/www/static/js/foundation.tooltip.js similarity index 100% rename from realestate/static/js/foundation.tooltip.js rename to www/static/js/foundation.tooltip.js diff --git a/realestate/static/js/index.js b/www/static/js/index.js similarity index 100% rename from realestate/static/js/index.js rename to www/static/js/index.js diff --git a/realestate/static/js/jquery-1.11.0.min.js b/www/static/js/jquery-1.11.0.min.js similarity index 100% rename from realestate/static/js/jquery-1.11.0.min.js rename to www/static/js/jquery-1.11.0.min.js diff --git a/realestate/static/js/jquery-ui.js b/www/static/js/jquery-ui.js similarity index 100% rename from realestate/static/js/jquery-ui.js rename to www/static/js/jquery-ui.js diff --git a/realestate/static/js/jquery.tablesorter.min.js b/www/static/js/jquery.tablesorter.min.js similarity index 100% rename from realestate/static/js/jquery.tablesorter.min.js rename to www/static/js/jquery.tablesorter.min.js diff --git a/realestate/static/js/jquery.tablesorter.pager.min.js b/www/static/js/jquery.tablesorter.pager.min.js similarity index 100% rename from realestate/static/js/jquery.tablesorter.pager.min.js rename to 
www/static/js/jquery.tablesorter.pager.min.js diff --git a/realestate/static/js/leaflet.js b/www/static/js/leaflet.js similarity index 100% rename from realestate/static/js/leaflet.js rename to www/static/js/leaflet.js diff --git a/realestate/static/js/lens.js b/www/static/js/lens.js similarity index 100% rename from realestate/static/js/lens.js rename to www/static/js/lens.js diff --git a/realestate/static/js/map.js b/www/static/js/map.js similarity index 100% rename from realestate/static/js/map.js rename to www/static/js/map.js diff --git a/realestate/static/js/mapbox.uncompressed.js b/www/static/js/mapbox.uncompressed.js similarity index 100% rename from realestate/static/js/mapbox.uncompressed.js rename to www/static/js/mapbox.uncompressed.js diff --git a/realestate/static/js/modernizr.js b/www/static/js/modernizr.js similarity index 100% rename from realestate/static/js/modernizr.js rename to www/static/js/modernizr.js diff --git a/realestate/static/js/neighborhoods-topo.js b/www/static/js/neighborhoods-topo.js similarity index 100% rename from realestate/static/js/neighborhoods-topo.js rename to www/static/js/neighborhoods-topo.js diff --git a/realestate/static/js/neighborhoods-topo.min.js b/www/static/js/neighborhoods-topo.min.js similarity index 100% rename from realestate/static/js/neighborhoods-topo.min.js rename to www/static/js/neighborhoods-topo.min.js diff --git a/realestate/static/js/sale.js b/www/static/js/sale.js similarity index 100% rename from realestate/static/js/sale.js rename to www/static/js/sale.js diff --git a/realestate/static/js/search-area.js b/www/static/js/search-area.js similarity index 100% rename from realestate/static/js/search-area.js rename to www/static/js/search-area.js diff --git a/realestate/static/js/search.js b/www/static/js/search.js similarity index 100% rename from realestate/static/js/search.js rename to www/static/js/search.js diff --git a/realestate/static/js/squares-topo.js b/www/static/js/squares-topo.js 
similarity index 100% rename from realestate/static/js/squares-topo.js rename to www/static/js/squares-topo.js diff --git a/realestate/templates/404.html b/www/templates/404.html similarity index 100% rename from realestate/templates/404.html rename to www/templates/404.html diff --git a/realestate/templates/banner.html b/www/templates/banner.html similarity index 100% rename from realestate/templates/banner.html rename to www/templates/banner.html diff --git a/realestate/templates/footer.html b/www/templates/footer.html similarity index 100% rename from realestate/templates/footer.html rename to www/templates/footer.html diff --git a/realestate/templates/head.html b/www/templates/head.html similarity index 100% rename from realestate/templates/head.html rename to www/templates/head.html diff --git a/realestate/templates/index.html b/www/templates/index.html similarity index 100% rename from realestate/templates/index.html rename to www/templates/index.html diff --git a/realestate/templates/js.html b/www/templates/js.html similarity index 100% rename from realestate/templates/js.html rename to www/templates/js.html diff --git a/realestate/templates/sale.html b/www/templates/sale.html similarity index 100% rename from realestate/templates/sale.html rename to www/templates/sale.html diff --git a/realestate/templates/search-area.html b/www/templates/search-area.html similarity index 100% rename from realestate/templates/search-area.html rename to www/templates/search-area.html diff --git a/realestate/templates/search.html b/www/templates/search.html similarity index 100% rename from realestate/templates/search.html rename to www/templates/search.html diff --git a/realestate/templates/table.html b/www/templates/table.html similarity index 100% rename from realestate/templates/table.html rename to www/templates/table.html diff --git a/www/utils.py b/www/utils.py new file mode 100644 index 0000000..300229a --- /dev/null +++ b/www/utils.py @@ -0,0 +1,156 @@ +# -*- coding: 
utf-8 -*- + +"""Utility functions.""" + +import re + +from datetime import datetime + + +zip_codes = [ + 70112, 70113, 70114, 70115, 70116, 70117, 70118, 70119, 70121, 70122, + 70123, 70124, 70125, 70126, 70127, 70128, 70129, 70130, 70131, 70139, + 70140, 70141, 70142, 70143, 70145, 70146, 70148, 70149, 70150, 70151, + 70152, 70153, 70154, 70156, 70157, 70158, 70159, 70160, 70161, 70162, + 70163, 70164, 70165, 70166, 70167, 70170, 70172, 70174, 70175, 70176, + 70177, 70178, 70179, 70181, 70182, 70183, 70184, 70185, 70186, 70187, + 70189, 70190, 70195] + + +def convert_amount(amount): + """Convert formatted string amount to integer.""" + amount = re.sub(r"\$", r"", amount) + amount = re.sub(r"\,", r"", amount) + return int(float(amount)) + + +def get_number_with_commas(value): + """Convert integer to formatted string.""" + return "{:,}".format(value) + + +def get_num_with_curr_sign(value): + """Convert integer to formatted currency string.""" + return "${:,}".format(int(value)) + + +def ymd_to_mdy(value): + """ + Convert yyyy-mm-dd to mm-dd-yyyy. + + :param value: A date string. + :type value: str + :returns: str + """ + if value is None: + return "None" + else: + value = datetime.strptime(value, '%Y-%m-%d').date() + return value.strftime("%m-%d-%Y") + + +def ymd_to_mdy_slashes(value): + """Convert yyyy-mm-dd to mm/dd/yyyy.""" + if value is None: + return "None" + else: + value = datetime.strptime(value, '%Y-%m-%d').date() + value = value.strftime("%m/%d/%Y") + return value + + +def ymd_to_full_date(value, no_day=False): + """Convert yyyy-mm-dd to Day, Month Date, Year.""" + if value is None: + return "None" + + # 12/31/2016. Why? + # if isinstance(value, unicode): # TODO: Why? Remove. + if value[2] == "/": # TODO: Hack. Improve. + readable_date = str(value) + readable_date = datetime.strptime( + readable_date, '%m/%d/%Y').date() + readable_date = readable_date.strftime('%b. %-d, %Y') + else: # 2016-12-31. Why? 
+ if no_day is False: + readable_datetime = datetime.strptime( + value, '%Y-%m-%d').date() + readable_date = readable_datetime.strftime( + '%A, %b. %-d, %Y') + else: + readable_datetime = datetime.strptime( + value, '%Y-%m-%d').date() + readable_date = readable_datetime.strftime('%b. %-d, %Y') + + readable_date = readable_date.replace('Mar.', 'March') + readable_date = readable_date.replace('Apr.', 'April') + readable_date = readable_date.replace('May.', 'May') + readable_date = readable_date.replace('Jun.', 'June') + readable_date = readable_date.replace('Jul.', 'July') + return readable_date + + +def convert_month_to_ap_style(month): + """ + Convert month to abbreviated AP style. + + Ex. January => Jan. May ==> May. + """ + if re.match(r"[jJ][aA]", month): + month = "Jan." + + if re.match(r"[fF]", month): + month = "Feb." + + if re.match(r"[mM][aA][rR]", month): + month = "March" + + if re.match(r"[aA][pP]", month): + month = "April" + + if re.match(r"[mM][aA][yY]", month): + month = "May" + + if re.match(r"[jJ][uU][nN]", month): + month = "June" + + if re.match(r"[jJ][uU][lL]", month): + month = "July" + + if re.match(r"[aA][uU]", month): + month = "Aug." + + if re.match(r"[sS][eE]", month): + month = "Sept." + + if re.match(r"[oO][cC]", month): + month = "Oct." + + if re.match(r"[nN][oO]", month): + month = "Nov." + + if re.match(r"[dD][eE]", month): + month = "Dec." + + return month + + +def binary_to_english(bit): + """Convert 0/1 to No/Yes.""" + bit = int(bit) + conversion_dict = { + 0: "No", + 1: "Yes"} + english = conversion_dict[bit] + return english + + +def english_to_binary(english): + """Convert No/Yes to 0/1.""" + # Accepts Yes, Y, yeah, yes sir, etc. 
+ english = english[0].upper() + conversion_dict = { + "N": 0, + "Y": 1} + bit = conversion_dict[english] + return bit diff --git a/www/views.py b/www/views.py new file mode 100644 index 0000000..7f14845 --- /dev/null +++ b/www/views.py @@ -0,0 +1,155 @@ +# -*- coding: utf-8 -*- + +"""Render the views.""" + +# from flask.ext.cache import Cache +from flask import render_template, jsonify, make_response + +from www.utils import zip_codes +from www import ( + log, + LENS_JS, + INDEX_JS, + SEARCH_JS, + SEARCH_AREA_JS, + SALE_JS, + MAP_JS, + LENS_CSS, + REALESTATE_CSS, + TABLE_CSS, + BANNER_CSS, + JS_APP_ROUTING) + + +class Views(object): + """Methods for each page in the app.""" + + def __init__(self): + """Commonly accessed static files.""" + # self.home_assets = { + # 'js': LENS_JS, + # 'css': LENS_CSS, + # 'index_js': INDEX_JS, + # 'search_area_js': SEARCH_AREA_JS, + # 'js_app_routing': JS_APP_ROUTING, + # 'zip_codes': zip_codes + # } + # self.search_assets = { + # 'js': LENS_JS, + # 'search_js': SEARCH_JS, + # 'search_area_js': SEARCH_AREA_JS, + # 'map_js': MAP_JS, + # 'css': LENS_CSS, + # 'js_app_routing': JS_APP_ROUTING, + # 'zip_codes': zip_codes + # } + # self.sale_assets = { + # 'js': LENS_JS, + # 'css': LENS_CSS, + # 'salejs': SALE_JS + # } + + def get_home(self, data): + """Return view for /realestate/.""" + log.debug('get_home') + + rendered_template = render_template( + 'index.html', + data=data, + # home_assets=self.home_assets + lens_js=LENS_JS, + lens_css=LENS_CSS, + realestate_css=REALESTATE_CSS, + banner_css=BANNER_CSS, + table_css=TABLE_CSS, + index_js=INDEX_JS, + search_area_js=SEARCH_AREA_JS, + js_app_routing=JS_APP_ROUTING, + zip_codes=zip_codes) + + response = make_response(rendered_template) + + return response + + def get_search(self, data, newrows, js_data): + """Return GET view for /realestate/search.""" + log.debug('get_search') + + rendered_template = render_template( + 'search.html', + data=data, + newrows=newrows, + js_data=js_data, + # 
search_assets=self.search_assets + lens_js=LENS_JS, + search_js=SEARCH_JS, + search_area_js=SEARCH_AREA_JS, + map_js=MAP_JS, + lens_css=LENS_CSS, + realestate_css=REALESTATE_CSS, + banner_css=BANNER_CSS, + table_css=TABLE_CSS, + js_app_routing=JS_APP_ROUTING, + zip_codes=zip_codes) + + response = make_response(rendered_template) + + return response + + @staticmethod + def post_search(data, newrows, js_data): + """Return updated views for /realestate/search.""" + log.debug('post_search') + + log.debug('returned newrows') + log.debug(newrows) + + table_template = render_template( + 'table.html', + newrows=newrows) + + log.debug('returned js_data:') + log.debug(js_data) + + log.debug('returned data') + log.debug(data) + + jsonified = jsonify( + table_template=table_template, + js_data=js_data, + data=data) + + return jsonified + + def get_sale(self, data, js_data, newrows): + """Return GET view for /realestate/sale.""" + log.debug('get_sale') + + rendered_template = render_template( + 'sale.html', + data=data, + newrows=newrows, + js_data=js_data, + # sale_assets=self.sale_assets + lens_js=LENS_JS, + lens_css=LENS_CSS, + realestate_css=REALESTATE_CSS, + banner_css=BANNER_CSS, + table_css=TABLE_CSS, + sale_js=SALE_JS) + + response = make_response(rendered_template) + + return response + + def get_error_page(self): + """Return 404 error page.""" + rendered_template = render_template( + '404.html', + lens_css=LENS_CSS, + lens_js=LENS_JS, + index_js=INDEX_JS) + + response = make_response(rendered_template) + + return response, 404