From b25bf366fe554c7854f1ebfbae286bc854a18f2f Mon Sep 17 00:00:00 2001 From: Robert Meyer Date: Sun, 11 Feb 2018 20:06:40 +0100 Subject: [PATCH] More logging --- integration_tests/bchain/getdata_test.py | 7 ++++++- trufflepig/__init__.py | 3 --- trufflepig/bchain/getdata.py | 4 +++- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/integration_tests/bchain/getdata_test.py b/integration_tests/bchain/getdata_test.py index 4525574..7cd729c 100644 --- a/integration_tests/bchain/getdata_test.py +++ b/integration_tests/bchain/getdata_test.py @@ -57,5 +57,10 @@ def test_scrape_date(steem): directory = tempfile.mkdtemp() tpbg.scrape_or_load_full_day(yesterday, steem, directory, stop_after=25) - assert len(os.listdir(directory))>0 + assert len(os.listdir(directory)) == 1 + + tpbg.scrape_or_load_full_day(yesterday, steem, directory, stop_after=25) + + assert len(os.listdir(directory)) == 1 + shutil.rmtree(directory) diff --git a/trufflepig/__init__.py b/trufflepig/__init__.py index 903f88c..bcf8f5f 100644 --- a/trufflepig/__init__.py +++ b/trufflepig/__init__.py @@ -1,4 +1 @@ - - __version__ = '0.1.0a' - diff --git a/trufflepig/bchain/getdata.py b/trufflepig/bchain/getdata.py index 2cf4906..c0e2c56 100644 --- a/trufflepig/bchain/getdata.py +++ b/trufflepig/bchain/getdata.py @@ -238,11 +238,13 @@ def scrape_or_load_full_day(date, steem, directory, overwrite=False, filename = os.path.join(directory,filename) if os.path.isfile(filename) and not overwrite: logger.info('Found file {} will load it'.format(filename)) - post_frame = pd.read_pickle(filename) + post_frame = pd.read_pickle(filename, compression='gzip') else: + logger.info('File {} not found, will start scraping'.format(filename)) posts = get_all_posts_between(start_datetime, end_datetime, steem, stop_after=stop_after) post_frame = pd.DataFrame(data=posts, columns=sorted(posts[0].keys())) if store: + logger.info('Storing file {} to disk'.format(filename)) post_frame.to_pickle(filename, compression='gzip') return post_frame