Skip to content

Commit

Permalink
More logging
Browse files (browse the repository at this point in the history)
  • Loading branch information
Robert Meyer committed Feb 11, 2018
1 parent c2a024a commit b25bf36
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 5 deletions.
7 changes: 6 additions & 1 deletion integration_tests/bchain/getdata_test.py
Original file line number Diff line number Diff line change
@@ -57,5 +57,10 @@ def test_scrape_date(steem):
directory = tempfile.mkdtemp()
tpbg.scrape_or_load_full_day(yesterday, steem, directory, stop_after=25)

assert len(os.listdir(directory))>0
assert len(os.listdir(directory)) == 1

tpbg.scrape_or_load_full_day(yesterday, steem, directory, stop_after=25)

assert len(os.listdir(directory)) == 1

shutil.rmtree(directory)
3 changes: 0 additions & 3 deletions trufflepig/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1 @@


__version__ = '0.1.0a'

4 changes: 3 additions & 1 deletion trufflepig/bchain/getdata.py
Original file line number Diff line number Diff line change
@@ -238,11 +238,13 @@ def scrape_or_load_full_day(date, steem, directory, overwrite=False,
filename = os.path.join(directory,filename)
if os.path.isfile(filename) and not overwrite:
logger.info('Found file {} will load it'.format(filename))
post_frame = pd.read_pickle(filename)
post_frame = pd.read_pickle(filename, compression='gzip')
else:
logger.info('File {} not found, will start scraping'.format(filename))
posts = get_all_posts_between(start_datetime, end_datetime, steem,
stop_after=stop_after)
post_frame = pd.DataFrame(data=posts, columns=sorted(posts[0].keys()))
if store:
logger.info('Storing file {} to disk'.format(filename))
post_frame.to_pickle(filename, compression='gzip')
return post_frame

0 comments on commit b25bf36

Please sign in to comment.