Skip to content

Commit

Permalink
Fixed race conditions
Browse files Browse the repository at this point in the history
  • Loading branch information
Robert Meyer committed Feb 11, 2018
1 parent a8c1215 commit ec30345
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 1 deletion.
15 changes: 15 additions & 0 deletions scripts/create_raw_data_fixture.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import os

from trufflepig import config
import trufflepig.bchain.getdata as tpbg


# Target folder for the raw data: `scraped_data` under the project root.
directory = os.path.join(config.PROJECT_DIRECTORY, 'scraped_data')


# Build the raw-data fixture from the single configured node.
# NOTE(review): the fixed `current_datetime` presumably pins the scraped
# window so the fixture is reproducible -- confirm against getdata.py.
frames = tpbg.scrape_or_load_training_data_parallel(
    [config.NODE_URL],
    directory,
    days=20,
    stop_after=100,
    ncores=5,
    current_datetime='2018-02-11',
)
8 changes: 7 additions & 1 deletion trufflepig/bchain/getdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,11 @@ def scrape_or_load_full_day(date, steem, directory, overwrite=False,
start_datetime = pd.to_datetime(date)
end_datetime = start_datetime + pd.Timedelta(days=1)
if not os.path.isdir(directory):
os.makedirs(directory)
try:
os.makedirs(directory)
except FileExistsError:
# race conditions
pass
filename = FILENAME_TEMPLATE.format(year=start_datetime.year,
month=start_datetime.month,
day=start_datetime.day)
Expand Down Expand Up @@ -278,6 +282,8 @@ def scrape_or_load_training_data_parallel(node_urls, directory,

if current_datetime is None:
current_datetime = pd.datetime.utcnow()
else:
current_datetime = pd.to_datetime(current_datetime)

start_datetime = current_datetime - pd.Timedelta(days=days + offset)

Expand Down

0 comments on commit ec30345

Please sign in to comment.