remove fsspec reqs
JosephMontoya-TRI committed Apr 6, 2021
1 parent 9057405 commit 9357e84
Showing 4 changed files with 45 additions and 10 deletions.
13 changes: 8 additions & 5 deletions camd/campaigns/structure_discovery.py
@@ -15,7 +15,8 @@
 from camd.agent.stability import AgentStabilityAdaBoost
 from camd.campaigns.base import Campaign
 from camd import CAMD_S3_BUCKET, __version__
-from camd.utils.data import load_dataframe, s3_sync, s3_key_exists
+from camd.utils.data import load_dataframe, s3_sync, s3_key_exists, \
+    upload_s3_file, download_s3_file
 from camd.analysis import StabilityAnalyzer
 from camd.experiment.dft import OqmdDFTonMC1
 from camd.experiment.base import ATFSampler
@@ -77,11 +78,12 @@ def from_chemsys(cls, chemsys, prefix="proto-dft-2/runs",
 
         # Get structure domain
         # Check cache
-        cache_key = "protosearch_cache/v1/{}/{}/candidates.pickle".format(chemsys, n_max_atoms)
+        cache_key = "protosearch_cache/v1/{}/{}/candidate_data.pickle".format(chemsys, n_max_atoms)
         # TODO: create test of isfile
         if s3_key_exists(bucket=CAMD_S3_BUCKET, key=cache_key):
             logger.info("Found cached protosearch domain.")
-            candidate_data = pd.read_pickle("s3://{}/{}".format(CAMD_S3_BUCKET, cache_key))
+            download_s3_file(cache_key, CAMD_S3_BUCKET, "candidate_data.pickle")
+            candidate_data = pd.read_pickle("candidate_data.pickle")
             logger.info("Loaded cached {}.".format(cache_key))
         else:
             logger.info("Generating domain with max {} atoms.".format(n_max_atoms))
@@ -92,11 +94,12 @@
                 n_max_atoms=n_max_atoms, **{'grid': range(1, max_coeff)})
             candidate_data = domain.candidates()
             logger.info("Candidates generated")
-            candidate_data.to_pickle("s3://{}/{}".format(CAMD_S3_BUCKET, cache_key))
+            candidate_data.to_pickle("candidate_data.pickle")
+            upload_s3_file(cache_key, CAMD_S3_BUCKET, "candidate_data.pickle")
             logger.info("Cached protosearch domain at {}.".format(cache_key))
 
         # Dump structure/candidate data
         candidate_data.to_pickle("candidate_data.pickle")
 
         s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')
 
         # Set up agents and loop parameters
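The change above is the heart of the commit: pandas only resolves "s3://" URLs through the optional fsspec/s3fs stack, so moving the transfer to explicit boto3 calls lets both pins be dropped. A minimal sketch of the two access patterns, using a hypothetical bucket and key:

    import boto3
    import pandas as pd

    # Before: pandas hands the "s3://" URL to fsspec/s3fs behind the scenes,
    # which is why both packages had to be pinned in requirements.txt.
    candidate_data = pd.read_pickle(
        "s3://my-camd-bucket/protosearch_cache/v1/candidate_data.pickle"
    )

    # After: the transfer happens explicitly via boto3 (already a camd
    # dependency), and pandas only ever sees a local file.
    s3_client = boto3.client("s3")
    s3_client.download_file(
        "my-camd-bucket",
        "protosearch_cache/v1/candidate_data.pickle",
        "candidate_data.pickle",
    )
    candidate_data = pd.read_pickle("candidate_data.pickle")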
36 changes: 36 additions & 0 deletions camd/utils/data.py
@@ -460,3 +460,39 @@ def s3_key_exists(key, bucket):
     else:
         # The object does exist.
         return True
+
+
+def download_s3_file(key, bucket, output_filename):
+    """
+    Quick utility to download a file from s3.
+
+    Args:
+        key (str): key of the object to download
+        bucket (str): bucket from which to download
+        output_filename (str): local filename for the downloaded object
+
+    Returns:
+        (bool): True once the download has completed
+
+    """
+    s3_client = boto3.client('s3')
+    s3_client.download_file(bucket, key, output_filename)
+    return True
+
+
+def upload_s3_file(key, bucket, filename):
+    """
+    Quick utility to upload a local file to s3.
+
+    Args:
+        key (str): key under which to store the object
+        bucket (str): bucket to which to upload
+        filename (str): local filename of the file to upload
+
+    Returns:
+        (bool): True once the upload has completed
+
+    """
+    s3_client = boto3.client('s3')
+    s3_client.upload_file(filename, bucket, key)
+    return True
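A short usage sketch of the two new helpers, with a hypothetical bucket and key, assuming boto3 credentials are configured; both return True on success and surface boto3's own exceptions on failure:

    import pandas as pd
    from camd.utils.data import download_s3_file, upload_s3_file

    # Round-trip a DataFrame pickle through S3 via local files.
    df = pd.DataFrame({"formula": ["Fe2O3"], "delta_e": [-1.23]})
    df.to_pickle("df.pickle")
    upload_s3_file("scratch/df.pickle", "my-camd-bucket", "df.pickle")

    download_s3_file("scratch/df.pickle", "my-camd-bucket", "df_copy.pickle")
    restored = pd.read_pickle("df_copy.pickle")
    assert restored.equals(df)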
2 changes: 0 additions & 2 deletions requirements.txt
@@ -15,8 +15,6 @@ taburu==2020.5.9
 protosearch==2020.5.10
 GPy==1.9.9
 watchtower==1.0.6
-fsspec==0.9.0
-s3fs==0.6.0
 
 # Pinned dependencies to help pip
 tensorflow==2.4.0
4 changes: 1 addition & 3 deletions setup.py
@@ -55,9 +55,7 @@
         "scikit-learn==0.24.1",
         "taburu==2020.5.9",
         "GPy==1.9.9",
-        "watchtower==1.0.6",
-        "fsspec==0.9.0",
-        "s3fs==0.6.0"
+        "watchtower==1.0.6"
     ],
     extras_require={
         "proto_dft": ["protosearch==2020.5.10"],
