Skip to content

Commit

Permalink
Merge pull request #45 from Knowledge-Graph-Hub/misc_fixes_1
Browse files Browse the repository at this point in the history
Remove lockfile properly and when encountering runtime errors
  • Loading branch information
caufieldjh committed Sep 15, 2021
2 parents 21e4d9a + d22acb4 commit 5652f33
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 14 deletions.
47 changes: 36 additions & 11 deletions kg_obo/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,8 @@ def track_obo_version(name: str = "", iri: str = "",
with open(track_file_local_path, 'w') as track_file:
track_file.write(yaml.dump(tracking))

client.upload_file(Filename=track_file_local_path, Bucket=bucket, Key=track_file_remote_path)
client.upload_file(Filename=track_file_local_path, Bucket=bucket, Key=track_file_remote_path,
ExtraArgs={'ACL':'public-read'})

os.unlink(track_file_local_path)

Expand Down Expand Up @@ -223,12 +224,27 @@ def download_ontology(url: str, file: str, logger: object) -> bool:

def run_transform(skip: list = [], get_only: list = [], bucket="bucket",
save_local=False, s3_test=False,
lock_file_remote_path: str = "kg-obo/lock",
log_dir="logs", data_dir="data",
remote_path="kg-obo",
track_file_local_path: str = "data/tracking.yaml",
tracking_file_remote_path: str = "kg-obo/tracking.yaml",
lock_file_remote_path: str = "kg-obo/lock"
) -> None:
tracking_file_remote_path: str = "kg-obo/tracking.yaml"
) -> bool:
"""
Perform setup, then kgx-mediated transforms for all specified OBOs.
:param skip: list of OBOs to skip, by ID
:param get_only: list of OBOs to transform, by ID (otherwise do all)
:param bucket: str of S3 bucket, to be specified as argument
:param save_local: bool for whether to retain transform results on local disk
:param s3_test: bool for whether to perform mock S3 upload only
:param lock_file_remote_path: str of path for lock file on S3
:param log_dir: str of local dir where any logs should be saved
:param data_dir: str of local dir where data should be saved
:param remote_path: str of remote path on S3 bucket
:param track_file_local_path: str of local path for tracking file
:param tracking_file_remote_path: str of path of tracking file on S3
:return: True on success or when an existing run is already in progress; False on an unresolved error
"""

# Set up logging
timestring = (datetime.now()).strftime("%Y-%m-%d_%H-%M-%S")
Expand All @@ -248,28 +264,35 @@ def run_transform(skip: list = [], get_only: list = [], bucket="bucket",
kgx_logger.addHandler(root_logger_handler)

# Check if there's already a run in progress (i.e., lock file exists)
# An existing run isn't an error, so we return True instead of exiting with an error status
if s3_test:
if kg_obo.upload.mock_check_lock(bucket, lock_file_remote_path):
sys.exit("Could not mock checking for lock file. Exiting...")
print("Could not mock checking for lock file. Exiting...")
return True
else:
if kg_obo.upload.check_lock(bucket, lock_file_remote_path):
sys.exit("A kg-obo run appears to be in progress. Exiting...")
print("A kg-obo run appears to be in progress. Exiting...")
return True

# Now set the lockfile
if s3_test:
if not kg_obo.upload.mock_set_lock(bucket, lock_file_remote_path, unlock=False):
sys.exit("Could not mock setting lock file. Exiting...")
print("Could not mock setting lock file. Exiting...")
return False
else:
if not kg_obo.upload.set_lock(bucket, lock_file_remote_path, unlock=False):
sys.exit("Could not set lock file on remote server. Exiting...")
print("Could not set lock file on remote server. Exiting...")
return False

# Check on existence of tracking file, and quit if it doesn't exist
# Check on existence of tracking file
if s3_test:
if not kg_obo.upload.mock_check_tracking(bucket, tracking_file_remote_path):
sys.exit("Could not mock checking tracking file. Exiting...")
print("Could not mock checking tracking file. Exiting...")
return False
else:
if not kg_obo.upload.check_tracking(bucket, tracking_file_remote_path):
sys.exit("Cannot locate tracking file on remote storage. Exiting...")
print("Cannot locate tracking file on remote storage. Exiting...")
return False

# Get the OBO Foundry list YAML and process each
yaml_onto_list_filtered = retrieve_obofoundry_yaml(skip=skip, get_only=get_only)
Expand Down Expand Up @@ -393,4 +416,6 @@ def run_transform(skip: list = [], get_only: list = [], bucket="bucket",
else:
if not kg_obo.upload.set_lock(bucket,lock_file_remote_path,unlock=True):
sys.exit("Could not set lock file on remote server. Exiting...")

return True

6 changes: 4 additions & 2 deletions kg_obo/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,8 +256,8 @@ def upload_index_files(ontology_name: str, versioned_obo_path: str) -> None:
:param versioned_obo_path: str of directory containing this ontology version
"""

# At present this will rebuild the root index at every transform/upload, which isn't great
# so a different function or making this more generic may help
# At present this will rebuild the root index at every transform/upload -
# this is intentional; otherwise index updates may not be written if the process exits early

ifilename = "index.html"

Expand Down Expand Up @@ -295,3 +295,5 @@ def upload_index_files(ontology_name: str, versioned_obo_path: str) -> None:
if filename != 'index.html':
ifile.write(f"\t\t<li>\n\t\t\t<a href={filename}>{filename}</a>\n\t\t</li>\n")
ifile.write(index_tail)

print(f"Created index for {dir}")
19 changes: 18 additions & 1 deletion run.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import sys
from pathlib import Path
from kg_obo.transform import run_transform
import kg_obo.upload

@click.command()
@click.option("--skip",
Expand All @@ -33,7 +34,23 @@
is_flag=True,
help="If used, upload to S3 bucket is tested only and false credentials are used.")
def run(skip, get_only, bucket, save_local, s3_test):
run_transform(skip, get_only, bucket, save_local, s3_test)
lock_file_remote_path = "kg-obo/lock"
try:
if run_transform(skip, get_only, bucket, save_local, s3_test, lock_file_remote_path):
print("Operation completed without errors.")
else:
print("Operation encountered errors. See logs for details.")
except Exception as e:
print(e)
print("Removing lock due to error...")
if s3_test:
if not kg_obo.upload.mock_set_lock(bucket,lock_file_remote_path,unlock=True):
print("Could not mock setting lock file.")
else:
if not kg_obo.upload.set_lock(bucket,lock_file_remote_path,unlock=True):
print("Could not remove lock file due to yet another error.")
else:
print("Lock removed.")

if __name__ == '__main__':
run()
2 changes: 2 additions & 0 deletions tests/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ def test_retrieve_obofoundry_yaml_select(self):
self.assertEqual(yaml_onto_list_filtered[0], self.parsed_obo_yaml_sample[0])
yaml_onto_list_filtered = retrieve_obofoundry_yaml(yaml_url="https://raw.githubusercontent.com/Knowledge-Graph-Hub/kg-obo/main/tests/resources/ontologies.yml", skip=[],get_only=["bfo"])
self.assertEqual(yaml_onto_list_filtered[0], self.parsed_obo_yaml_sample[0])
with pytest.raises(Exception):
yaml_onto_list_filtered = retreive_obofoundry_yaml(yaml_url="")

@mock.patch('boto3.client')
def test_track_obo_version(self, mock_boto):
Expand Down

0 comments on commit 5652f33

Please sign in to comment.