From fc04fde2273a9ce3a9a3cfb185353b9d0a66ea8c Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Fri, 11 Sep 2020 16:37:28 -0700 Subject: [PATCH 001/130] Remove superfluous pip install commands --- Jenkinsfile | 2 -- 1 file changed, 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index a28b1cb4..89070f0e 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -44,8 +44,6 @@ pipeline { ) sh '/usr/bin/python3.7 -m venv venv' sh '. venv/bin/activate' - sh './venv/bin/pip install wheel' - sh './venv/bin/pip install bmt' sh './venv/bin/pip install -r requirements.txt' sh './venv/bin/pip install .' } From 13581b04fff89ce01e2ae1423fe1c76982a66834 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Fri, 11 Sep 2020 17:54:03 -0700 Subject: [PATCH 002/130] Add build versioning to Jenkinsfile, also add code to make index.html files for s3 buckets --- Jenkinsfile | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 89070f0e..db92d7fb 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -117,18 +117,38 @@ pipeline { steps { dir('./gitrepo') { script { - if (env.BRANCH_NAME != 'master') { + if (env.BRANCH_NAME != 'master' || env.BRANCH_NAME != 'add_versioning_of_builds_run_jenkins') { echo "Will not push if not on correct branch." } else { + // code for building s3 index files + dir('./go-site') { + git branch: master, url: 'https://github.com/justaddcoffee/go-site.git' + } withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_JSON')]) { sh 'rm -fr data/transformed/.gitkeep' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text --cf-invalidate put -r data/transformed s3://kg-hub-public-data/' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text --cf-invalidate put data/merged/merged-kg.nt.gz s3://kg-hub-public-data/kg-covid-19.nt.gz' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text --cf-invalidate put data/merged/merged-kg.tar.gz s3://kg-hub-public-data/kg-covid-19.tar.gz' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text --cf-invalidate put merged-kg.jnl.gz s3://kg-hub-public-data/kg-covid-19.jnl.gz' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text --cf-invalidate put *_stats*.yaml s3://kg-hub-public-data/' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text --cf-invalidate ls s3://kg-hub-public-data/ | grep yaml > yaml_manifests.txt' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text --cf-invalidate put yaml_manifests.txt s3://kg-hub-public-data/' + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text --cf-invalidate put -r data/transformed s3://kg-hub-public-data/$BUILDSTARTDATE/' + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text --cf-invalidate put data/merged/merged-kg.nt.gz s3://kg-hub-public-data/$BUILDSTARTDATE/kg-covid-19.nt.gz' + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text --cf-invalidate put data/merged/merged-kg.tar.gz s3://kg-hub-public-data/$BUILDSTARTDATE/kg-covid-19.tar.gz' + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text --cf-invalidate put merged-kg.jnl.gz s3://kg-hub-public-data/$BUILDSTARTDATE/kg-covid-19.jnl.gz' + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text --cf-invalidate put *_stats.yaml s3://kg-hub-public-data/$BUILDSTARTDATE/stats/' + + // Build the new build directory index.html + sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data/$BUILDSTARTDATE --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/$BUILDSTARTDATE/ > build-index.html' + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put build-index.html s3://kg-hub-public-data/$BUILDSTARTDATE/index.html' + + // Build the new build YAML subdirectory index.html + sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data/$BUILDSTARTDATE/yaml --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/$BUILDSTARTDATE/yaml/ > yaml-index.html' + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put yaml-index.html s3://kg-hub-public-data/$BUILDSTARTDATE/yaml/index.html' + + // Build the new build transformed subdirectory index.html + sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data/$BUILDSTARTDATE/transformed --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/$BUILDSTARTDATE/transformed/ > transformed-index.html' + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put transformed-index.html s3://kg-hub-public-data/$BUILDSTARTDATE/transformed/index.html' + + // Build the top level index.html + sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' + + // Should now appear at: // https://kg-hub.berkeleybop.io/[artifact name] } From a1b0386f6bbeccfd216434586fcd927b18b139db Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 14 Sep 2020 09:27:42 -0700 Subject: [PATCH 003/130] Update Jenkinsfile to do builds, and to make index.html files --- Jenkinsfile | 44 +++++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index db92d7fb..54ba8e9c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -125,30 +125,48 @@ pipeline { git branch: master, url: 'https://github.com/justaddcoffee/go-site.git' } withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_JSON')]) { + // + // make $BUILDSTARTDATE/ directory and sync to s3 bucket + // + sh 'mkdir $BUILDSTARTDATE/' + sh 'cp -p data/merged/merged-kg.nt.gz $BUILDSTARTDATE/' + sh 'cp -p data/merged/merged-kg.tar.gz $BUILDSTARTDATE/' + sh 'cp -p merged-kg.jnl.gz $BUILDSTARTDATE/' + // transformed data sh 'rm -fr data/transformed/.gitkeep' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text --cf-invalidate put -r data/transformed s3://kg-hub-public-data/$BUILDSTARTDATE/' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text --cf-invalidate put data/merged/merged-kg.nt.gz s3://kg-hub-public-data/$BUILDSTARTDATE/kg-covid-19.nt.gz' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text --cf-invalidate put data/merged/merged-kg.tar.gz s3://kg-hub-public-data/$BUILDSTARTDATE/kg-covid-19.tar.gz' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text --cf-invalidate put merged-kg.jnl.gz s3://kg-hub-public-data/$BUILDSTARTDATE/kg-covid-19.jnl.gz' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text --cf-invalidate put *_stats.yaml s3://kg-hub-public-data/$BUILDSTARTDATE/stats/' - - // Build the new build directory index.html + sh 'cp -pr data/transformed $BUILDSTARTDATE/' + // stats dir + sh 'mkdir $BUILDSTARTDATE/stats/' + sh 'cp -p *_stats.yaml $BUILDSTARTDATE/stats/' + + // + // put $BUILDSTARTDATE/ in s3 bucket + // + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put -pr $BUILDSTARTDATE s3://kg-hub-public-data/' + + // + // Build the new build directory index.html + // sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data/$BUILDSTARTDATE --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/$BUILDSTARTDATE/ > build-index.html' sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put build-index.html s3://kg-hub-public-data/$BUILDSTARTDATE/index.html' - - // Build the new build YAML subdirectory index.html - sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data/$BUILDSTARTDATE/yaml --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/$BUILDSTARTDATE/yaml/ > yaml-index.html' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put yaml-index.html s3://kg-hub-public-data/$BUILDSTARTDATE/yaml/index.html' - + // Build the new build stats subdirectory index.html + sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data/$BUILDSTARTDATE/stats --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/$BUILDSTARTDATE/stats/ > stats-index.html' + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put stats-index.html s3://kg-hub-public-data/$BUILDSTARTDATE/stats/index.html' // Build the new build transformed subdirectory index.html sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data/$BUILDSTARTDATE/transformed --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/$BUILDSTARTDATE/transformed/ > transformed-index.html' sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put transformed-index.html s3://kg-hub-public-data/$BUILDSTARTDATE/transformed/index.html' + // + // make $BUILDSTARTDATE the new current/ + // + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate cp -pr s3://kg-hub-public-data/$BUILDSTARTDATE s3://kg-hub-public-data/new_current' + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate rm -fr s3://kg-hub-public-data/current' + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate mv s3://kg-hub-public-data/new_current s3://kg-hub-public-data/current' + // Build the top level index.html sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' - // Should now appear at: // https://kg-hub.berkeleybop.io/[artifact name] } From 84af9559f8dc212be599f17bacaebc0327108c2e Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 14 Sep 2020 09:35:39 -0700 Subject: [PATCH 004/130] Add pip install networkx to Jenkinsfile - seems to be required by kgx now --- Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Jenkinsfile b/Jenkinsfile index 54ba8e9c..7d57466c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -44,6 +44,7 @@ pipeline { ) sh '/usr/bin/python3.7 -m venv venv' sh '. venv/bin/activate' + sh './venv/bin/pip install networkx' sh './venv/bin/pip install -r requirements.txt' sh './venv/bin/pip install .' } From 2236b842bb616f50062e0b798ac20faa02840d49 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 14 Sep 2020 09:38:35 -0700 Subject: [PATCH 005/130] Add pandas install before KGX install --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 7d57466c..06a86289 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -44,7 +44,7 @@ pipeline { ) sh '/usr/bin/python3.7 -m venv venv' sh '. venv/bin/activate' - sh './venv/bin/pip install networkx' + sh './venv/bin/pip install networkx pandas' sh './venv/bin/pip install -r requirements.txt' sh './venv/bin/pip install .' } From f37a16da937a7340c055ee5c6aeb75184f2558cb Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 14 Sep 2020 09:55:17 -0700 Subject: [PATCH 006/130] Added install of KGX requirements --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 06a86289..98017067 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -44,7 +44,7 @@ pipeline { ) sh '/usr/bin/python3.7 -m venv venv' sh '. venv/bin/activate' - sh './venv/bin/pip install networkx pandas' + sh './venv/bin/pip install networkx pandas ordered_set requests pyyaml stringcase biolinkml bmt cachetools pystache neo4jrestclient validators' sh './venv/bin/pip install -r requirements.txt' sh './venv/bin/pip install .' } From 177d7a2212131749a834d2962805c35012b5d710 Mon Sep 17 00:00:00 2001 From: Deepak Unni Date: Sat, 12 Sep 2020 15:42:16 -0700 Subject: [PATCH 007/130] Update merge step by leveraging KGX --- kg_covid_19/merge_utils/merge_kg.py | 139 +----------------------- merge.yaml | 161 +++++++++++++++++----------- requirements.txt | 2 +- run.py | 6 +- 4 files changed, 106 insertions(+), 202 deletions(-) diff --git a/kg_covid_19/merge_utils/merge_kg.py b/kg_covid_19/merge_utils/merge_kg.py index ab2ac604..0d51889e 100644 --- a/kg_covid_19/merge_utils/merge_kg.py +++ b/kg_covid_19/merge_utils/merge_kg.py @@ -4,10 +4,7 @@ from typing import Dict, List import yaml import networkx as nx -from kgx import NeoTransformer -from kgx.cli.utils import get_file_types, get_transformer -from kgx.operations.graph_merge import merge_all_graphs -from kgx.operations.summarize_graph import generate_graph_stats +from kgx.cli.cli_utils import merge def parse_load_config(yaml_file: str) -> Dict: @@ -24,145 +21,17 @@ def parse_load_config(yaml_file: str) -> Dict: config = yaml.load(YML, Loader=yaml.FullLoader) return config -# For NT export, any property that needs to be treated -# as anything but xsd:string should be defined here. -PROPERTY_TYPES = { - 'combined_score': 'xsd:float', - "neighborhood": 'xsd:float', - "neighborhood_transferred": 'xsd:float', - "fusion": 'xsd:float', - "cooccurence": 'xsd:float', - "homology": 'xsd:float', - "coexpression": 'xsd:float', - "coexpression_transferred": 'xsd:float', - "experiments": 'xsd:float', - "experiments_transferred": 'xsd:float', - "database": 'xsd:float', - "database_transferred": 'xsd:float', - "textmining": 'xsd:float', - "textmining_transferred": 'xsd:float' -} -def load_and_merge(yaml_file: str) -> nx.MultiDiGraph: +def load_and_merge(yaml_file: str, processes: int = 1) -> nx.MultiDiGraph: """Load and merge sources defined in the config YAML. Args: yaml_file: A string pointing to a KGX compatible config YAML. + processes: Number of processes to use. Returns: networkx.MultiDiGraph: The merged graph. """ - config = parse_load_config(yaml_file) - transformers: List = [] - - # make sure all files exist before we start load - for key in config['target']: - target = config['target'][key] - logging.info("Checking that file exist for {}".format(key)) - if target['type'] in get_file_types(): - for f in target['filename']: - if not os.path.exists(f) or not os.path.isfile(f): - raise FileNotFoundError("File {} for transform {} in yaml file {} " - "doesn't exist! Dying.", f, key, yaml_file) - - # read all the sources defined in the YAML - for key in config['target']: - target = config['target'][key] - logging.info("Loading {}".format(key)) - if target['type'] in get_file_types(): - # loading from a file - transformer = get_transformer(target['type'])() - if target['type'] in {'tsv', 'neo4j'}: - if 'filters' in target: - apply_filters(target, transformer) - for f in target['filename']: - transformer.parse(f, input_format='tsv') - transformer.graph.name = key - if 'operations' in target: - apply_operations(target, transformer) - transformers.append(transformer) - elif target['type'] == 'neo4j': - transformer = NeoTransformer(None, target['uri'], target['username'], target['password']) - if 'filters' in target: - apply_filters(target, transformer) - transformer.load() - if 'operations' in target: - apply_operations(target, transformer) - transformers.append(transformer) - transformer.graph.name = key - else: - logging.error("type {} not yet supported".format(target['type'])) - stats_filename = f"{key}_stats.yaml" - generate_graph_stats(transformer.graph, key, stats_filename) - - # merge all subgraphs into a single graph - merged_graph = merge_all_graphs([x.graph for x in transformers]) - merged_graph.name = 'merged_graph' - generate_graph_stats(merged_graph, merged_graph.name, "merged_graph_stats.yaml", ['provided_by'], ['provided_by']) - - # write the merged graph - if 'destination' in config: - for _, destination in config['destination'].items(): - if destination['type'] == 'neo4j': - destination_transformer = NeoTransformer( - merged_graph, - uri=destination['uri'], - username=destination['username'], - password=destination['password'] - ) - destination_transformer.save() - elif destination['type'] in get_file_types(): - destination_transformer = get_transformer(destination['type'])(merged_graph) - mode = 'w:gz' if destination['type'] in {'tsv'} else None - if destination['type'] in {'nt', 'nt.gz', 'ttl'}: - destination_transformer.set_property_types(PROPERTY_TYPES) - destination_transformer.save(destination['filename'], output_format=destination['type'], mode=mode) - else: - logging.error("type {} not yet supported for KGX load-and-merge operation.".format(destination['type'])) - + merged_graph = merge(yaml_file, processes=processes) return merged_graph - - -def apply_filters(target, transformer): - """Apply filters as defined in the YAML. - - Args: - target: The target from the YAML - transformer: The transformer corresponding to the target - - Returns: - None - - """ - filters = target['filters'] - node_filters = filters['node_filters'] if 'node_filters' in filters else {} - edge_filters = filters['edge_filters'] if 'edge_filters' in filters else {} - for k, v in node_filters.items(): - transformer.set_node_filter(k, set(v)) - for k, v in edge_filters.items(): - transformer.set_edge_filter(k, set(v)) - logging.info(f"with node filters: {node_filters}") - logging.info(f"with edge filters: {edge_filters}") - - -def apply_operations(target, transformer): - """Apply operations as defined in the YAML. - - Args: - target: The target from the YAML - transformer: The transformer corresponding to the target - - Returns: - None - - """ - operations = target['operations'] - for operation in operations: - op_name = operation['name'] - op_args = operation['args'] - module_name = '.'.join(op_name.split('.')[0:-1]) - function_name = op_name.split('.')[-1] - f = getattr(importlib.import_module(module_name), function_name) - logging.info(f"Applying operation {op_name} with args: {op_args}") - f(transformer.graph, **op_args) diff --git a/merge.yaml b/merge.yaml index 8e563296..0dca59c4 100644 --- a/merge.yaml +++ b/merge.yaml @@ -1,100 +1,133 @@ --- -target: - drug-central: +configuration: + output_directory: data/merged + checkpoint: true + property_types: + # define the type for non-canonical node/edge properties + combined_score: 'xsd:float' + confidence_score: 'xsd:float' + neighborhood: 'xsd:float' + neighborhood_transferred: 'xsd:float' + fusion: 'xsd:float' + cooccurence: 'xsd:float' + homology: 'xsd:float' + coexpression: 'xsd:float' + coexpression_transferred: 'xsd:float' + experiments: 'xsd:float' + experiments_transferred: 'xsd:float' + database: 'xsd:float' + database_transferred: 'xsd:float' + textmining: 'xsd:float' + textmining_transferred: 'xsd:float' + +merged_graph: + name: KG-COVID-19 Graph + targets: + drug-central: type: tsv filename: - - data/transformed/drug_central/nodes.tsv - - data/transformed/drug_central/edges.tsv - pharmgkb: + - data/transformed/drug_central/nodes.tsv + - data/transformed/drug_central/edges.tsv + pharmgkb: type: tsv filename: - - data/transformed/pharmgkb/nodes.tsv - - data/transformed/pharmgkb/edges.tsv - STRING: + - data/transformed/pharmgkb/nodes.tsv + - data/transformed/pharmgkb/edges.tsv + STRING: type: tsv filename: - - data/transformed/STRING/nodes.tsv - - data/transformed/STRING/edges.tsv + - data/transformed/STRING/nodes.tsv + - data/transformed/STRING/edges.tsv filters: - node_filters: - category: - - biolink:Gene - - biolink:Protein - edge_filters: - subject_category: - - biolink:Gene - - biolink:Protein - object_category: - - biolink:Gene - - biolink:Protein - edge_label: - - biolink:interacts_with - - biolink:has_gene_product + node_filters: + category: + - biolink:Gene + - biolink:Protein + edge_filters: + subject_category: + - biolink:Gene + - biolink:Protein + object_category: + - biolink:Gene + - biolink:Protein + edge_label: + - biolink:interacts_with + - biolink:has_gene_product operations: - - name: kgx.utils.graph_utils.remap_node_identifier - args: + - name: kgx.utils.graph_utils.remap_node_identifier + args: category: biolink:Protein alternative_property: xrefs prefix: UniProtKB - - ttd: + ttd: type: tsv filename: - - data/transformed/ttd/nodes.tsv - - data/transformed/ttd/edges.tsv - zhou-host-proteins: + - data/transformed/ttd/nodes.tsv + - data/transformed/ttd/edges.tsv + zhou-host-proteins: type: tsv filename: - - data/transformed/zhou_host_proteins/nodes.tsv - - data/transformed/zhou_host_proteins/edges.tsv - SciBite-CORD-19: + - data/transformed/zhou_host_proteins/nodes.tsv + - data/transformed/zhou_host_proteins/edges.tsv + SciBite-CORD-19: type: tsv filename: - - data/transformed/SciBite-CORD-19/nodes.tsv - - data/transformed/SciBite-CORD-19/edges.tsv - sars-cov-2-gene-annot: + - data/transformed/SciBite-CORD-19/nodes.tsv + - data/transformed/SciBite-CORD-19/edges.tsv + sars-cov-2-gene-annot: type: tsv filename: - - data/transformed/sars_cov_2_gene_annot/nodes.tsv - - data/transformed/sars_cov_2_gene_annot/edges.tsv - intact: + - data/transformed/sars_cov_2_gene_annot/nodes.tsv + - data/transformed/sars_cov_2_gene_annot/edges.tsv + intact: type: tsv filename: - - data/transformed/intact/nodes.tsv - - data/transformed/intact/edges.tsv - chembl: + - data/transformed/intact/nodes.tsv + - data/transformed/intact/edges.tsv + chembl: type: tsv filename: - - data/transformed/ChEMBL/nodes.tsv - - data/transformed/ChEMBL/edges.tsv - gene-ontology: + - data/transformed/ChEMBL/nodes.tsv + - data/transformed/ChEMBL/edges.tsv + gene-ontology: type: tsv filename: - - data/transformed/ontologies/go-plus_nodes.tsv - - data/transformed/ontologies/go-plus_edges.tsv - mondo-ontology: + - data/transformed/ontologies/go-plus_nodes.tsv + - data/transformed/ontologies/go-plus_edges.tsv + mondo-ontology: type: tsv filename: - - data/transformed/ontologies/mondo_nodes.tsv - - data/transformed/ontologies/mondo_edges.tsv - hp-ontology: + - data/transformed/ontologies/mondo_nodes.tsv + - data/transformed/ontologies/mondo_edges.tsv + hp-ontology: type: tsv filename: - - data/transformed/ontologies/hp_nodes.tsv - - data/transformed/ontologies/hp_edges.tsv - go-cams: + - data/transformed/ontologies/hp_nodes.tsv + - data/transformed/ontologies/hp_edges.tsv + go-cams: type: tsv filename: - - data/transformed/GOCAMs/GOCAMs_nodes.tsv - - data/transformed/GOCAMs/GOCAMs_edges.tsv -destination: - merged-kg-tsv: + - data/transformed/GOCAMs/GOCAMs_nodes.tsv + - data/transformed/GOCAMs/GOCAMs_edges.tsv + operations: + - name: kgx.operations.summarize_graph.generate_graph_stats + args: + graph_name: KG-COVID-19 Graph + filename: merged_graph_stats.yaml + node_facet_properties: + - provided_by + edge_facet_properties: + - provided_by + destination: + merged-kg-tsv: type: tsv - filename: data/merged/merged-kg - merged-kg-nt: - type: nt.gz - filename: data/merged/merged-kg.nt.gz -# merged-kg-neo4j: + compression: tar.gz + filename: merged-kg + merged-kg-nt: + type: nt + compression: gz + filename: merged-kg.nt.gz +# merged-kg-neo4j: # type: neo4j # uri: http://localhost:8484 # username: neo4j diff --git a/requirements.txt b/requirements.txt index 7081de3f..0cfd8611 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1 @@ -git+git://github.com/deepakunni3/kgx +git+git://github.com/deepakunni3/kgx@kg-covid-19 diff --git a/run.py b/run.py index 31365829..77ce8ea7 100644 --- a/run.py +++ b/run.py @@ -68,18 +68,20 @@ def transform(*args, **kwargs) -> None: @cli.command() @click.option('yaml', '-y', default="merge.yaml", type=click.Path(exists=True)) -def merge(yaml: str) -> None: +@click.option('processes', '-p', default=1, type=int) +def merge(yaml: str, processes: int) -> None: """Use KGX to load subgraphs to create a merged graph. Args: yaml: A string pointing to a KGX compatible config YAML. + processes: Number of processes to use. Returns: None. """ - load_and_merge(yaml) + load_and_merge(yaml, processes) @cli.command() From 670900c08f331aa18461ff7b6583a9c033f2b801 Mon Sep 17 00:00:00 2001 From: Deepak Unni Date: Sat, 12 Sep 2020 16:11:13 -0700 Subject: [PATCH 008/130] Set checkpoint to false in merge.yaml --- merge.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/merge.yaml b/merge.yaml index 0dca59c4..8f9a6774 100644 --- a/merge.yaml +++ b/merge.yaml @@ -1,7 +1,7 @@ --- configuration: output_directory: data/merged - checkpoint: true + checkpoint: false property_types: # define the type for non-canonical node/edge properties combined_score: 'xsd:float' From f74203afb1d158c39ec37a8c767e51085342589a Mon Sep 17 00:00:00 2001 From: Deepak Unni Date: Sat, 12 Sep 2020 18:54:34 -0700 Subject: [PATCH 009/130] Fix tests --- tests/resources/merge_MISSING_FILE.yaml | 13 +++++++------ tests/resources/merge_valid.yaml | 13 +++++++------ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/tests/resources/merge_MISSING_FILE.yaml b/tests/resources/merge_MISSING_FILE.yaml index 780de0d0..d58ef132 100644 --- a/tests/resources/merge_MISSING_FILE.yaml +++ b/tests/resources/merge_MISSING_FILE.yaml @@ -1,10 +1,11 @@ --- -target: - drug-central: +merged_graph: + targets: + drug-central: type: tsv filename: - - data/transformed/drug_central/nodes.tsv - - data/transformed/drug_central/DOESNT_EXIST.tsv + - data/transformed/drug_central/nodes.tsv + - data/transformed/drug_central/DOESNT_EXIST.tsv destination: - type: tsv - filename: merged-kg + type: tsv + filename: merged-kg diff --git a/tests/resources/merge_valid.yaml b/tests/resources/merge_valid.yaml index f3027ecc..74e5b556 100644 --- a/tests/resources/merge_valid.yaml +++ b/tests/resources/merge_valid.yaml @@ -1,10 +1,11 @@ --- -target: - drug-central: +merged_graph: + targets: + drug-central: type: tsv filename: - - data/transformed/drug_central/nodes.tsv - - data/transformed/drug_central/edges.tsv + - data/transformed/drug_central/nodes.tsv + - data/transformed/drug_central/edges.tsv destination: - type: tsv - filename: merged-kg + type: tsv + filename: merged-kg From b054998383d59ec16da1cc27f23805620ab6f5b5 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 14 Sep 2020 12:07:13 -0700 Subject: [PATCH 010/130] Fix tabs --- Jenkinsfile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 98017067..d9676a47 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -148,14 +148,14 @@ pipeline { // // Build the new build directory index.html // - sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data/$BUILDSTARTDATE --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/$BUILDSTARTDATE/ > build-index.html' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put build-index.html s3://kg-hub-public-data/$BUILDSTARTDATE/index.html' + sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data/$BUILDSTARTDATE --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/$BUILDSTARTDATE/ > build-index.html' + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put build-index.html s3://kg-hub-public-data/$BUILDSTARTDATE/index.html' // Build the new build stats subdirectory index.html - sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data/$BUILDSTARTDATE/stats --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/$BUILDSTARTDATE/stats/ > stats-index.html' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put stats-index.html s3://kg-hub-public-data/$BUILDSTARTDATE/stats/index.html' + sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data/$BUILDSTARTDATE/stats --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/$BUILDSTARTDATE/stats/ > stats-index.html' + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put stats-index.html s3://kg-hub-public-data/$BUILDSTARTDATE/stats/index.html' // Build the new build transformed subdirectory index.html - sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data/$BUILDSTARTDATE/transformed --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/$BUILDSTARTDATE/transformed/ > transformed-index.html' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put transformed-index.html s3://kg-hub-public-data/$BUILDSTARTDATE/transformed/index.html' + sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data/$BUILDSTARTDATE/transformed --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/$BUILDSTARTDATE/transformed/ > transformed-index.html' + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put transformed-index.html s3://kg-hub-public-data/$BUILDSTARTDATE/transformed/index.html' // // make $BUILDSTARTDATE the new current/ From 23f0b3fb9925c178bb3970fbf4066d4669dc926f Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 14 Sep 2020 12:18:49 -0700 Subject: [PATCH 011/130] Remove extra pip install commands for KGX --- Jenkinsfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index d9676a47..f1a48ed3 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -44,7 +44,6 @@ pipeline { ) sh '/usr/bin/python3.7 -m venv venv' sh '. venv/bin/activate' - sh './venv/bin/pip install networkx pandas ordered_set requests pyyaml stringcase biolinkml bmt cachetools pystache neo4jrestclient validators' sh './venv/bin/pip install -r requirements.txt' sh './venv/bin/pip install .' } From 05cf8d24f0bbef20e9ace26ebf7052af96f1c625 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 14 Sep 2020 12:31:03 -0700 Subject: [PATCH 012/130] Add back pip install stuff --- Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Jenkinsfile b/Jenkinsfile index f1a48ed3..d9676a47 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -44,6 +44,7 @@ pipeline { ) sh '/usr/bin/python3.7 -m venv venv' sh '. venv/bin/activate' + sh './venv/bin/pip install networkx pandas ordered_set requests pyyaml stringcase biolinkml bmt cachetools pystache neo4jrestclient validators' sh './venv/bin/pip install -r requirements.txt' sh './venv/bin/pip install .' } From 0c988d5459ca06252f54217247454f9a02f89a56 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 14 Sep 2020 15:53:13 -0700 Subject: [PATCH 013/130] Bump python to 3.8 --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index d9676a47..8b589a51 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -42,7 +42,7 @@ pipeline { url: 'https://github.com/Knowledge-Graph-Hub/kg-covid-19', branch: env.BRANCH_NAME ) - sh '/usr/bin/python3.7 -m venv venv' + sh '/usr/bin/python3.8 -m venv venv' sh '. venv/bin/activate' sh './venv/bin/pip install networkx pandas ordered_set requests pyyaml stringcase biolinkml bmt cachetools pystache neo4jrestclient validators' sh './venv/bin/pip install -r requirements.txt' From 9b4160efce5a7a9e3e8b823f3fc5e389f310d645 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 14 Sep 2020 16:29:20 -0700 Subject: [PATCH 014/130] Bump python to 3.8 in setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 65cfe157..65e97ba5 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ def find_version(*file_paths): url='https://github.com/justaddcoffee/kg-emerging-viruses', author='justaddcoffee+github@gmail.com', author_email='Justin Reese', - python_requires='>=3.7', + python_requires='>=3.8', # choose your license license='BSD-3', From 9fee4e63b6f7c08d3c1abeced243a2d7e5b64e57 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 14 Sep 2020 16:29:38 -0700 Subject: [PATCH 015/130] Add version to bmt install --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 8b589a51..ec1aec05 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -44,7 +44,7 @@ pipeline { ) sh '/usr/bin/python3.8 -m venv venv' sh '. venv/bin/activate' - sh './venv/bin/pip install networkx pandas ordered_set requests pyyaml stringcase biolinkml bmt cachetools pystache neo4jrestclient validators' + sh './venv/bin/pip install networkx pandas ordered_set requests pyyaml stringcase biolinkml bmt==1.5.8 cachetools pystache neo4jrestclient validators' sh './venv/bin/pip install -r requirements.txt' sh './venv/bin/pip install .' } From ed5184fed9671f0be3a30b11ca15f45be8e57428 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 14 Sep 2020 16:40:47 -0700 Subject: [PATCH 016/130] Correct version requirement for biolinkml --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index ec1aec05..933570fb 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -44,7 +44,7 @@ pipeline { ) sh '/usr/bin/python3.8 -m venv venv' sh '. venv/bin/activate' - sh './venv/bin/pip install networkx pandas ordered_set requests pyyaml stringcase biolinkml bmt==1.5.8 cachetools pystache neo4jrestclient validators' + sh './venv/bin/pip install networkx pandas ordered_set requests pyyaml stringcase biolinkml==1.5.8 bmt cachetools pystache neo4jrestclient validators' sh './venv/bin/pip install -r requirements.txt' sh './venv/bin/pip install .' } From b95b9d9dc6f4ecfc43919f9ac7b440b735a97c74 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 14 Sep 2020 17:11:58 -0700 Subject: [PATCH 017/130] Try biolinkml version 1.5.7 --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 933570fb..c07a159c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -44,7 +44,7 @@ pipeline { ) sh '/usr/bin/python3.8 -m venv venv' sh '. venv/bin/activate' - sh './venv/bin/pip install networkx pandas ordered_set requests pyyaml stringcase biolinkml==1.5.8 bmt cachetools pystache neo4jrestclient validators' + sh './venv/bin/pip install networkx pandas ordered_set requests pyyaml stringcase biolinkml==1.5.7 bmt cachetools pystache neo4jrestclient validators' sh './venv/bin/pip install -r requirements.txt' sh './venv/bin/pip install .' } From e4d412a8f0b017d108b36c18cdfd93cbd77087f9 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 14 Sep 2020 17:41:58 -0700 Subject: [PATCH 018/130] Add wheel to pip install --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index c07a159c..692d427e 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -44,7 +44,7 @@ pipeline { ) sh '/usr/bin/python3.8 -m venv venv' sh '. venv/bin/activate' - sh './venv/bin/pip install networkx pandas ordered_set requests pyyaml stringcase biolinkml==1.5.7 bmt cachetools pystache neo4jrestclient validators' + sh './venv/bin/pip install wheel networkx pandas ordered_set requests pyyaml stringcase biolinkml==1.5.7 bmt cachetools pystache neo4jrestclient validators' sh './venv/bin/pip install -r requirements.txt' sh './venv/bin/pip install .' } From 2428135e189297a20dbb4d3aa44f0f835c120bfb Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 15 Sep 2020 12:04:49 -0700 Subject: [PATCH 019/130] Bump python requirement in .travis.yaml to 3.8 to match setup.py --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 5f105a31..852271f7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,7 @@ addons: secure: "RqYy9rM6eZAD8aFcF9xejrCG0xpQ87R2tIGH18h1d3VNp4/2gvWQRRmSK+HW2lQ1td3+PFetjY1nrZjqEf6Gs94OdCmsgmRYYogtICrdhCmYj96KSRkLCHCWKAsyxaFjEHECEA/6pRnmxOaK6avUJ56NmfGteDtSY+tjlmd7WUF5iul2gxtcJ3Wyhp6ddsZ9oUjj5miutB3+3+v0V9T95o0arNMJrO76YDIMXqd7wA2IEmeqUjmNOlVNIXSNuOIp1/4ffme7MjPWtWxzO0QPpKbsci8bKidO+CO6iguhmkXaRlaC0AHiiDiGoHEhSlPY5ltS8g6A4tvefn4VDTBYaGYeuOLJwJr07G63u7DB+oUJdNLGdtX28Y6mjhyxSi5sQbzg2xV9ldv49lcpQRIV2asFVYMJ+iZxSPmwiTeUOnjulUFx4S0R90Ed3j1/UfWvgAgXhZ68sjHtuCKlMQbvIfnsy3SnTeIarJ/jwUZQEiRHvO0/j5KEbtI2dvmzAr9rtBdNT2g4Q2YeMRyJqBcL61P5offZUZrLATU0zMpDqyaWeJaWFsXU/dV3zBA7FZKM9gCbm48J/jdeZU2leQikw7I1Hzx5mXaT3gGR6rGF03b/e/KbLMr61+79/7/xJmqWaCHD4OW2YfrD+RDbqMscvhEJHV3NoC36ikuV/pUs0wY=" language: python python: -- '3.7' +- '3.8' before_install: - sudo apt-get install openjdk-9-jdk-headless - pip install --upgrade pytest From 04e687b5664f5933e4a4b96ed595b296fbec84af Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Wed, 16 Sep 2020 15:17:01 -0700 Subject: [PATCH 020/130] Skip transform step for now, for testing merge step --- Jenkinsfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index fa28f2cf..73210f18 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -80,8 +80,9 @@ pipeline { stage('Transform') { steps { dir('./gitrepo') { - sh 'env' - sh '. venv/bin/activate && env && python3.7 run.py transform' +// sh 'env' +// sh '. venv/bin/activate && env && python3.7 run.py transform' + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text get -r s3://kg-hub-public-data/transformed data/' } } } From b9f28a6bfbefd5142e48e855fd4d068fb4ccb5db Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Wed, 16 Sep 2020 15:22:17 -0700 Subject: [PATCH 021/130] Revert to python3.7 --- Jenkinsfile | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 73210f18..edced5dd 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -42,7 +42,7 @@ pipeline { url: 'https://github.com/Knowledge-Graph-Hub/kg-covid-19', branch: env.BRANCH_NAME ) - sh '/usr/bin/python3.8 -m venv venv' + sh '/usr/bin/python3.7 -m venv venv' sh '. venv/bin/activate' sh './venv/bin/pip install -r requirements.txt' sh './venv/bin/pip install .' diff --git a/setup.py b/setup.py index 65e97ba5..65cfe157 100644 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ def find_version(*file_paths): url='https://github.com/justaddcoffee/kg-emerging-viruses', author='justaddcoffee+github@gmail.com', author_email='Justin Reese', - python_requires='>=3.8', + python_requires='>=3.7', # choose your license license='BSD-3', From 0788e24b966556d003a5574e77d0f01ffec9db34 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Wed, 16 Sep 2020 15:37:37 -0700 Subject: [PATCH 022/130] Fix credentials in transform step --- Jenkinsfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index edced5dd..5bf1eb4f 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -82,7 +82,9 @@ pipeline { dir('./gitrepo') { // sh 'env' // sh '. venv/bin/activate && env && python3.7 run.py transform' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text get -r s3://kg-hub-public-data/transformed data/' + withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_JSON')]) { + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text get -r s3://kg-hub-public-data/transformed data/' + } } } } From 6a2d1bf46b3481d328b41092c567a027f65392e8 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Wed, 16 Sep 2020 16:11:37 -0700 Subject: [PATCH 023/130] Fix python version in travis --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 115f4278..48255e05 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,7 @@ addons: secure: "RqYy9rM6eZAD8aFcF9xejrCG0xpQ87R2tIGH18h1d3VNp4/2gvWQRRmSK+HW2lQ1td3+PFetjY1nrZjqEf6Gs94OdCmsgmRYYogtICrdhCmYj96KSRkLCHCWKAsyxaFjEHECEA/6pRnmxOaK6avUJ56NmfGteDtSY+tjlmd7WUF5iul2gxtcJ3Wyhp6ddsZ9oUjj5miutB3+3+v0V9T95o0arNMJrO76YDIMXqd7wA2IEmeqUjmNOlVNIXSNuOIp1/4ffme7MjPWtWxzO0QPpKbsci8bKidO+CO6iguhmkXaRlaC0AHiiDiGoHEhSlPY5ltS8g6A4tvefn4VDTBYaGYeuOLJwJr07G63u7DB+oUJdNLGdtX28Y6mjhyxSi5sQbzg2xV9ldv49lcpQRIV2asFVYMJ+iZxSPmwiTeUOnjulUFx4S0R90Ed3j1/UfWvgAgXhZ68sjHtuCKlMQbvIfnsy3SnTeIarJ/jwUZQEiRHvO0/j5KEbtI2dvmzAr9rtBdNT2g4Q2YeMRyJqBcL61P5offZUZrLATU0zMpDqyaWeJaWFsXU/dV3zBA7FZKM9gCbm48J/jdeZU2leQikw7I1Hzx5mXaT3gGR6rGF03b/e/KbLMr61+79/7/xJmqWaCHD4OW2YfrD+RDbqMscvhEJHV3NoC36ikuV/pUs0wY=" language: python python: -- '3.8' +- '3.7' before_install: - sudo apt-get install openjdk-9-jdk-headless - pip install --upgrade pytest From d9a9782cab7f6b51224f0f0e452b74d34afd4e63 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Wed, 16 Sep 2020 16:55:26 -0700 Subject: [PATCH 024/130] Freeze off raw/ data into build directory --- Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Jenkinsfile b/Jenkinsfile index 5bf1eb4f..0a2e66a2 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -138,6 +138,7 @@ pipeline { // transformed data sh 'rm -fr data/transformed/.gitkeep' sh 'cp -pr data/transformed $BUILDSTARTDATE/' + sh 'cp -pr data/raw $BUILDSTARTDATE/' // stats dir sh 'mkdir $BUILDSTARTDATE/stats/' sh 'cp -p *_stats.yaml $BUILDSTARTDATE/stats/' From 10b6b380d4623261e96e36f18d1acec509fa5dd5 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Wed, 16 Sep 2020 16:59:17 -0700 Subject: [PATCH 025/130] Freeze off Jenkinsfile too --- Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Jenkinsfile b/Jenkinsfile index 0a2e66a2..de05257a 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -139,6 +139,7 @@ pipeline { sh 'rm -fr data/transformed/.gitkeep' sh 'cp -pr data/transformed $BUILDSTARTDATE/' sh 'cp -pr data/raw $BUILDSTARTDATE/' + sh 'cp Jenkinsfile $BUILDSTARTDATE/' // stats dir sh 'mkdir $BUILDSTARTDATE/stats/' sh 'cp -p *_stats.yaml $BUILDSTARTDATE/stats/' From bc73494d787a17350352d2da4f22da5b02a93d9a Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Wed, 16 Sep 2020 17:57:34 -0700 Subject: [PATCH 026/130] Skip blazegraph journal step for now --- Jenkinsfile | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index de05257a..baa10548 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -100,21 +100,21 @@ pipeline { } } - stage('Make blazegraph journal'){ - steps { - dir('./gitrepo/blazegraph') { - git( - url: 'https://github.com/balhoff/blazegraph-runner.git', - branch: 'master' - ) - sh 'sbt stage' - sh 'pigz -d ../data/merged/merged-kg.nt.gz' - sh 'export JAVA_OPTS=-Xmx128G && ./target/universal/stage/bin/blazegraph-runner load --informat=ntriples --journal=../merged-kg.jnl --use-ontology-graph=true ../data/merged/merged-kg.nt' - sh 'pigz ../merged-kg.jnl' - sh 'pigz ../data/merged/merged-kg.nt' - } - } - } +// stage('Make blazegraph journal'){ +// steps { +// dir('./gitrepo/blazegraph') { +// git( +// url: 'https://github.com/balhoff/blazegraph-runner.git', +// branch: 'master' +// ) +// sh 'sbt stage' +// sh 'pigz -d ../data/merged/merged-kg.nt.gz' +// sh 'export JAVA_OPTS=-Xmx128G && ./target/universal/stage/bin/blazegraph-runner load --informat=ntriples --journal=../merged-kg.jnl --use-ontology-graph=true ../data/merged/merged-kg.nt' +// sh 'pigz ../merged-kg.jnl' +// sh 'pigz ../data/merged/merged-kg.nt' +// } +// } +// } stage('Publish') { steps { @@ -134,6 +134,7 @@ pipeline { sh 'mkdir $BUILDSTARTDATE/' sh 'cp -p data/merged/merged-kg.nt.gz $BUILDSTARTDATE/' sh 'cp -p data/merged/merged-kg.tar.gz $BUILDSTARTDATE/' + sh 'touch merged-kg.jnl.gz' // REMOVE sh 'cp -p merged-kg.jnl.gz $BUILDSTARTDATE/' // transformed data sh 'rm -fr data/transformed/.gitkeep' From e05247467528387967904fde4230afd3001b406f Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Thu, 17 Sep 2020 08:31:13 -0700 Subject: [PATCH 027/130] add correct check for branch (for testing) --- Jenkinsfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index baa10548..fd9aa5c4 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -120,7 +120,8 @@ pipeline { steps { dir('./gitrepo') { script { - if (env.BRANCH_NAME != 'master' || env.BRANCH_NAME != 'add_versioning_of_builds_run_jenkins') { + // if (env.BRANCH_NAME != 'master' || + if (env.BRANCH_NAME != 'add_versioning_of_builds_run_jenkins') { echo "Will not push if not on correct branch." } else { // code for building s3 index files From 63d601fde391ff17857aab8754ad0a94313cbfd8 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Fri, 18 Sep 2020 15:16:15 -0700 Subject: [PATCH 028/130] Remove merge step for faster testing --- Jenkinsfile | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index fd9aa5c4..d1dee298 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -92,10 +92,13 @@ pipeline { stage('Merge') { steps { dir('./gitrepo') { - sh '. venv/bin/activate && python3.7 run.py merge' - sh 'env' - sh 'cp merged_graph_stats.yaml merged_graph_stats_$BUILDSTARTDATE.yaml' - sh 'tar -rvf data/merged/merged-kg.tar merged_graph_stats_$BUILDSTARTDATE.yaml' +// sh '. venv/bin/activate && python3.7 run.py merge' +// sh 'env' +// sh 'cp merged_graph_stats.yaml merged_graph_stats_$BUILDSTARTDATE.yaml' +// sh 'tar -rvf data/merged/merged-kg.tar merged_graph_stats_$BUILDSTARTDATE.yaml' + sh 'touch merged_graph_stats_$BUILDSTARTDATE.yaml' + sh 'touch data/merged/merged-kg.nt.gz' + sh 'touch data/merged/merged-kg.tar.gz' } } } From 754c6d2931c86263cae8900401165a0b39dd8d1c Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Fri, 18 Sep 2020 15:26:16 -0700 Subject: [PATCH 029/130] Make data/merged directory during testing --- Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Jenkinsfile b/Jenkinsfile index d1dee298..ceb941f9 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -97,6 +97,7 @@ pipeline { // sh 'cp merged_graph_stats.yaml merged_graph_stats_$BUILDSTARTDATE.yaml' // sh 'tar -rvf data/merged/merged-kg.tar merged_graph_stats_$BUILDSTARTDATE.yaml' sh 'touch merged_graph_stats_$BUILDSTARTDATE.yaml' + sh 'mkdir -p data/merged/' sh 'touch data/merged/merged-kg.nt.gz' sh 'touch data/merged/merged-kg.tar.gz' } From a2b8b23294e79c48aca57133e998341924fd8daa Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Fri, 18 Sep 2020 16:42:47 -0700 Subject: [PATCH 030/130] Move gosite GH repo download in Jenkinsfile --- Jenkinsfile | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index ceb941f9..65ecacd8 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -122,16 +122,16 @@ pipeline { stage('Publish') { steps { + // code for building s3 index files + dir('./go-site') { + git branch: master, url: 'https://github.com/justaddcoffee/go-site.git' + } dir('./gitrepo') { script { // if (env.BRANCH_NAME != 'master' || if (env.BRANCH_NAME != 'add_versioning_of_builds_run_jenkins') { echo "Will not push if not on correct branch." } else { - // code for building s3 index files - dir('./go-site') { - git branch: master, url: 'https://github.com/justaddcoffee/go-site.git' - } withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_JSON')]) { // // make $BUILDSTARTDATE/ directory and sync to s3 bucket @@ -155,18 +155,6 @@ pipeline { // sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put -pr $BUILDSTARTDATE s3://kg-hub-public-data/' - // - // Build the new build directory index.html - // - sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data/$BUILDSTARTDATE --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/$BUILDSTARTDATE/ > build-index.html' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put build-index.html s3://kg-hub-public-data/$BUILDSTARTDATE/index.html' - // Build the new build stats subdirectory index.html - sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data/$BUILDSTARTDATE/stats --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/$BUILDSTARTDATE/stats/ > stats-index.html' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put stats-index.html s3://kg-hub-public-data/$BUILDSTARTDATE/stats/index.html' - // Build the new build transformed subdirectory index.html - sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data/$BUILDSTARTDATE/transformed --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/$BUILDSTARTDATE/transformed/ > transformed-index.html' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put transformed-index.html s3://kg-hub-public-data/$BUILDSTARTDATE/transformed/index.html' - // // make $BUILDSTARTDATE the new current/ // @@ -175,7 +163,7 @@ pipeline { sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate mv s3://kg-hub-public-data/new_current s3://kg-hub-public-data/current' // Build the top level index.html - sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' + sh 'python3 ../go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' // Should now appear at: From 6c98694576c05b39797d61c641e190e0272f45bb Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Fri, 18 Sep 2020 17:37:40 -0700 Subject: [PATCH 031/130] Make sure code gets executed --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 65ecacd8..71aa9b6f 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -129,7 +129,7 @@ pipeline { dir('./gitrepo') { script { // if (env.BRANCH_NAME != 'master' || - if (env.BRANCH_NAME != 'add_versioning_of_builds_run_jenkins') { + if (env.BRANCH_NAME == 'NOT THIS BRANCH') { echo "Will not push if not on correct branch." } else { withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_JSON')]) { From 5861461b5df683453f03251b35d051fb117532bb Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 10:05:31 -0700 Subject: [PATCH 032/130] Fix problem with cloning go-site repo --- Jenkinsfile | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 71aa9b6f..270f4a28 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -123,11 +123,13 @@ pipeline { stage('Publish') { steps { // code for building s3 index files - dir('./go-site') { - git branch: master, url: 'https://github.com/justaddcoffee/go-site.git' - } +// dir('./go-site') { +// git branch: master, url: 'https://github.com/justaddcoffee/go-site.git' +// } dir('./gitrepo') { script { + sh 'git clone https://github.com/justaddcoffee/go-site.git' + // if (env.BRANCH_NAME != 'master' || if (env.BRANCH_NAME == 'NOT THIS BRANCH') { echo "Will not push if not on correct branch." @@ -163,7 +165,7 @@ pipeline { sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate mv s3://kg-hub-public-data/new_current s3://kg-hub-public-data/current' // Build the top level index.html - sh 'python3 ../go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' + sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' // Should now appear at: From 9e45288844e447ae5cd5e61ef0d5b60905a5d11a Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 10:32:07 -0700 Subject: [PATCH 033/130] Make test stats file --- Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Jenkinsfile b/Jenkinsfile index 270f4a28..baa8d62c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -96,6 +96,7 @@ pipeline { // sh 'env' // sh 'cp merged_graph_stats.yaml merged_graph_stats_$BUILDSTARTDATE.yaml' // sh 'tar -rvf data/merged/merged-kg.tar merged_graph_stats_$BUILDSTARTDATE.yaml' + sh 'touch TEST_stats.yaml' sh 'touch merged_graph_stats_$BUILDSTARTDATE.yaml' sh 'mkdir -p data/merged/' sh 'touch data/merged/merged-kg.nt.gz' From a6ce620f9c99409aeb540c87e5e3f102137e5118 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 11:27:20 -0700 Subject: [PATCH 034/130] Fix cp and mv command to create new_current --- Jenkinsfile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index baa8d62c..6ad4f8dc 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -161,13 +161,13 @@ pipeline { // // make $BUILDSTARTDATE the new current/ // - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate cp -pr s3://kg-hub-public-data/$BUILDSTARTDATE s3://kg-hub-public-data/new_current' + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate cp -pr s3://kg-hub-public-data/$BUILDSTARTDATE/ s3://kg-hub-public-data/new_current/' sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate rm -fr s3://kg-hub-public-data/current' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate mv s3://kg-hub-public-data/new_current s3://kg-hub-public-data/current' + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate mv --recursive s3://kg-hub-public-data/new_current s3://kg-hub-public-data/current' - // Build the top level index.html - sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' + // Build the top level index.html + sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' // Should now appear at: // https://kg-hub.berkeleybop.io/[artifact name] From 93cce7cb0c5ac592c848fd4d264309f23eb8aedb Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 12:03:43 -0700 Subject: [PATCH 035/130] Add check to fail so as to not nuke existing build dir, and verbose flag to cp command --- Jenkinsfile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 6ad4f8dc..b2784d6e 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -129,6 +129,10 @@ pipeline { // } dir('./gitrepo') { script { + if(fileExists($BUILDSTARTDATE)){ + echo "Will not overwrite existing directory: $BUILDSTARTDATE" + sh 'exit 1' + } sh 'git clone https://github.com/justaddcoffee/go-site.git' // if (env.BRANCH_NAME != 'master' || @@ -161,7 +165,7 @@ pipeline { // // make $BUILDSTARTDATE the new current/ // - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate cp -pr s3://kg-hub-public-data/$BUILDSTARTDATE/ s3://kg-hub-public-data/new_current/' + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate cp -v -pr s3://kg-hub-public-data/$BUILDSTARTDATE/ s3://kg-hub-public-data/new_current/' sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate rm -fr s3://kg-hub-public-data/current' sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate mv --recursive s3://kg-hub-public-data/new_current s3://kg-hub-public-data/current' From 3ca5dea12cb74442a541cc335a84acad03853d1f Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 12:15:53 -0700 Subject: [PATCH 036/130] Fix quotes/interpolation bug, add block up top to check --- Jenkinsfile | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index b2784d6e..40c0454b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -38,6 +38,10 @@ pipeline { stage('Build kg_covid_19') { steps { dir('./gitrepo') { + if(fileExists('$BUILDSTARTDATE')){ + echo "Will not overwrite existing directory: $BUILDSTARTDATE" + sh 'exit 1' + } git( url: 'https://github.com/Knowledge-Graph-Hub/kg-covid-19', branch: env.BRANCH_NAME @@ -129,7 +133,7 @@ pipeline { // } dir('./gitrepo') { script { - if(fileExists($BUILDSTARTDATE)){ + if(fileExists('$BUILDSTARTDATE')){ echo "Will not overwrite existing directory: $BUILDSTARTDATE" sh 'exit 1' } From ebe268b8587b746905d30cad5388049705c402df Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 12:19:57 -0700 Subject: [PATCH 037/130] Add check up top --- Jenkinsfile | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 40c0454b..5227754d 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -35,13 +35,20 @@ pipeline { } } - stage('Build kg_covid_19') { + stage('Check existing build directory'){ steps { - dir('./gitrepo') { + dir('./gitrepo') { if(fileExists('$BUILDSTARTDATE')){ echo "Will not overwrite existing directory: $BUILDSTARTDATE" sh 'exit 1' } + } + } + } + + stage('Build kg_covid_19') { + steps { + dir('./gitrepo') { git( url: 'https://github.com/Knowledge-Graph-Hub/kg-covid-19', branch: env.BRANCH_NAME From f479e365e5014b27b00f4475f9e2aac310987a79 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 12:21:31 -0700 Subject: [PATCH 038/130] Remove check block --- Jenkinsfile | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 5227754d..95e695f6 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -35,17 +35,6 @@ pipeline { } } - stage('Check existing build directory'){ - steps { - dir('./gitrepo') { - if(fileExists('$BUILDSTARTDATE')){ - echo "Will not overwrite existing directory: $BUILDSTARTDATE" - sh 'exit 1' - } - } - } - } - stage('Build kg_covid_19') { steps { dir('./gitrepo') { From b9fb335d5ee42818c1550adef02a127256ef6f06 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 12:24:08 -0700 Subject: [PATCH 039/130] Update Jenkinsfile --- Jenkinsfile | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index 95e695f6..49bb9174 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -50,6 +50,19 @@ pipeline { } } + stage('Check existing build directory'){ + steps { + dir('./gitrepo') { + if(fileExists('$BUILDSTARTDATE')){ + echo "Will not overwrite existing directory: $BUILDSTARTDATE" + sh 'exit 1' + } else { + sh 'carry on...' + } + } + } + } + stage('Download') { steps { dir('./gitrepo') { From eb8a7a2e2f8607845644dd47d7b8c58267ddb704 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 12:25:16 -0700 Subject: [PATCH 040/130] Remove test block --- Jenkinsfile | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 49bb9174..95e695f6 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -50,19 +50,6 @@ pipeline { } } - stage('Check existing build directory'){ - steps { - dir('./gitrepo') { - if(fileExists('$BUILDSTARTDATE')){ - echo "Will not overwrite existing directory: $BUILDSTARTDATE" - sh 'exit 1' - } else { - sh 'carry on...' - } - } - } - } - stage('Download') { steps { dir('./gitrepo') { From 5a6481e05c7a0ee47fd90749b33cf358cb911f14 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 13:05:38 -0700 Subject: [PATCH 041/130] Add current/ redirect --- Jenkinsfile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 95e695f6..a2daac78 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -160,14 +160,14 @@ pipeline { // // put $BUILDSTARTDATE/ in s3 bucket // - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put -pr $BUILDSTARTDATE s3://kg-hub-public-data/' + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate --add-header "x-amz-website-redirect-location: /current/" put -pr $BUILDSTARTDATE s3://kg-hub-public-data/' // // make $BUILDSTARTDATE the new current/ - // - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate cp -v -pr s3://kg-hub-public-data/$BUILDSTARTDATE/ s3://kg-hub-public-data/new_current/' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate rm -fr s3://kg-hub-public-data/current' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate mv --recursive s3://kg-hub-public-data/new_current s3://kg-hub-public-data/current' + // + // sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate cp -v -pr s3://kg-hub-public-data/$BUILDSTARTDATE/ s3://kg-hub-public-data/new_current/' + // sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate rm -fr s3://kg-hub-public-data/current' + // sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate mv --recursive s3://kg-hub-public-data/new_current s3://kg-hub-public-data/current' // Build the top level index.html sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' From a5a5cb67c39251cad115f03fb4d264e4fb5f318d Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 13:34:55 -0700 Subject: [PATCH 042/130] Add pip install cmd's for bucket indexer --- Jenkinsfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index a2daac78..2f83010d 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -170,6 +170,9 @@ pipeline { // sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate mv --recursive s3://kg-hub-public-data/new_current s3://kg-hub-public-data/current' // Build the top level index.html + // "External" packages required to run these + // scripts. + sh './venv/bin/pip install click pystache yamldown pypandoc' sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' From 5aaf2267ff95ab85852531ce4a2073b7a26f66c7 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 13:58:46 -0700 Subject: [PATCH 043/130] Remove pypandoc install from jenkins --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 2f83010d..dad2ce71 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -172,7 +172,7 @@ pipeline { // Build the top level index.html // "External" packages required to run these // scripts. - sh './venv/bin/pip install click pystache yamldown pypandoc' + sh './venv/bin/pip install click pystache yamldown' sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' From 2ccbbb153b4bcf9ae7abbb0c09a9a2de41341647 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 14:05:20 -0700 Subject: [PATCH 044/130] Why isn't fileExists working --- Jenkinsfile | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index dad2ce71..344d869a 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -124,12 +124,9 @@ pipeline { stage('Publish') { steps { // code for building s3 index files -// dir('./go-site') { -// git branch: master, url: 'https://github.com/justaddcoffee/go-site.git' -// } dir('./gitrepo') { script { - if(fileExists('$BUILDSTARTDATE')){ + if(fileExists('./gitrepo/$BUILDSTARTDATE')){ echo "Will not overwrite existing directory: $BUILDSTARTDATE" sh 'exit 1' } From 2d508e893f5f44f162e9729a4688634cc6c901bf Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 14:35:06 -0700 Subject: [PATCH 045/130] Remove --add-header flag to s3cmd --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 344d869a..c769e267 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -157,7 +157,7 @@ pipeline { // // put $BUILDSTARTDATE/ in s3 bucket // - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate --add-header "x-amz-website-redirect-location: /current/" put -pr $BUILDSTARTDATE s3://kg-hub-public-data/' + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put -pr $BUILDSTARTDATE s3://kg-hub-public-data/' // // make $BUILDSTARTDATE the new current/ From 6bf57237ad048d6c4e1218525a4832bc47e2add5 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 14:45:22 -0700 Subject: [PATCH 046/130] Speed up testing --- Jenkinsfile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index c769e267..8568e4cf 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -69,8 +69,11 @@ pipeline { withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_JSON')]) { sh 'rm -fr data/raw || true;' sh 'mkdir -p data/raw || true' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text get -r s3://kg-hub-public-data/raw/ data/raw/' - } + // FIX THIS + // sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text get -r s3://kg-hub-public-data/raw/ data/raw/' + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text get -r s3://kg-hub-public-data/raw/hp.json data/raw/hp.json' + + } } } } From ba91faf0bc5357fe02e097e017f0f984d7465a76 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 14:50:59 -0700 Subject: [PATCH 047/130] Speed up tests --- Jenkinsfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 8568e4cf..2ac9c1e5 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -86,7 +86,8 @@ pipeline { // sh 'env' // sh '. venv/bin/activate && env && python3.7 run.py transform' withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_JSON')]) { - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text get -r s3://kg-hub-public-data/transformed data/' + sh 'mdkir transformed/' + sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text get -r s3://kg-hub-public-data/transformed/ttd data/transformed/' } } } From a313e0decdac06ef5612730796477bde261a0a7b Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 14:53:17 -0700 Subject: [PATCH 048/130] Typo --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 2ac9c1e5..60c45306 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -86,7 +86,7 @@ pipeline { // sh 'env' // sh '. venv/bin/activate && env && python3.7 run.py transform' withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_JSON')]) { - sh 'mdkir transformed/' + sh 'mkdir transformed/' sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text get -r s3://kg-hub-public-data/transformed/ttd data/transformed/' } } From 017869ebb99a8ce1f1ccc1491beff6d272342fcc Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 15:07:37 -0700 Subject: [PATCH 049/130] Please please please pip install pystache --- Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Jenkinsfile b/Jenkinsfile index 60c45306..5669e28d 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -44,6 +44,7 @@ pipeline { ) sh '/usr/bin/python3.7 -m venv venv' sh '. venv/bin/activate' + sh './venv/bin/pip install pystache' // for go-site bucket-indexer down below sh './venv/bin/pip install -r requirements.txt' sh './venv/bin/pip install .' } From 518428465ca8556d2b596282fc9dce8c1dbeab8a Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 15:11:50 -0700 Subject: [PATCH 050/130] Remove pip install pystache --- Jenkinsfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 5669e28d..60c45306 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -44,7 +44,6 @@ pipeline { ) sh '/usr/bin/python3.7 -m venv venv' sh '. venv/bin/activate' - sh './venv/bin/pip install pystache' // for go-site bucket-indexer down below sh './venv/bin/pip install -r requirements.txt' sh './venv/bin/pip install .' } From 281aa5e35460e279b6367416f473a4b9261465aa Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 15:13:17 -0700 Subject: [PATCH 051/130] Pleading with pip, still --- Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Jenkinsfile b/Jenkinsfile index 60c45306..d6ea97d5 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -135,6 +135,7 @@ pipeline { sh 'exit 1' } sh 'git clone https://github.com/justaddcoffee/go-site.git' + sh 'cd go-site && pip install . && cd ..' // if (env.BRANCH_NAME != 'master' || if (env.BRANCH_NAME == 'NOT THIS BRANCH') { From 1c6ae6a1a28f87de86690c4dd28bd7f6d75622d0 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 15:17:30 -0700 Subject: [PATCH 052/130] More pip acrobatics --- Jenkinsfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index d6ea97d5..7ddb7310 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -135,7 +135,7 @@ pipeline { sh 'exit 1' } sh 'git clone https://github.com/justaddcoffee/go-site.git' - sh 'cd go-site && pip install . && cd ..' + sh 'cd go-site && ../venv/bin/pip install . && cd ..' // if (env.BRANCH_NAME != 'master' || if (env.BRANCH_NAME == 'NOT THIS BRANCH') { @@ -174,7 +174,6 @@ pipeline { // Build the top level index.html // "External" packages required to run these // scripts. - sh './venv/bin/pip install click pystache yamldown' sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' From 55a1bd5fd791ab0687a295c630791beda6779c72 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 15:22:38 -0700 Subject: [PATCH 053/130] Running out of ideas for how to get pip to do it's job here --- Jenkinsfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 7ddb7310..8748a189 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -135,7 +135,6 @@ pipeline { sh 'exit 1' } sh 'git clone https://github.com/justaddcoffee/go-site.git' - sh 'cd go-site && ../venv/bin/pip install . && cd ..' // if (env.BRANCH_NAME != 'master' || if (env.BRANCH_NAME == 'NOT THIS BRANCH') { From e636160fd394772af706ad44f2c2b0e57c25ad83 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 15:24:26 -0700 Subject: [PATCH 054/130] Comment --- Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Jenkinsfile b/Jenkinsfile index 8748a189..10f50e16 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -166,6 +166,7 @@ pipeline { // // make $BUILDSTARTDATE the new current/ // + // The following cp always times out: // sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate cp -v -pr s3://kg-hub-public-data/$BUILDSTARTDATE/ s3://kg-hub-public-data/new_current/' // sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate rm -fr s3://kg-hub-public-data/current' // sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate mv --recursive s3://kg-hub-public-data/new_current s3://kg-hub-public-data/current' From 8e496e8d348d72a7d684afc759a7b4a94c002583 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 15:27:15 -0700 Subject: [PATCH 055/130] more verbose --- Jenkinsfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index 10f50e16..36b244c8 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -133,6 +133,8 @@ pipeline { if(fileExists('./gitrepo/$BUILDSTARTDATE')){ echo "Will not overwrite existing directory: $BUILDSTARTDATE" sh 'exit 1' + } else { + echo "$BUILDSTARTDATE doesn't exist, proceeding" } sh 'git clone https://github.com/justaddcoffee/go-site.git' From b452b32b06b4cfa2f10056757e267cd6183cd128 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 15:38:30 -0700 Subject: [PATCH 056/130] Install pystache --- Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Jenkinsfile b/Jenkinsfile index 36b244c8..0e5779ef 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -176,6 +176,7 @@ pipeline { // Build the top level index.html // "External" packages required to run these // scripts. + sh './venv/bin/pip install pystache' sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' From d4f41152d4bdd1956cc16ab4a778e2651e0071bb Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 15:43:05 -0700 Subject: [PATCH 057/130] Fix path to python --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 0e5779ef..fa3b7462 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -177,7 +177,7 @@ pipeline { // "External" packages required to run these // scripts. sh './venv/bin/pip install pystache' - sh 'python3 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' + sh '. venv/bin/activate && python3.7 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' // Should now appear at: From 3db2f8f23afe6a4822646f384248f7bb4ed6b650 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 15:50:51 -0700 Subject: [PATCH 058/130] Add boto3 --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index fa3b7462..6e43a0c9 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -176,7 +176,7 @@ pipeline { // Build the top level index.html // "External" packages required to run these // scripts. - sh './venv/bin/pip install pystache' + sh './venv/bin/pip install pystache boto3' sh '. venv/bin/activate && python3.7 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' From 5b9ec4908008043d3d9d7bb205a873b30955aed0 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 16:07:09 -0700 Subject: [PATCH 059/130] Try to get string interpolation to work --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 6e43a0c9..cfd85e5d 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -177,7 +177,7 @@ pipeline { // "External" packages required to run these // scripts. sh './venv/bin/pip install pystache boto3' - sh '. venv/bin/activate && python3.7 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' + sh ". venv/bin/activate && python3.7 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html" sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' // Should now appear at: From 759f22044cec3db7bfbbb07694398476395271de Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 16:38:47 -0700 Subject: [PATCH 060/130] Why isn't S3CMD_JSON being interpolated --- Jenkinsfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index cfd85e5d..98a38703 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -143,6 +143,7 @@ pipeline { echo "Will not push if not on correct branch." } else { withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_JSON')]) { + sh '. venv/bin/activate' // // make $BUILDSTARTDATE/ directory and sync to s3 bucket // @@ -177,7 +178,7 @@ pipeline { // "External" packages required to run these // scripts. sh './venv/bin/pip install pystache boto3' - sh ". venv/bin/activate && python3.7 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html" + sh 'python3.7 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' // Should now appear at: From 7f69edd6b523450399e889ef08953b467d7ddcbc Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 16:45:05 -0700 Subject: [PATCH 061/130] Try this --- Jenkinsfile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 98a38703..c4a79cce 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -143,7 +143,6 @@ pipeline { echo "Will not push if not on correct branch." } else { withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_JSON')]) { - sh '. venv/bin/activate' // // make $BUILDSTARTDATE/ directory and sync to s3 bucket // @@ -177,8 +176,10 @@ pipeline { // Build the top level index.html // "External" packages required to run these // scripts. - sh './venv/bin/pip install pystache boto3' - sh 'python3.7 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' + withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_JSON')]) { + sh './venv/bin/pip install pystache boto3' + sh '. venv/bin/activate && python3.7 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' + } sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' // Should now appear at: From 4b39042f2b81b8363c54ff125f965586b9f4c4ad Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 16:49:00 -0700 Subject: [PATCH 062/130] Back it up --- Jenkinsfile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index c4a79cce..6e43a0c9 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -176,10 +176,8 @@ pipeline { // Build the top level index.html // "External" packages required to run these // scripts. - withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_JSON')]) { - sh './venv/bin/pip install pystache boto3' - sh '. venv/bin/activate && python3.7 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' - } + sh './venv/bin/pip install pystache boto3' + sh '. venv/bin/activate && python3.7 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' // Should now appear at: From a554da1aa4e9138edfbcd075913247ab316aa98c Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 16:51:39 -0700 Subject: [PATCH 063/130] print env --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 6e43a0c9..a1e339b1 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -177,7 +177,7 @@ pipeline { // "External" packages required to run these // scripts. sh './venv/bin/pip install pystache boto3' - sh '. venv/bin/activate && python3.7 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' + sh '. venv/bin/activate && env && python3.7 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' // Should now appear at: From 1e638d65d0e880c6841d1aaa5a9ae796d0e698cb Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 16:55:45 -0700 Subject: [PATCH 064/130] Fix credentials string --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index a1e339b1..9cbe5a81 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -177,7 +177,7 @@ pipeline { // "External" packages required to run these // scripts. sh './venv/bin/pip install pystache boto3' - sh '. venv/bin/activate && env && python3.7 ./go-site/scripts/bucket-indexer.py --credentials $S3_PUSH_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' + sh '. venv/bin/activate && env && python3.7 ./go-site/scripts/bucket-indexer.py --credentials $S3CMD_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' // Should now appear at: From 3f7af50f5614ca496865920bb598e3b73fb4fd92 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 17:28:47 -0700 Subject: [PATCH 065/130] Add aws json variable --- Jenkinsfile | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 9cbe5a81..fe729915 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -142,7 +142,10 @@ pipeline { if (env.BRANCH_NAME == 'NOT THIS BRANCH') { echo "Will not push if not on correct branch." } else { - withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_JSON')]) { + withCredentials([ + file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG'), + file(credentialsId: 'aws_kg_hub_push_json', variable: 'AWS_JSON') + ]) { // // make $BUILDSTARTDATE/ directory and sync to s3 bucket // @@ -163,22 +166,22 @@ pipeline { // // put $BUILDSTARTDATE/ in s3 bucket // - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put -pr $BUILDSTARTDATE s3://kg-hub-public-data/' + sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate put -pr $BUILDSTARTDATE s3://kg-hub-public-data/' // // make $BUILDSTARTDATE the new current/ // // The following cp always times out: - // sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate cp -v -pr s3://kg-hub-public-data/$BUILDSTARTDATE/ s3://kg-hub-public-data/new_current/' - // sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate rm -fr s3://kg-hub-public-data/current' - // sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate mv --recursive s3://kg-hub-public-data/new_current s3://kg-hub-public-data/current' + // sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate cp -v -pr s3://kg-hub-public-data/$BUILDSTARTDATE/ s3://kg-hub-public-data/new_current/' + // sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate rm -fr s3://kg-hub-public-data/current' + // sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate mv --recursive s3://kg-hub-public-data/new_current s3://kg-hub-public-data/current' // Build the top level index.html // "External" packages required to run these // scripts. sh './venv/bin/pip install pystache boto3' - sh '. venv/bin/activate && env && python3.7 ./go-site/scripts/bucket-indexer.py --credentials $S3CMD_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' + sh '. venv/bin/activate && python3.7 ./go-site/scripts/bucket-indexer.py --credentials $AWS_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' + sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' // Should now appear at: // https://kg-hub.berkeleybop.io/[artifact name] From 030cbb6ca6903dff300e662c93080257d42b04bb Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 17:40:24 -0700 Subject: [PATCH 066/130] Build subdirectory index.html's and remove 1 min wait --- Jenkinsfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index fe729915..26ddb65e 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -14,7 +14,7 @@ pipeline { stage('Ready and clean') { steps { // Give us a minute to cancel if we want. - sleep time: 1, unit: 'MINUTES' + // sleep time: 1, unit: 'MINUTES' cleanWs() } } @@ -166,7 +166,8 @@ pipeline { // // put $BUILDSTARTDATE/ in s3 bucket // - sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate put -pr $BUILDSTARTDATE s3://kg-hub-public-data/' + sh '. venv/bin/activate && python3.7 ./scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE ---prefix https://kg-hub.berkeleybop.io/ -x' + sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate put -pr $BUILDSTARTDATE s3://kg-hub-public-data/' // // make $BUILDSTARTDATE the new current/ From 5ba2ceac738c1b73befdb7ee3a73d8f50746fa3a Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 17:43:44 -0700 Subject: [PATCH 067/130] Fix path --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 26ddb65e..d6addedc 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -166,7 +166,7 @@ pipeline { // // put $BUILDSTARTDATE/ in s3 bucket // - sh '. venv/bin/activate && python3.7 ./scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE ---prefix https://kg-hub.berkeleybop.io/ -x' + sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE ---prefix https://kg-hub.berkeleybop.io/ -x' sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate put -pr $BUILDSTARTDATE s3://kg-hub-public-data/' // From 86caf0cf3789690bb13d1bf28a19004d1af21068 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 17:47:50 -0700 Subject: [PATCH 068/130] Triple dash typo --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index d6addedc..a4e51105 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -166,7 +166,7 @@ pipeline { // // put $BUILDSTARTDATE/ in s3 bucket // - sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE ---prefix https://kg-hub.berkeleybop.io/ -x' + sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE --prefix https://kg-hub.berkeleybop.io/ -x' sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate put -pr $BUILDSTARTDATE s3://kg-hub-public-data/' // From 62731bb8d1ba31b66014783b71b3be663d8b058e Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 18:11:44 -0700 Subject: [PATCH 069/130] Trailing slash --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index a4e51105..e5515ff2 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -166,7 +166,7 @@ pipeline { // // put $BUILDSTARTDATE/ in s3 bucket // - sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE --prefix https://kg-hub.berkeleybop.io/ -x' + sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE/ --prefix https://kg-hub.berkeleybop.io/ -x' sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate put -pr $BUILDSTARTDATE s3://kg-hub-public-data/' // From 1ff38997fe8caf474be5d5e63b7ccf3f6c99febe Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 18:16:09 -0700 Subject: [PATCH 070/130] Remove trailing slash --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index e5515ff2..a4e51105 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -166,7 +166,7 @@ pipeline { // // put $BUILDSTARTDATE/ in s3 bucket // - sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE/ --prefix https://kg-hub.berkeleybop.io/ -x' + sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE --prefix https://kg-hub.berkeleybop.io/ -x' sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate put -pr $BUILDSTARTDATE s3://kg-hub-public-data/' // From 031177d07f8518bcac2ea8514c5f91e46b216465 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 18:32:48 -0700 Subject: [PATCH 071/130] Try sync instead of cp for cf invalidation --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index a4e51105..e4bc8cd9 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -182,7 +182,7 @@ pipeline { // scripts. sh './venv/bin/pip install pystache boto3' sh '. venv/bin/activate && python3.7 ./go-site/scripts/bucket-indexer.py --credentials $AWS_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' - sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' + sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate sync top-level-index.html s3://kg-hub-public-data/index.html' // Should now appear at: // https://kg-hub.berkeleybop.io/[artifact name] From 92ca8d9966e0338181e974c67edebe1c966f2eb7 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 18:40:58 -0700 Subject: [PATCH 072/130] Fix incorrectly named env var (S3CMD_JSON -> S3CMD_CFG) --- Jenkinsfile | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index e4bc8cd9..aaf13dc2 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -61,17 +61,17 @@ pipeline { if (env.BRANCH_NAME != 'master') { // upload raw to s3 if we're on correct branch echo "Will not push if not on correct branch." } else { - withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_JSON')]) { - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text --cf-invalidate put -r data/raw s3://kg-hub-public-data/' + withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG')]) { + sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=plain/text --cf-invalidate put -r data/raw s3://kg-hub-public-data/' } } } else { // 'run.py download' failed - let's try to download last good copy of raw/ from s3 to data/ - withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_JSON')]) { + withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG')]) { sh 'rm -fr data/raw || true;' sh 'mkdir -p data/raw || true' // FIX THIS - // sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text get -r s3://kg-hub-public-data/raw/ data/raw/' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text get -r s3://kg-hub-public-data/raw/hp.json data/raw/hp.json' + // sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=plain/text get -r s3://kg-hub-public-data/raw/ data/raw/' + sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=plain/text get -r s3://kg-hub-public-data/raw/hp.json data/raw/hp.json' } } @@ -85,9 +85,9 @@ pipeline { dir('./gitrepo') { // sh 'env' // sh '. venv/bin/activate && env && python3.7 run.py transform' - withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_JSON')]) { + withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG')]) { sh 'mkdir transformed/' - sh 's3cmd -c $S3CMD_JSON --acl-public --mime-type=plain/text get -r s3://kg-hub-public-data/transformed/ttd data/transformed/' + sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=plain/text get -r s3://kg-hub-public-data/transformed/ttd data/transformed/' } } } From 324f1a95aae46b8609d31bd4393f9f6c7b85e862 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 18:45:13 -0700 Subject: [PATCH 073/130] Change sync to cp --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index aaf13dc2..c94b4fda 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -182,7 +182,7 @@ pipeline { // scripts. sh './venv/bin/pip install pystache boto3' sh '. venv/bin/activate && python3.7 ./go-site/scripts/bucket-indexer.py --credentials $AWS_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' - sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate sync top-level-index.html s3://kg-hub-public-data/index.html' + sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate cp top-level-index.html s3://kg-hub-public-data/index.html' // Should now appear at: // https://kg-hub.berkeleybop.io/[artifact name] From d742b5ef20133d0106872e816a9079617fcbe212 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 18:55:54 -0700 Subject: [PATCH 074/130] Change cp to put --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index c94b4fda..e7fe9af0 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -182,7 +182,7 @@ pipeline { // scripts. sh './venv/bin/pip install pystache boto3' sh '. venv/bin/activate && python3.7 ./go-site/scripts/bucket-indexer.py --credentials $AWS_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' - sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate cp top-level-index.html s3://kg-hub-public-data/index.html' + sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' // Should now appear at: // https://kg-hub.berkeleybop.io/[artifact name] From 47225d1cd82563ac7ee0a327be8937891bcecfd0 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Mon, 21 Sep 2020 18:59:22 -0700 Subject: [PATCH 075/130] try --cf-invalidate-default-index --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index e7fe9af0..27973e26 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -182,7 +182,7 @@ pipeline { // scripts. sh './venv/bin/pip install pystache boto3' sh '. venv/bin/activate && python3.7 ./go-site/scripts/bucket-indexer.py --credentials $AWS_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' - sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' + sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate --cf-invalidate-default-index put top-level-index.html s3://kg-hub-public-data/index.html' // Should now appear at: // https://kg-hub.berkeleybop.io/[artifact name] From de9236793a926e11c565a0c2c69a4e667bb587af Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 08:24:34 -0700 Subject: [PATCH 076/130] Remove invalidation default index, fix (hopefully) the fileExists check --- Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 27973e26..af8bab49 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -130,7 +130,7 @@ pipeline { // code for building s3 index files dir('./gitrepo') { script { - if(fileExists('./gitrepo/$BUILDSTARTDATE')){ + if(fileExists($BUILDSTARTDATE)){ echo "Will not overwrite existing directory: $BUILDSTARTDATE" sh 'exit 1' } else { @@ -182,7 +182,7 @@ pipeline { // scripts. sh './venv/bin/pip install pystache boto3' sh '. venv/bin/activate && python3.7 ./go-site/scripts/bucket-indexer.py --credentials $AWS_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' - sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate --cf-invalidate-default-index put top-level-index.html s3://kg-hub-public-data/index.html' + sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' // Should now appear at: // https://kg-hub.berkeleybop.io/[artifact name] From aa2eb8a2a584e31825fdfcfd3afdfb59e96c85b0 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 08:30:07 -0700 Subject: [PATCH 077/130] Pretty please simon says interpolate my variable --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index af8bab49..9d8c765b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -130,7 +130,7 @@ pipeline { // code for building s3 index files dir('./gitrepo') { script { - if(fileExists($BUILDSTARTDATE)){ + if(fileExists('$BUILDSTARTDATE')){ echo "Will not overwrite existing directory: $BUILDSTARTDATE" sh 'exit 1' } else { From 1252e290e2d112f4837403c35b360a3952e8433c Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 09:34:39 -0700 Subject: [PATCH 078/130] Check for remote BUILDSTARTDATE dir --- Jenkinsfile | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 9d8c765b..30dad80d 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -136,7 +136,14 @@ pipeline { } else { echo "$BUILDSTARTDATE doesn't exist, proceeding" } - sh 'git clone https://github.com/justaddcoffee/go-site.git' + withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG')]) { + GIT_COMMIT_EMAIL = sh ( + script: 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate ls s3://kg-hub-public-data/$BUILDSTARTDATE/', + returnStdout: true + ).trim() + echo "Git committer email: ${GIT_COMMIT_EMAIL}" + } + sh 'git clone https://github.com/justaddcoffee/go-site.git' // if (env.BRANCH_NAME != 'master' || if (env.BRANCH_NAME == 'NOT THIS BRANCH') { From 3bc144dae80dd89a0d9ccaee57d34754b5004b65 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 09:41:52 -0700 Subject: [PATCH 079/130] Rename var --- Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 30dad80d..6dfd58bf 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -137,11 +137,11 @@ pipeline { echo "$BUILDSTARTDATE doesn't exist, proceeding" } withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG')]) { - GIT_COMMIT_EMAIL = sh ( + REMOTE_BUILD_DIR_CONTENTS = sh ( script: 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate ls s3://kg-hub-public-data/$BUILDSTARTDATE/', returnStdout: true ).trim() - echo "Git committer email: ${GIT_COMMIT_EMAIL}" + echo "REMOTE_BUILD_DIR_CONTENTS: ${REMOTE_BUILD_DIR_CONTENTS}" } sh 'git clone https://github.com/justaddcoffee/go-site.git' From a070724f8646fca782b3135ab1e1bda795a5c8f6 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 09:50:00 -0700 Subject: [PATCH 080/130] Add check for existing remote dir on s3 --- Jenkinsfile | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 6dfd58bf..7b8bb267 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -130,6 +130,7 @@ pipeline { // code for building s3 index files dir('./gitrepo') { script { + // make sure we aren't going to clobber existing data if(fileExists('$BUILDSTARTDATE')){ echo "Will not overwrite existing directory: $BUILDSTARTDATE" sh 'exit 1' @@ -141,8 +142,14 @@ pipeline { script: 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate ls s3://kg-hub-public-data/$BUILDSTARTDATE/', returnStdout: true ).trim() - echo "REMOTE_BUILD_DIR_CONTENTS: ${REMOTE_BUILD_DIR_CONTENTS}" + echo "REMOTE_BUILD_DIR_CONTENTS: ${REMOTE_BUILD_DIR_CONTENTS}" + if($REMOTE_BUILD_DIR_CONTENTS){ + echo "Will not overwrite existing (---REMOTE S3---) directory: $BUILDSTARTDATE" + sh 'exit 1' + } } + + sh 'git clone https://github.com/justaddcoffee/go-site.git' // if (env.BRANCH_NAME != 'master' || From b0ec150c018b8a4715120c567d14cb57db7ad20a Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 09:58:14 -0700 Subject: [PATCH 081/130] More checking --- Jenkinsfile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 7b8bb267..beadb7c8 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -135,7 +135,7 @@ pipeline { echo "Will not overwrite existing directory: $BUILDSTARTDATE" sh 'exit 1' } else { - echo "$BUILDSTARTDATE doesn't exist, proceeding" + echo "local directory $BUILDSTARTDATE doesn't exist, proceeding" } withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG')]) { REMOTE_BUILD_DIR_CONTENTS = sh ( @@ -143,9 +143,11 @@ pipeline { returnStdout: true ).trim() echo "REMOTE_BUILD_DIR_CONTENTS: ${REMOTE_BUILD_DIR_CONTENTS}" - if($REMOTE_BUILD_DIR_CONTENTS){ + if($REMOTE_BUILD_DIR_CONTENTS != ''){ echo "Will not overwrite existing (---REMOTE S3---) directory: $BUILDSTARTDATE" sh 'exit 1' + } else { + echo "remote directory $BUILDSTARTDATE is empty, proceeding" } } From c5e9744dfe49f52237fafbea4d9a9cd8eeeee798 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 10:02:48 -0700 Subject: [PATCH 082/130] More messages --- Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index beadb7c8..726fc4a8 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -142,8 +142,8 @@ pipeline { script: 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate ls s3://kg-hub-public-data/$BUILDSTARTDATE/', returnStdout: true ).trim() - echo "REMOTE_BUILD_DIR_CONTENTS: ${REMOTE_BUILD_DIR_CONTENTS}" - if($REMOTE_BUILD_DIR_CONTENTS != ''){ + echo "REMOTE_BUILD_DIR_CONTENTS (THIS SHOULD BE EMPTY): ${REMOTE_BUILD_DIR_CONTENTS}" + if($REMOTE_BUILD_DIR_CONTENTS){ echo "Will not overwrite existing (---REMOTE S3---) directory: $BUILDSTARTDATE" sh 'exit 1' } else { From ef6ca9f19b9724c6afdea3151cfb1072d8e31ec4 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 10:12:09 -0700 Subject: [PATCH 083/130] Remove check --- Jenkinsfile | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 726fc4a8..8aedabd6 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -143,15 +143,8 @@ pipeline { returnStdout: true ).trim() echo "REMOTE_BUILD_DIR_CONTENTS (THIS SHOULD BE EMPTY): ${REMOTE_BUILD_DIR_CONTENTS}" - if($REMOTE_BUILD_DIR_CONTENTS){ - echo "Will not overwrite existing (---REMOTE S3---) directory: $BUILDSTARTDATE" - sh 'exit 1' - } else { - echo "remote directory $BUILDSTARTDATE is empty, proceeding" - } } - - + sh 'git clone https://github.com/justaddcoffee/go-site.git' // if (env.BRANCH_NAME != 'master' || From 3b63c5ac2f86c7587b5c5e361a03e669fd3bd980 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 10:15:10 -0700 Subject: [PATCH 084/130] Add back if/else for remote checking --- Jenkinsfile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index 8aedabd6..d9fd2abf 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -143,6 +143,12 @@ pipeline { returnStdout: true ).trim() echo "REMOTE_BUILD_DIR_CONTENTS (THIS SHOULD BE EMPTY): ${REMOTE_BUILD_DIR_CONTENTS}" + if($REMOTE_BUILD_DIR_CONTENTS){ + echo "Will not overwrite existing (---REMOTE S3---) directory: $BUILDSTARTDATE" + // sh 'exit 1' + } else { + echo "remote directory $BUILDSTARTDATE is empty, proceeding" + } } sh 'git clone https://github.com/justaddcoffee/go-site.git' From 83db449bb6cfe3c9060c2f7cc9626a706b3457d5 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 10:25:29 -0700 Subject: [PATCH 085/130] Sanity check --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index d9fd2abf..a93d598e 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -143,7 +143,7 @@ pipeline { returnStdout: true ).trim() echo "REMOTE_BUILD_DIR_CONTENTS (THIS SHOULD BE EMPTY): ${REMOTE_BUILD_DIR_CONTENTS}" - if($REMOTE_BUILD_DIR_CONTENTS){ + if("" == ""){ echo "Will not overwrite existing (---REMOTE S3---) directory: $BUILDSTARTDATE" // sh 'exit 1' } else { From f3d7522cf963d448d5afd9b9eacbac75fa69f155 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 10:28:10 -0700 Subject: [PATCH 086/130] Please interpolate --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index a93d598e..f468e76d 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -143,7 +143,7 @@ pipeline { returnStdout: true ).trim() echo "REMOTE_BUILD_DIR_CONTENTS (THIS SHOULD BE EMPTY): ${REMOTE_BUILD_DIR_CONTENTS}" - if("" == ""){ + if('$BUILDSTARTDATE'){ echo "Will not overwrite existing (---REMOTE S3---) directory: $BUILDSTARTDATE" // sh 'exit 1' } else { From e9c4542643a5ea222b6e5e29a47dcedfa37cf474 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 10:31:41 -0700 Subject: [PATCH 087/130] Try exiting --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index f468e76d..447454e5 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -145,7 +145,7 @@ pipeline { echo "REMOTE_BUILD_DIR_CONTENTS (THIS SHOULD BE EMPTY): ${REMOTE_BUILD_DIR_CONTENTS}" if('$BUILDSTARTDATE'){ echo "Will not overwrite existing (---REMOTE S3---) directory: $BUILDSTARTDATE" - // sh 'exit 1' + sh 'exit 1' } else { echo "remote directory $BUILDSTARTDATE is empty, proceeding" } From d09e78afde7ad0fb8f221e4bc1bd8073a6e81fc7 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 10:38:45 -0700 Subject: [PATCH 088/130] Groovy variables ugh --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 447454e5..e95d2217 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -143,7 +143,7 @@ pipeline { returnStdout: true ).trim() echo "REMOTE_BUILD_DIR_CONTENTS (THIS SHOULD BE EMPTY): ${REMOTE_BUILD_DIR_CONTENTS}" - if('$BUILDSTARTDATE'){ + if('$BUILDSTARTDATE' != ''){ echo "Will not overwrite existing (---REMOTE S3---) directory: $BUILDSTARTDATE" sh 'exit 1' } else { From 8e56ce1879aa3de78b35f4e57ed5728e39a430bd Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 10:44:26 -0700 Subject: [PATCH 089/130] Groovy variables ugh part 832 --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index e95d2217..354f7e4d 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -143,7 +143,7 @@ pipeline { returnStdout: true ).trim() echo "REMOTE_BUILD_DIR_CONTENTS (THIS SHOULD BE EMPTY): ${REMOTE_BUILD_DIR_CONTENTS}" - if('$BUILDSTARTDATE' != ''){ + if('$BUILDSTARTDATE'.trim() != ''){ echo "Will not overwrite existing (---REMOTE S3---) directory: $BUILDSTARTDATE" sh 'exit 1' } else { From a49e71ec100f8b74877d0c3807ce1f83ed3642e4 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 10:48:03 -0700 Subject: [PATCH 090/130] Groovy variables --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 354f7e4d..e88278e7 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -142,7 +142,7 @@ pipeline { script: 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate ls s3://kg-hub-public-data/$BUILDSTARTDATE/', returnStdout: true ).trim() - echo "REMOTE_BUILD_DIR_CONTENTS (THIS SHOULD BE EMPTY): ${REMOTE_BUILD_DIR_CONTENTS}" + echo "REMOTE_BUILD_DIR_CONTENTS (THIS SHOULD BE EMPTY): '${REMOTE_BUILD_DIR_CONTENTS}'" if('$BUILDSTARTDATE'.trim() != ''){ echo "Will not overwrite existing (---REMOTE S3---) directory: $BUILDSTARTDATE" sh 'exit 1' From b1a56291f8086aa27a9e68ae344e5d5502e846e2 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 10:52:56 -0700 Subject: [PATCH 091/130] Groovy variables --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index e88278e7..0b4f2b8a 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -143,7 +143,7 @@ pipeline { returnStdout: true ).trim() echo "REMOTE_BUILD_DIR_CONTENTS (THIS SHOULD BE EMPTY): '${REMOTE_BUILD_DIR_CONTENTS}'" - if('$BUILDSTARTDATE'.trim() != ''){ + if("${BUILDSTARTDATE}".trim() != ''){ echo "Will not overwrite existing (---REMOTE S3---) directory: $BUILDSTARTDATE" sh 'exit 1' } else { From b84f6d511a8dcf860c7e11e30c8abce48386ac63 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 10:55:59 -0700 Subject: [PATCH 092/130] Groovy variables --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 0b4f2b8a..da5e40a7 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -143,7 +143,7 @@ pipeline { returnStdout: true ).trim() echo "REMOTE_BUILD_DIR_CONTENTS (THIS SHOULD BE EMPTY): '${REMOTE_BUILD_DIR_CONTENTS}'" - if("${BUILDSTARTDATE}".trim() != ''){ + if("${BUILDSTARTDATE}"?.trim() != ''){ echo "Will not overwrite existing (---REMOTE S3---) directory: $BUILDSTARTDATE" sh 'exit 1' } else { From 97943a23a363ab3aeb3f9235e9b34507a4ca910a Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 10:58:51 -0700 Subject: [PATCH 093/130] Groovy variable --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index da5e40a7..5baf8d1c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -143,7 +143,7 @@ pipeline { returnStdout: true ).trim() echo "REMOTE_BUILD_DIR_CONTENTS (THIS SHOULD BE EMPTY): '${REMOTE_BUILD_DIR_CONTENTS}'" - if("${BUILDSTARTDATE}"?.trim() != ''){ + if('$BUILDSTARTDATE'){ echo "Will not overwrite existing (---REMOTE S3---) directory: $BUILDSTARTDATE" sh 'exit 1' } else { From 8a727dfb417963e6f65c78f04bc5eb1194a01f52 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 11:02:10 -0700 Subject: [PATCH 094/130] Remove check for local builddir, since this will essentially never be useful --- Jenkinsfile | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 5baf8d1c..f2c38e18 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -131,12 +131,6 @@ pipeline { dir('./gitrepo') { script { // make sure we aren't going to clobber existing data - if(fileExists('$BUILDSTARTDATE')){ - echo "Will not overwrite existing directory: $BUILDSTARTDATE" - sh 'exit 1' - } else { - echo "local directory $BUILDSTARTDATE doesn't exist, proceeding" - } withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG')]) { REMOTE_BUILD_DIR_CONTENTS = sh ( script: 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate ls s3://kg-hub-public-data/$BUILDSTARTDATE/', From b6f9ca1debb9e36db1e6b7c5c4cf4dc80316838f Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 13:14:14 -0700 Subject: [PATCH 095/130] Groovy variable --- Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index f2c38e18..c066a214 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -133,11 +133,11 @@ pipeline { // make sure we aren't going to clobber existing data withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG')]) { REMOTE_BUILD_DIR_CONTENTS = sh ( - script: 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate ls s3://kg-hub-public-data/$BUILDSTARTDATE/', + script: 's3cmd -c $S3CMD_CFG ls s3://kg-hub-public-data/$BUILDSTARTDATE/', returnStdout: true ).trim() echo "REMOTE_BUILD_DIR_CONTENTS (THIS SHOULD BE EMPTY): '${REMOTE_BUILD_DIR_CONTENTS}'" - if('$BUILDSTARTDATE'){ + if($REMOTE_BUILD_DIR_CONTENTS){ echo "Will not overwrite existing (---REMOTE S3---) directory: $BUILDSTARTDATE" sh 'exit 1' } else { From d14ef00a8effd5750180f5822a3fc178e9ccf9bc Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 13:17:26 -0700 Subject: [PATCH 096/130] Variable checking --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index c066a214..6a50b169 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -137,7 +137,7 @@ pipeline { returnStdout: true ).trim() echo "REMOTE_BUILD_DIR_CONTENTS (THIS SHOULD BE EMPTY): '${REMOTE_BUILD_DIR_CONTENTS}'" - if($REMOTE_BUILD_DIR_CONTENTS){ + if($REMOTE_BUILD_DIR_CONTENTS != ''){ echo "Will not overwrite existing (---REMOTE S3---) directory: $BUILDSTARTDATE" sh 'exit 1' } else { From 395c6c91da2198d5090c3c3770364bb74b1908ee Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 13:21:28 -0700 Subject: [PATCH 097/130] fix variable check --- Jenkinsfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 6a50b169..e7431db1 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -130,6 +130,8 @@ pipeline { // code for building s3 index files dir('./gitrepo') { script { + sh 'git clone https://github.com/justaddcoffee/go-site.git' + // make sure we aren't going to clobber existing data withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG')]) { REMOTE_BUILD_DIR_CONTENTS = sh ( @@ -137,7 +139,7 @@ pipeline { returnStdout: true ).trim() echo "REMOTE_BUILD_DIR_CONTENTS (THIS SHOULD BE EMPTY): '${REMOTE_BUILD_DIR_CONTENTS}'" - if($REMOTE_BUILD_DIR_CONTENTS != ''){ + if('$REMOTE_BUILD_DIR_CONTENTS' != ''){ echo "Will not overwrite existing (---REMOTE S3---) directory: $BUILDSTARTDATE" sh 'exit 1' } else { @@ -145,8 +147,6 @@ pipeline { } } - sh 'git clone https://github.com/justaddcoffee/go-site.git' - // if (env.BRANCH_NAME != 'master' || if (env.BRANCH_NAME == 'NOT THIS BRANCH') { echo "Will not push if not on correct branch." From e931957eb4c06752712398b62e71cbdb9d61370c Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 13:25:34 -0700 Subject: [PATCH 098/130] Groovy variables --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index e7431db1..a8c42eb4 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -139,7 +139,7 @@ pipeline { returnStdout: true ).trim() echo "REMOTE_BUILD_DIR_CONTENTS (THIS SHOULD BE EMPTY): '${REMOTE_BUILD_DIR_CONTENTS}'" - if('$REMOTE_BUILD_DIR_CONTENTS' != ''){ + if("${REMOTE_BUILD_DIR_CONTENTS}" != ''){ echo "Will not overwrite existing (---REMOTE S3---) directory: $BUILDSTARTDATE" sh 'exit 1' } else { From 9c9b310e384421141891c17eb46786e1b5c72fe2 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 13:42:33 -0700 Subject: [PATCH 099/130] possibly fix cf invalidation problem --- Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index a8c42eb4..ab298c38 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -176,7 +176,7 @@ pipeline { // put $BUILDSTARTDATE/ in s3 bucket // sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE --prefix https://kg-hub.berkeleybop.io/ -x' - sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate put -pr $BUILDSTARTDATE s3://kg-hub-public-data/' + sh 's3cmd -c $S3CMD_CFG put -pr --acl-public --mime-type=text/html --cf-invalidate $BUILDSTARTDATE s3://kg-hub-public-data/' // // make $BUILDSTARTDATE the new current/ @@ -191,7 +191,7 @@ pipeline { // scripts. sh './venv/bin/pip install pystache boto3' sh '. venv/bin/activate && python3.7 ./go-site/scripts/bucket-indexer.py --credentials $AWS_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' - sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate put top-level-index.html s3://kg-hub-public-data/index.html' + sh 's3cmd -c $S3CMD_CFG put --acl-public --mime-type=text/html --cf-invalidate top-level-index.html s3://kg-hub-public-data/index.html' // Should now appear at: // https://kg-hub.berkeleybop.io/[artifact name] From b3bf54ac3a32bdb32324b5e8781df22f78bb252a Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 13:53:55 -0700 Subject: [PATCH 100/130] Set tar.gz, nt and jnl files to their usual names, also set --cf-default-root-object$BUILDSTARTDATE --- Jenkinsfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index ab298c38..f8037f8a 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -159,10 +159,10 @@ pipeline { // make $BUILDSTARTDATE/ directory and sync to s3 bucket // sh 'mkdir $BUILDSTARTDATE/' - sh 'cp -p data/merged/merged-kg.nt.gz $BUILDSTARTDATE/' - sh 'cp -p data/merged/merged-kg.tar.gz $BUILDSTARTDATE/' + sh 'cp -p data/merged/merged-kg.nt.gz $BUILDSTARTDATE/kg-covid-19.nt.gz' + sh 'cp -p data/merged/merged-kg.tar.gz $BUILDSTARTDATE/kg-covid-19.tar.gz' sh 'touch merged-kg.jnl.gz' // REMOVE - sh 'cp -p merged-kg.jnl.gz $BUILDSTARTDATE/' + sh 'cp -p merged-kg.jnl.gz $BUILDSTARTDATE/kg-covid-19.jnl.gz' // transformed data sh 'rm -fr data/transformed/.gitkeep' sh 'cp -pr data/transformed $BUILDSTARTDATE/' @@ -176,7 +176,7 @@ pipeline { // put $BUILDSTARTDATE/ in s3 bucket // sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE --prefix https://kg-hub.berkeleybop.io/ -x' - sh 's3cmd -c $S3CMD_CFG put -pr --acl-public --mime-type=text/html --cf-invalidate $BUILDSTARTDATE s3://kg-hub-public-data/' + sh 's3cmd -c $S3CMD_CFG put -pr --cf-default-root-object=$BUILDSTARTDATE --acl-public --mime-type=text/html --cf-invalidate $BUILDSTARTDATE s3://kg-hub-public-data/' // // make $BUILDSTARTDATE the new current/ From f7716a644cf87024431bc122bc4d74d07f8669b6 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 14:41:15 -0700 Subject: [PATCH 101/130] Add (probably failing) CF invalidation commands --- Jenkinsfile | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index f8037f8a..2d4ddb44 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -3,6 +3,14 @@ pipeline { environment { BUILDSTARTDATE = sh(script: "echo `date +%Y%m%d`", returnStdout: true).trim() + + // Distribution ID for the AWS CloudFront for this branch, + // used soley for invalidations. Versioned release does not + // need this as it is always a new location and the index + // upload already has an invalidation on it. For current, + // snapshot, and experimental. + AWS_CLOUDFRONT_DISTRIBUTION_ID = 'EUVSWXZQBXCFP' + AWS_CLOUDFRONT_RELEASE_DISTRIBUTION_ID = 'EUVSWXZQBXCFP' } options { @@ -176,7 +184,7 @@ pipeline { // put $BUILDSTARTDATE/ in s3 bucket // sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE --prefix https://kg-hub.berkeleybop.io/ -x' - sh 's3cmd -c $S3CMD_CFG put -pr --cf-default-root-object=$BUILDSTARTDATE --acl-public --mime-type=text/html --cf-invalidate $BUILDSTARTDATE s3://kg-hub-public-data/' + sh 's3cmd -c $S3CMD_CFG put -pr --acl-public --mime-type=text/html --cf-invalidate $BUILDSTARTDATE s3://kg-hub-public-data/' // // make $BUILDSTARTDATE the new current/ @@ -193,6 +201,16 @@ pipeline { sh '. venv/bin/activate && python3.7 ./go-site/scripts/bucket-indexer.py --credentials $AWS_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' sh 's3cmd -c $S3CMD_CFG put --acl-public --mime-type=text/html --cf-invalidate top-level-index.html s3://kg-hub-public-data/index.html' + // Invalidate the CDN now that the new + // files are up. + sh 'echo "[preview]" > ./awscli_config.txt && echo "cloudfront=true" >> ./awscli_config.txt' + sh 'AWS_CONFIG_FILE=./awscli_config.txt python3.7 ./venv/bin/aws cloudfront create-invalidation --distribution-id $AWS_CLOUDFRONT_DISTRIBUTION_ID --paths "/*"' + // The release branch also needs to + // deal with the second location. + if( env.BRANCH_NAME == 'release' ){ + sh 'AWS_CONFIG_FILE=./awscli_config.txt python3.7 ./venv/bin/aws cloudfront create-invalidation --distribution-id $AWS_CLOUDFRONT_RELEASE_DISTRIBUTION_ID --paths "/*"' + } + // Should now appear at: // https://kg-hub.berkeleybop.io/[artifact name] } From a214f5178cd7720fd147da2f5d999b6ca67b593e Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 14:51:27 -0700 Subject: [PATCH 102/130] Install awscli --- Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Jenkinsfile b/Jenkinsfile index 2d4ddb44..c1ebb4a3 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -203,6 +203,7 @@ pipeline { // Invalidate the CDN now that the new // files are up. + sh './venv/bin/pip install awscli' sh 'echo "[preview]" > ./awscli_config.txt && echo "cloudfront=true" >> ./awscli_config.txt' sh 'AWS_CONFIG_FILE=./awscli_config.txt python3.7 ./venv/bin/aws cloudfront create-invalidation --distribution-id $AWS_CLOUDFRONT_DISTRIBUTION_ID --paths "/*"' // The release branch also needs to From 617b3a471dcc9972e216a00e6997560501a7af18 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 15:01:08 -0700 Subject: [PATCH 103/130] Fix venv issue, remove release distribution ID which is not used --- Jenkinsfile | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index c1ebb4a3..089a9bde 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -10,7 +10,6 @@ pipeline { // upload already has an invalidation on it. For current, // snapshot, and experimental. AWS_CLOUDFRONT_DISTRIBUTION_ID = 'EUVSWXZQBXCFP' - AWS_CLOUDFRONT_RELEASE_DISTRIBUTION_ID = 'EUVSWXZQBXCFP' } options { @@ -205,12 +204,7 @@ pipeline { // files are up. sh './venv/bin/pip install awscli' sh 'echo "[preview]" > ./awscli_config.txt && echo "cloudfront=true" >> ./awscli_config.txt' - sh 'AWS_CONFIG_FILE=./awscli_config.txt python3.7 ./venv/bin/aws cloudfront create-invalidation --distribution-id $AWS_CLOUDFRONT_DISTRIBUTION_ID --paths "/*"' - // The release branch also needs to - // deal with the second location. - if( env.BRANCH_NAME == 'release' ){ - sh 'AWS_CONFIG_FILE=./awscli_config.txt python3.7 ./venv/bin/aws cloudfront create-invalidation --distribution-id $AWS_CLOUDFRONT_RELEASE_DISTRIBUTION_ID --paths "/*"' - } + sh '. venv/bin/activate && AWS_CONFIG_FILE=./awscli_config.txt python3.7 ./venv/bin/aws cloudfront create-invalidation --distribution-id $AWS_CLOUDFRONT_DISTRIBUTION_ID --paths "/*"' // Should now appear at: // https://kg-hub.berkeleybop.io/[artifact name] From e8af5cec47b081eac42b777459aebc8ae7314e4e Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 15:18:47 -0700 Subject: [PATCH 104/130] Stop running run.py download for now --- Jenkinsfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 089a9bde..ab22db99 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -62,8 +62,9 @@ pipeline { dir('./gitrepo') { script { def run_py_dl = sh( - script: '. venv/bin/activate && python3.7 run.py download', returnStatus: true - ) + // script: '. venv/bin/activate && python3.7 run.py download', returnStatus: true + script: 'BADCOMMAND', returnStatus: true + ) if (run_py_dl == 0) { if (env.BRANCH_NAME != 'master') { // upload raw to s3 if we're on correct branch echo "Will not push if not on correct branch." From d2942137a6b747eb94f8a4f8a260df24d4e16e28 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 15:52:39 -0700 Subject: [PATCH 105/130] Add aws credentials as vars --- Jenkinsfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index ab22db99..efa320a9 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -161,7 +161,9 @@ pipeline { } else { withCredentials([ file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG'), - file(credentialsId: 'aws_kg_hub_push_json', variable: 'AWS_JSON') + file(credentialsId: 'aws_kg_hub_push_json', variable: 'AWS_JSON'), + string(credentialsId: 'aws_kg_hub_access_key', variable: 'AWS_ACCESS_KEY_ID'), + string(credentialsId: 'aws_kg_hub_secret_key', variable: 'AWS_SECRET_ACCESS_KEY')]) { ]) { // // make $BUILDSTARTDATE/ directory and sync to s3 bucket From dbb57fb5db9ff6dbd412cd7a8bbeab07fd0a58db Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 15:54:25 -0700 Subject: [PATCH 106/130] Formatting/syntax --- Jenkinsfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index efa320a9..fb4b7733 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -164,7 +164,6 @@ pipeline { file(credentialsId: 'aws_kg_hub_push_json', variable: 'AWS_JSON'), string(credentialsId: 'aws_kg_hub_access_key', variable: 'AWS_ACCESS_KEY_ID'), string(credentialsId: 'aws_kg_hub_secret_key', variable: 'AWS_SECRET_ACCESS_KEY')]) { - ]) { // // make $BUILDSTARTDATE/ directory and sync to s3 bucket // From c5a62901e6e571cc2bc96f091d98a3321cf78e10 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 16:35:14 -0700 Subject: [PATCH 107/130] Fix prefix arg to indexer (add $BUILDSTARTDATE) --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index fb4b7733..2fce03dc 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -184,7 +184,7 @@ pipeline { // // put $BUILDSTARTDATE/ in s3 bucket // - sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE --prefix https://kg-hub.berkeleybop.io/ -x' + sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE --prefix https://kg-hub.berkeleybop.io/$BUILDSTARTDATE -x' sh 's3cmd -c $S3CMD_CFG put -pr --acl-public --mime-type=text/html --cf-invalidate $BUILDSTARTDATE s3://kg-hub-public-data/' // From 5ca16b18f9a0311785f0f558027619d37f9601ca Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 16:39:16 -0700 Subject: [PATCH 108/130] added -u flag --- Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 2fce03dc..1996c433 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -184,8 +184,8 @@ pipeline { // // put $BUILDSTARTDATE/ in s3 bucket // - sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE --prefix https://kg-hub.berkeleybop.io/$BUILDSTARTDATE -x' - sh 's3cmd -c $S3CMD_CFG put -pr --acl-public --mime-type=text/html --cf-invalidate $BUILDSTARTDATE s3://kg-hub-public-data/' + sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE --prefix https://kg-hub.berkeleybop.io/$BUILDSTARTDATE -x -u' + sh 's3cmd -c $S3CMD_CFG put -pr --acl-public --mime-type=text/html --cf-invalidate $BUILDSTARTDATE s3://kg-hub-public-data/' // // make $BUILDSTARTDATE the new current/ From cb8b455ae30650597dd1cc86fd4cb22d6baac014 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 16:48:43 -0700 Subject: [PATCH 109/130] add back current/ build --- Jenkinsfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 1996c433..8e11fb7c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -191,9 +191,9 @@ pipeline { // make $BUILDSTARTDATE the new current/ // // The following cp always times out: - // sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate cp -v -pr s3://kg-hub-public-data/$BUILDSTARTDATE/ s3://kg-hub-public-data/new_current/' - // sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate rm -fr s3://kg-hub-public-data/current' - // sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate mv --recursive s3://kg-hub-public-data/new_current s3://kg-hub-public-data/current' + sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate cp -v -pr s3://kg-hub-public-data/$BUILDSTARTDATE/ s3://kg-hub-public-data/new_current/' + sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate rm -fr s3://kg-hub-public-data/current' + sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate mv --recursive s3://kg-hub-public-data/new_current s3://kg-hub-public-data/current' // Build the top level index.html // "External" packages required to run these From 6f9793034a50e20bdde5f093da82042d184d0482 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 16:53:02 -0700 Subject: [PATCH 110/130] fix mv args --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 8e11fb7c..e3c6db25 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -193,7 +193,7 @@ pipeline { // The following cp always times out: sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate cp -v -pr s3://kg-hub-public-data/$BUILDSTARTDATE/ s3://kg-hub-public-data/new_current/' sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate rm -fr s3://kg-hub-public-data/current' - sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate mv --recursive s3://kg-hub-public-data/new_current s3://kg-hub-public-data/current' + sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate mv --recursive s3://kg-hub-public-data/new_current s3://kg-hub-public-data/current/' // Build the top level index.html // "External" packages required to run these From b200f49fcad57f67a878d0ae1a99f3c6c87132f5 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 16:57:14 -0700 Subject: [PATCH 111/130] Fix mv args part deux --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index e3c6db25..48e1a8ee 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -193,7 +193,7 @@ pipeline { // The following cp always times out: sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate cp -v -pr s3://kg-hub-public-data/$BUILDSTARTDATE/ s3://kg-hub-public-data/new_current/' sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate rm -fr s3://kg-hub-public-data/current' - sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate mv --recursive s3://kg-hub-public-data/new_current s3://kg-hub-public-data/current/' + sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate mv --recursive s3://kg-hub-public-data/new_current/ s3://kg-hub-public-data/current/' // Build the top level index.html // "External" packages required to run these From e5eb931ef198d861c0c4e01ca3bf405e933d0129 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 17:08:51 -0700 Subject: [PATCH 112/130] build new_current (not finished yet) --- Jenkinsfile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 48e1a8ee..ee77708f 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -187,13 +187,17 @@ pipeline { sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE --prefix https://kg-hub.berkeleybop.io/$BUILDSTARTDATE -x -u' sh 's3cmd -c $S3CMD_CFG put -pr --acl-public --mime-type=text/html --cf-invalidate $BUILDSTARTDATE s3://kg-hub-public-data/' + // make current/ directory + sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory current --prefix https://kg-hub.berkeleybop.io/current -x -u' + sh 's3cmd -c $S3CMD_CFG put -pr --acl-public --mime-type=text/html --cf-invalidate $BUILDSTARTDATE/ s3://kg-hub-public-data/new_current' + // // make $BUILDSTARTDATE the new current/ // // The following cp always times out: - sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate cp -v -pr s3://kg-hub-public-data/$BUILDSTARTDATE/ s3://kg-hub-public-data/new_current/' - sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate rm -fr s3://kg-hub-public-data/current' - sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate mv --recursive s3://kg-hub-public-data/new_current/ s3://kg-hub-public-data/current/' + // sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate cp -v -pr s3://kg-hub-public-data/$BUILDSTARTDATE/ s3://kg-hub-public-data/new_current/' + // sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate rm -fr s3://kg-hub-public-data/current' + // sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate mv --recursive s3://kg-hub-public-data/new_current/ s3://kg-hub-public-data/current/' // Build the top level index.html // "External" packages required to run these From b4e12ea9e1e874e8587e96a9bbc0c2e64b7e34c5 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 17:12:10 -0700 Subject: [PATCH 113/130] Fix mv arg --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index ee77708f..8f886f1c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -189,7 +189,7 @@ pipeline { // make current/ directory sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory current --prefix https://kg-hub.berkeleybop.io/current -x -u' - sh 's3cmd -c $S3CMD_CFG put -pr --acl-public --mime-type=text/html --cf-invalidate $BUILDSTARTDATE/ s3://kg-hub-public-data/new_current' + sh 's3cmd -c $S3CMD_CFG put -pr --acl-public --mime-type=text/html --cf-invalidate $BUILDSTARTDATE/ s3://kg-hub-public-data/new_current/' // // make $BUILDSTARTDATE the new current/ From 80e97ee926dde70d332d56df1c1d5a4bde628ca2 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Tue, 22 Sep 2020 17:24:36 -0700 Subject: [PATCH 114/130] push to current correctly --- Jenkinsfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 8f886f1c..db676ec8 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -188,8 +188,8 @@ pipeline { sh 's3cmd -c $S3CMD_CFG put -pr --acl-public --mime-type=text/html --cf-invalidate $BUILDSTARTDATE s3://kg-hub-public-data/' // make current/ directory - sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory current --prefix https://kg-hub.berkeleybop.io/current -x -u' - sh 's3cmd -c $S3CMD_CFG put -pr --acl-public --mime-type=text/html --cf-invalidate $BUILDSTARTDATE/ s3://kg-hub-public-data/new_current/' + sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE --prefix https://kg-hub.berkeleybop.io/current -x -u' + sh 's3cmd -c $S3CMD_CFG put -pr --acl-public --mime-type=text/html --cf-invalidate $BUILDSTARTDATE/ s3://kg-hub-public-data/current/' // // make $BUILDSTARTDATE the new current/ From dc1b7e532ed40a4375cb928606bfbbd6ad7ee000 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Wed, 23 Sep 2020 09:23:43 -0700 Subject: [PATCH 115/130] Remove irrelevant comments --- Jenkinsfile | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index db676ec8..be002c87 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -5,10 +5,7 @@ pipeline { BUILDSTARTDATE = sh(script: "echo `date +%Y%m%d`", returnStdout: true).trim() // Distribution ID for the AWS CloudFront for this branch, - // used soley for invalidations. Versioned release does not - // need this as it is always a new location and the index - // upload already has an invalidation on it. For current, - // snapshot, and experimental. + // used soley for invalidations AWS_CLOUDFRONT_DISTRIBUTION_ID = 'EUVSWXZQBXCFP' } From d091e9232249df89e9d0ebacceba38c9c82c523b Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Wed, 23 Sep 2020 11:22:30 -0700 Subject: [PATCH 116/130] Whitespace --- Jenkinsfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index be002c87..209e80b4 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -5,8 +5,8 @@ pipeline { BUILDSTARTDATE = sh(script: "echo `date +%Y%m%d`", returnStdout: true).trim() // Distribution ID for the AWS CloudFront for this branch, - // used soley for invalidations - AWS_CLOUDFRONT_DISTRIBUTION_ID = 'EUVSWXZQBXCFP' + // used soley for invalidations + AWS_CLOUDFRONT_DISTRIBUTION_ID = 'EUVSWXZQBXCFP' } options { @@ -159,7 +159,7 @@ pipeline { withCredentials([ file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG'), file(credentialsId: 'aws_kg_hub_push_json', variable: 'AWS_JSON'), - string(credentialsId: 'aws_kg_hub_access_key', variable: 'AWS_ACCESS_KEY_ID'), + string(credentialsId: 'aws_kg_hub_access_key', variable: 'AWS_ACCESS_KEY_ID'), string(credentialsId: 'aws_kg_hub_secret_key', variable: 'AWS_SECRET_ACCESS_KEY')]) { // // make $BUILDSTARTDATE/ directory and sync to s3 bucket @@ -190,7 +190,7 @@ pipeline { // // make $BUILDSTARTDATE the new current/ - // + // // The following cp always times out: // sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate cp -v -pr s3://kg-hub-public-data/$BUILDSTARTDATE/ s3://kg-hub-public-data/new_current/' // sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate rm -fr s3://kg-hub-public-data/current' From adab5cc2b12ba9891ad38d1f076670fa176fc1b3 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Wed, 23 Sep 2020 11:23:56 -0700 Subject: [PATCH 117/130] Add back download step --- Jenkinsfile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 209e80b4..081e41b5 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -59,9 +59,8 @@ pipeline { dir('./gitrepo') { script { def run_py_dl = sh( - // script: '. venv/bin/activate && python3.7 run.py download', returnStatus: true - script: 'BADCOMMAND', returnStatus: true - ) + script: '. venv/bin/activate && python3.7 run.py download', returnStatus: true + ) if (run_py_dl == 0) { if (env.BRANCH_NAME != 'master') { // upload raw to s3 if we're on correct branch echo "Will not push if not on correct branch." From 4ab2da981f40c0ff5dc278272144c139fc133b7b Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Wed, 23 Sep 2020 11:25:05 -0700 Subject: [PATCH 118/130] Fix download step --- Jenkinsfile | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 081e41b5..a4a4124b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -73,11 +73,8 @@ pipeline { withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG')]) { sh 'rm -fr data/raw || true;' sh 'mkdir -p data/raw || true' - // FIX THIS - // sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=plain/text get -r s3://kg-hub-public-data/raw/ data/raw/' - sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=plain/text get -r s3://kg-hub-public-data/raw/hp.json data/raw/hp.json' - - } + sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=plain/text get -r s3://kg-hub-public-data/raw/ data/raw/' + } } } } From 4b3aac0c40b9d24fbd6e3ef56bf546ee0607bec3 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Wed, 23 Sep 2020 11:26:25 -0700 Subject: [PATCH 119/130] Add back transform step --- Jenkinsfile | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index a4a4124b..68171796 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -84,12 +84,8 @@ pipeline { stage('Transform') { steps { dir('./gitrepo') { -// sh 'env' -// sh '. venv/bin/activate && env && python3.7 run.py transform' - withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG')]) { - sh 'mkdir transformed/' - sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=plain/text get -r s3://kg-hub-public-data/transformed/ttd data/transformed/' - } + sh 'env' + sh '. venv/bin/activate && env && python3.7 run.py transform' } } } From c471aa332e06090fcfddbe76165f8a48566684e8 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Wed, 23 Sep 2020 11:27:54 -0700 Subject: [PATCH 120/130] Add back merge step --- Jenkinsfile | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 68171796..b21c3cef 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -93,15 +93,10 @@ pipeline { stage('Merge') { steps { dir('./gitrepo') { -// sh '. venv/bin/activate && python3.7 run.py merge' -// sh 'env' -// sh 'cp merged_graph_stats.yaml merged_graph_stats_$BUILDSTARTDATE.yaml' -// sh 'tar -rvf data/merged/merged-kg.tar merged_graph_stats_$BUILDSTARTDATE.yaml' - sh 'touch TEST_stats.yaml' - sh 'touch merged_graph_stats_$BUILDSTARTDATE.yaml' - sh 'mkdir -p data/merged/' - sh 'touch data/merged/merged-kg.nt.gz' - sh 'touch data/merged/merged-kg.tar.gz' + sh '. venv/bin/activate && python3.7 run.py merge' + sh 'env' + sh 'cp merged_graph_stats.yaml merged_graph_stats_$BUILDSTARTDATE.yaml' + sh 'tar -rvf data/merged/merged-kg.tar merged_graph_stats_$BUILDSTARTDATE.yaml' } } } From 726385cc672456877258e104374836d82c134712 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Wed, 23 Sep 2020 11:28:21 -0700 Subject: [PATCH 121/130] Add back make blazegraph journal step --- Jenkinsfile | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index b21c3cef..ab0735c6 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -101,21 +101,21 @@ pipeline { } } -// stage('Make blazegraph journal'){ -// steps { -// dir('./gitrepo/blazegraph') { -// git( -// url: 'https://github.com/balhoff/blazegraph-runner.git', -// branch: 'master' -// ) -// sh 'sbt stage' -// sh 'pigz -d ../data/merged/merged-kg.nt.gz' -// sh 'export JAVA_OPTS=-Xmx128G && ./target/universal/stage/bin/blazegraph-runner load --informat=ntriples --journal=../merged-kg.jnl --use-ontology-graph=true ../data/merged/merged-kg.nt' -// sh 'pigz ../merged-kg.jnl' -// sh 'pigz ../data/merged/merged-kg.nt' -// } -// } -// } + stage('Make blazegraph journal'){ + steps { + dir('./gitrepo/blazegraph') { + git( + url: 'https://github.com/balhoff/blazegraph-runner.git', + branch: 'master' + ) + sh 'sbt stage' + sh 'pigz -d ../data/merged/merged-kg.nt.gz' + sh 'export JAVA_OPTS=-Xmx128G && ./target/universal/stage/bin/blazegraph-runner load --informat=ntriples --journal=../merged-kg.jnl --use-ontology-graph=true ../data/merged/merged-kg.nt' + sh 'pigz ../merged-kg.jnl' + sh 'pigz ../data/merged/merged-kg.nt' + } + } + } stage('Publish') { steps { From b58d67cf5916e45523f35c905845579cbd69835d Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Wed, 23 Sep 2020 11:29:55 -0700 Subject: [PATCH 122/130] Fix whitespace and comments in Publish stage --- Jenkinsfile | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index ab0735c6..72cd6b0c 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -122,22 +122,21 @@ pipeline { // code for building s3 index files dir('./gitrepo') { script { - sh 'git clone https://github.com/justaddcoffee/go-site.git' - - // make sure we aren't going to clobber existing data - withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG')]) { - REMOTE_BUILD_DIR_CONTENTS = sh ( - script: 's3cmd -c $S3CMD_CFG ls s3://kg-hub-public-data/$BUILDSTARTDATE/', - returnStdout: true - ).trim() - echo "REMOTE_BUILD_DIR_CONTENTS (THIS SHOULD BE EMPTY): '${REMOTE_BUILD_DIR_CONTENTS}'" - if("${REMOTE_BUILD_DIR_CONTENTS}" != ''){ + sh 'git clone https://github.com/justaddcoffee/go-site.git' + + // make sure we aren't going to clobber existing data on S3 + withCredentials([file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG')]) { + REMOTE_BUILD_DIR_CONTENTS = sh ( + script: 's3cmd -c $S3CMD_CFG ls s3://kg-hub-public-data/$BUILDSTARTDATE/', + returnStdout: true.trim() + echo "REMOTE_BUILD_DIR_CONTENTS (THIS SHOULD BE EMPTY): '${REMOTE_BUILD_DIR_CONTENTS}'" + if("${REMOTE_BUILD_DIR_CONTENTS}" != ''){ echo "Will not overwrite existing (---REMOTE S3---) directory: $BUILDSTARTDATE" sh 'exit 1' - } else { + } else { echo "remote directory $BUILDSTARTDATE is empty, proceeding" - } - } + } + } // if (env.BRANCH_NAME != 'master' || if (env.BRANCH_NAME == 'NOT THIS BRANCH') { From 1c44c506200b22b168b060798b6b1bbfb93c9e79 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Wed, 23 Sep 2020 11:30:48 -0700 Subject: [PATCH 123/130] Add back check for master branch to publish stage --- Jenkinsfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 72cd6b0c..4f5e34bc 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -138,8 +138,7 @@ pipeline { } } - // if (env.BRANCH_NAME != 'master' || - if (env.BRANCH_NAME == 'NOT THIS BRANCH') { + if (env.BRANCH_NAME != 'master') { echo "Will not push if not on correct branch." } else { withCredentials([ From f443c9f791972da8366f1cdc80ce24b59dc25016 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Wed, 23 Sep 2020 11:31:14 -0700 Subject: [PATCH 124/130] Fix whitespace --- Jenkinsfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 4f5e34bc..64a94445 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -142,10 +142,10 @@ pipeline { echo "Will not push if not on correct branch." } else { withCredentials([ - file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG'), - file(credentialsId: 'aws_kg_hub_push_json', variable: 'AWS_JSON'), - string(credentialsId: 'aws_kg_hub_access_key', variable: 'AWS_ACCESS_KEY_ID'), - string(credentialsId: 'aws_kg_hub_secret_key', variable: 'AWS_SECRET_ACCESS_KEY')]) { + file(credentialsId: 's3cmd_kg_hub_push_configuration', variable: 'S3CMD_CFG'), + file(credentialsId: 'aws_kg_hub_push_json', variable: 'AWS_JSON'), + string(credentialsId: 'aws_kg_hub_access_key', variable: 'AWS_ACCESS_KEY_ID'), + string(credentialsId: 'aws_kg_hub_secret_key', variable: 'AWS_SECRET_ACCESS_KEY')]) { // // make $BUILDSTARTDATE/ directory and sync to s3 bucket // From d7e519d6657bdde35587eaaa5fdb7e4058158cbe Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Wed, 23 Sep 2020 11:31:46 -0700 Subject: [PATCH 125/130] Remove unnecessary touch cmd --- Jenkinsfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 64a94445..32a3ec4d 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -152,7 +152,6 @@ pipeline { sh 'mkdir $BUILDSTARTDATE/' sh 'cp -p data/merged/merged-kg.nt.gz $BUILDSTARTDATE/kg-covid-19.nt.gz' sh 'cp -p data/merged/merged-kg.tar.gz $BUILDSTARTDATE/kg-covid-19.tar.gz' - sh 'touch merged-kg.jnl.gz' // REMOVE sh 'cp -p merged-kg.jnl.gz $BUILDSTARTDATE/kg-covid-19.jnl.gz' // transformed data sh 'rm -fr data/transformed/.gitkeep' From 6b92fd300eac0d74ad08514775976687c47f9bef Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Wed, 23 Sep 2020 11:32:29 -0700 Subject: [PATCH 126/130] Fix whitespace --- Jenkinsfile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 32a3ec4d..89cda65e 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -165,12 +165,12 @@ pipeline { // // put $BUILDSTARTDATE/ in s3 bucket // - sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE --prefix https://kg-hub.berkeleybop.io/$BUILDSTARTDATE -x -u' - sh 's3cmd -c $S3CMD_CFG put -pr --acl-public --mime-type=text/html --cf-invalidate $BUILDSTARTDATE s3://kg-hub-public-data/' + sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE --prefix https://kg-hub.berkeleybop.io/$BUILDSTARTDATE -x -u' + sh 's3cmd -c $S3CMD_CFG put -pr --acl-public --mime-type=text/html --cf-invalidate $BUILDSTARTDATE s3://kg-hub-public-data/' - // make current/ directory - sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE --prefix https://kg-hub.berkeleybop.io/current -x -u' - sh 's3cmd -c $S3CMD_CFG put -pr --acl-public --mime-type=text/html --cf-invalidate $BUILDSTARTDATE/ s3://kg-hub-public-data/current/' + // make current/ directory + sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE --prefix https://kg-hub.berkeleybop.io/current -x -u' + sh 's3cmd -c $S3CMD_CFG put -pr --acl-public --mime-type=text/html --cf-invalidate $BUILDSTARTDATE/ s3://kg-hub-public-data/current/' // // make $BUILDSTARTDATE the new current/ From 73fb79aad07f1a51b54c7b0a917a49ab6594eef5 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Wed, 23 Sep 2020 11:33:09 -0700 Subject: [PATCH 127/130] Remove old commented code --- Jenkinsfile | 8 -------- 1 file changed, 8 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 89cda65e..b846aae8 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -172,14 +172,6 @@ pipeline { sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE --prefix https://kg-hub.berkeleybop.io/current -x -u' sh 's3cmd -c $S3CMD_CFG put -pr --acl-public --mime-type=text/html --cf-invalidate $BUILDSTARTDATE/ s3://kg-hub-public-data/current/' - // - // make $BUILDSTARTDATE the new current/ - // - // The following cp always times out: - // sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate cp -v -pr s3://kg-hub-public-data/$BUILDSTARTDATE/ s3://kg-hub-public-data/new_current/' - // sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate rm -fr s3://kg-hub-public-data/current' - // sh 's3cmd -c $S3CMD_CFG --acl-public --mime-type=text/html --cf-invalidate mv --recursive s3://kg-hub-public-data/new_current/ s3://kg-hub-public-data/current/' - // Build the top level index.html // "External" packages required to run these // scripts. From 597a97f97f44937229842638ff4df3c422b9a79e Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Wed, 23 Sep 2020 11:33:58 -0700 Subject: [PATCH 128/130] Whitespace --- Jenkinsfile | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index b846aae8..97ff2e2e 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -172,18 +172,18 @@ pipeline { sh '. venv/bin/activate && python3.7 ./go-site/scripts/directory_indexer.py -v --inject ./go-site/scripts/directory-index-template.html --directory $BUILDSTARTDATE --prefix https://kg-hub.berkeleybop.io/current -x -u' sh 's3cmd -c $S3CMD_CFG put -pr --acl-public --mime-type=text/html --cf-invalidate $BUILDSTARTDATE/ s3://kg-hub-public-data/current/' - // Build the top level index.html - // "External" packages required to run these - // scripts. - sh './venv/bin/pip install pystache boto3' - sh '. venv/bin/activate && python3.7 ./go-site/scripts/bucket-indexer.py --credentials $AWS_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' - sh 's3cmd -c $S3CMD_CFG put --acl-public --mime-type=text/html --cf-invalidate top-level-index.html s3://kg-hub-public-data/index.html' - - // Invalidate the CDN now that the new - // files are up. - sh './venv/bin/pip install awscli' - sh 'echo "[preview]" > ./awscli_config.txt && echo "cloudfront=true" >> ./awscli_config.txt' - sh '. venv/bin/activate && AWS_CONFIG_FILE=./awscli_config.txt python3.7 ./venv/bin/aws cloudfront create-invalidation --distribution-id $AWS_CLOUDFRONT_DISTRIBUTION_ID --paths "/*"' + // Build the top level index.html + // "External" packages required to run these + // scripts. + sh './venv/bin/pip install pystache boto3' + sh '. venv/bin/activate && python3.7 ./go-site/scripts/bucket-indexer.py --credentials $AWS_JSON --bucket kg-hub-public-data --inject ./go-site/scripts/directory-index-template.html --prefix https://kg-hub.berkeleybop.io/ > top-level-index.html' + sh 's3cmd -c $S3CMD_CFG put --acl-public --mime-type=text/html --cf-invalidate top-level-index.html s3://kg-hub-public-data/index.html' + + // Invalidate the CDN now that the new + // files are up. + sh './venv/bin/pip install awscli' + sh 'echo "[preview]" > ./awscli_config.txt && echo "cloudfront=true" >> ./awscli_config.txt' + sh '. venv/bin/activate && AWS_CONFIG_FILE=./awscli_config.txt python3.7 ./venv/bin/aws cloudfront create-invalidation --distribution-id $AWS_CLOUDFRONT_DISTRIBUTION_ID --paths "/*"' // Should now appear at: // https://kg-hub.berkeleybop.io/[artifact name] From 8c5337d18b60de40562c017018e31593c51f9056 Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Wed, 23 Sep 2020 11:34:24 -0700 Subject: [PATCH 129/130] Typo --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 97ff2e2e..3e369f39 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -5,7 +5,7 @@ pipeline { BUILDSTARTDATE = sh(script: "echo `date +%Y%m%d`", returnStdout: true).trim() // Distribution ID for the AWS CloudFront for this branch, - // used soley for invalidations + // used solely for invalidations AWS_CLOUDFRONT_DISTRIBUTION_ID = 'EUVSWXZQBXCFP' } From c6f024cb6b211051bed86bfd5fefe7f5756e97bc Mon Sep 17 00:00:00 2001 From: Justin Reese Date: Wed, 23 Sep 2020 11:37:09 -0700 Subject: [PATCH 130/130] Missing close paren --- Jenkinsfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Jenkinsfile b/Jenkinsfile index 3e369f39..41c1f2ee 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -129,6 +129,7 @@ pipeline { REMOTE_BUILD_DIR_CONTENTS = sh ( script: 's3cmd -c $S3CMD_CFG ls s3://kg-hub-public-data/$BUILDSTARTDATE/', returnStdout: true.trim() + ) echo "REMOTE_BUILD_DIR_CONTENTS (THIS SHOULD BE EMPTY): '${REMOTE_BUILD_DIR_CONTENTS}'" if("${REMOTE_BUILD_DIR_CONTENTS}" != ''){ echo "Will not overwrite existing (---REMOTE S3---) directory: $BUILDSTARTDATE"