diff --git a/Jenkinsfile b/Jenkinsfile
index 3f5375fd..619230b4 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -100,6 +100,14 @@ pipeline {
             }
         }
 
+        stage('QC'){
+            steps {
+                dir('./gitrepo') {
+                    sh '. venv/bin/activate && python3.7 queries/qc/ensmallen_report.py'
+                }
+            }
+        }
+
         stage('Make blazegraph journal'){
             steps {
                 dir('./gitrepo/blazegraph') {
diff --git a/queries/qc/ensmallen_report.py b/queries/qc/ensmallen_report.py
new file mode 100644
index 00000000..7d8a6c6a
--- /dev/null
+++ b/queries/qc/ensmallen_report.py
@@ -0,0 +1,31 @@
+"""Write an Ensmallen QC report for the merged kg-covid-19 graph."""
+from ensmallen_graph import EnsmallenGraph
+import tarfile
+import compress_json
+
+# Unpack the archive into the current working directory; the context
+# manager closes the archive handle even if extraction fails.
+# NOTE(review): extractall() trusts member paths -- fine for our own build
+# artifact, but do not point this at an archive from an untrusted source.
+with tarfile.open("kg-covid-19.tar.gz") as tar:
+    tar.extractall()
+
+graph = EnsmallenGraph.from_csv(
+    edge_path="merged-kg_edges.tsv",
+    sources_column="subject",
+    destinations_column="object",
+    directed=False,
+    edge_types_column="edge_label",
+    default_edge_type="biolink:association",
+    node_path="merged-kg_nodes.tsv",
+    nodes_column="id",
+    node_types_column="category",
+    default_node_type="biolink:NamedThing",
+    ignore_duplicated_edges=True,
+    ignore_duplicated_nodes=True,
+    force_conversion_to_undirected=True
+)
+
+json_report = graph.report()
+compress_json.dump(json_report, "kg-covid-19-ensmallen-report.json")
+
diff --git a/queries/qc/ensmallen_report_expected_values.yaml b/queries/qc/ensmallen_report_expected_values.yaml
new file mode 100644
index 00000000..5e713f5d
--- /dev/null
+++ b/queries/qc/ensmallen_report_expected_values.yaml
@@ -0,0 +1,12 @@
+degrees_mode
+bidirectional_rate
+strongly_connected_components_number
+nodes_number
+edges_number
+density
+singleton_nodes
+unique_edge_types_number
+connected_components_number
+unique_node_types_number
+degrees_median
+
diff --git a/tests/resources/qc/kg-covid-19-TEST.tar.gz b/tests/resources/qc/kg-covid-19-TEST.tar.gz
new file mode 100644
index 00000000..1d7e645a
Binary files /dev/null and b/tests/resources/qc/kg-covid-19-TEST.tar.gz differ