From df34131d6861ec1d4634ba99f7c2bf0d1ac69d72 Mon Sep 17 00:00:00 2001 From: JohT Date: Sun, 18 Jun 2023 21:08:52 +0200 Subject: [PATCH] Add a script to export the whole database as CSV --- .github/workflows/code-reports.yml | 9 ++++++ cypher/CYPHER.md | 1 + .../Export_the_whole_database_as_CSV.cypher | 2 ++ scripts/reports/DatabaseCsvExport.sh | 29 +++++++++++++++++++ scripts/setupNeo4j.sh | 10 +++++++ 5 files changed, 51 insertions(+) create mode 100644 cypher/Export_the_whole_database_as_CSV.cypher create mode 100755 scripts/reports/DatabaseCsvExport.sh diff --git a/.github/workflows/code-reports.yml b/.github/workflows/code-reports.yml index 48122b30e..d6743896e 100644 --- a/.github/workflows/code-reports.yml +++ b/.github/workflows/code-reports.yml @@ -118,6 +118,15 @@ jobs: if-no-files-found: error retention-days: 5 + # Upload Database Export + - name: Archive exported database + uses: actions/upload-artifact@v3 + with: + name: code-report-database-export-${{ matrix.java }}-python-${{ matrix.python }}-mambaforge-${{ matrix.mambaforge }} + path: ./temp/**/import + if-no-files-found: error + retention-days: 5 + # Commit and push the native image agent results - name: Display environment variable "github.event_name" run: echo "github.event_name=${{ github.event_name }}" diff --git a/cypher/CYPHER.md b/cypher/CYPHER.md index 300c08047..a64d4ba10 100644 --- a/cypher/CYPHER.md +++ b/cypher/CYPHER.md @@ -71,6 +71,7 @@ Script | Directory | Description | [Cyclic_Dependencies_Concatenated.cypher](./Cyclic_Dependencies/Cyclic_Dependencies_Concatenated.cypher) | Cyclic_Dependencies | Cyclic Dependencies Concatenated | | [Cyclic_Dependencies_as_List.cypher](./Cyclic_Dependencies/Cyclic_Dependencies_as_List.cypher) | Cyclic_Dependencies | Cyclic Dependencies as List | | [Cyclic_Dependencies_as_unwinded_List.cypher](./Cyclic_Dependencies/Cyclic_Dependencies_as_unwinded_List.cypher) | Cyclic_Dependencies | Cyclic Dependencies as unwinded List | +| 
[Export_the_whole_database_as_CSV.cypher](./Export_the_whole_database_as_CSV.cypher) | | Export the whole database as CSV | | [External_package_usage_overall.cypher](./External_Dependencies/External_package_usage_overall.cypher) | External_Dependencies | External package usage overall | | [External_package_usage_per_artifact.cypher](./External_Dependencies/External_package_usage_per_artifact.cypher) | External_Dependencies | External package usage per artifact | | [External_package_usage_per_artifact_and_package.cypher](./External_Dependencies/External_package_usage_per_artifact_and_package.cypher) | External_Dependencies | External package usage per artifact and package | diff --git a/cypher/Export_the_whole_database_as_CSV.cypher b/cypher/Export_the_whole_database_as_CSV.cypher new file mode 100644 index 000000000..ba9314a7b --- /dev/null +++ b/cypher/Export_the_whole_database_as_CSV.cypher @@ -0,0 +1,2 @@ +// Export the whole database as CSV +CALL apoc.export.csv.all("codegraph.csv", {}) \ No newline at end of file diff --git a/scripts/reports/DatabaseCsvExport.sh b/scripts/reports/DatabaseCsvExport.sh new file mode 100755 index 000000000..87195b184 --- /dev/null +++ b/scripts/reports/DatabaseCsvExport.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +# Exports the whole graph database as a CSV file using the APOC procedure "apoc.export.csv.all" +# The exported file can be found in the subdirectory "import" inside the tools/neo4j.. directory. + +# Overrideable Constants (defaults also defined in sub scripts) +REPORTS_DIRECTORY=${REPORTS_DIRECTORY:-"reports"} + +## Get this "scripts/reports" directory if not already set +# Even if $BASH_SOURCE is made for Bourne-like shells it is also supported by others and therefore here the preferred solution. +# CDPATH reduces the scope of the cd command to potentially prevent unintended directory changes. +# This way non-standard tools like readlink aren't needed. +REPORTS_SCRIPT_DIR=${REPORTS_SCRIPT_DIR:-$( CDPATH=. 
cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P )} +echo "ExternalDependenciesCsv: REPORTS_SCRIPT_DIR=${REPORTS_SCRIPT_DIR}" + +# Get the "scripts" directory by taking the path of this script and going one directory up. +SCRIPTS_DIR=${SCRIPTS_DIR:-"${REPORTS_SCRIPT_DIR}/.."} +echo "ExternalDependenciesCsv SCRIPTS_DIR=${SCRIPTS_DIR}" + +# Get the "cypher" directory by taking the path of this script and going two directories up and then to "cypher". +CYPHER_DIR=${CYPHER_DIR:-"${SCRIPTS_DIR}/../cypher"} +echo "ExportDatabase: CYPHER_DIR=$CYPHER_DIR" + +# Define functions to execute a cypher query from within the given file (first and only argument) +source "${SCRIPTS_DIR}/executeQueryFunctions.sh" + +# Execute Database Export Procedure in background +# The exported file can then be found in the subdirectory "import" inside the tools/neo4j.. directory. +execute_cypher "${CYPHER_DIR}/Export_the_whole_database_as_CSV.cypher" & \ No newline at end of file diff --git a/scripts/setupNeo4j.sh b/scripts/setupNeo4j.sh index 7b6115540..2939d0023 100755 --- a/scripts/setupNeo4j.sh +++ b/scripts/setupNeo4j.sh @@ -21,6 +21,7 @@ NEO4J_BOLT_PORT=${NEO4J_BOLT_PORT:-"7687"} NEO4J_INSTALLATION_NAME="neo4j-${NEO4J_EDITION}-${NEO4J_VERSION}" NEO4J_INSTALLATION_DIRECTORY="${TOOLS_DIRECTORY}/${NEO4J_INSTALLATION_NAME}" NEO4J_CONFIG="${NEO4J_INSTALLATION_DIRECTORY}/conf/neo4j.conf" +NEO4J_APOC_CONFIG="${NEO4J_INSTALLATION_DIRECTORY}/conf/apoc.conf" NEO4J_APOC_PLUGIN_ARTIFACT="apoc-${NEO4J_APOC_PLUGIN_VERSION}-all.jar" NEO4J_GDS_PLUGIN_ARTIFACT="neo4j-graph-data-science-${NEO4J_GDS_PLUGIN_VERSION}.jar" @@ -147,6 +148,15 @@ if [ ! 
-f "${NEO4J_INSTALLATION_DIRECTORY}/plugins/${NEO4J_APOC_PLUGIN_ARTIFACT} echo "setupNeo4j: Failed to download and install ${NEO4J_APOC_PLUGIN_ARTIFACT}" exit 1 fi + + # Configure Neo4j Plugin "Awesome Procedures for Neo4j" (APOC) + echo "setupNeo4j: Configuring Neo4j Plugin ${NEO4J_APOC_PLUGIN_ARTIFACT} (APOC)" + { + echo "# Reference: https://neo4j.com/docs/apoc/current/config/#_apoc_export_file_enabled" + echo "" + echo "# Enables writing local files to disk for file export. Default=false" + echo "apoc.export.file.enabled=true" + } >> "${NEO4J_APOC_CONFIG}" else echo "setupNeo4j: ${NEO4J_APOC_PLUGIN_ARTIFACT} already installed" fi