Skip to content

Commit

Permalink
Issue #6 - Adding ability to ignore inputs by path (#7)
Browse files Browse the repository at this point in the history
  • Loading branch information
robons committed Oct 18, 2022
1 parent 09c813d commit 2557056
Show file tree
Hide file tree
Showing 7 changed files with 433 additions and 232 deletions.
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,18 @@ To use the csv-to-csvw GitHub action,
commit-outputs-to-branch: true
# Boolean indicating whether the outputs (i.e. out folder) should be published to the gh-pages branch and GitHub Pages (default is true).
publish-to-gh-pages: true
# File paths which csv-to-csvw should avoid converting into CSV-Ws
paths-to-exclude: |
# This will ignore all files immediately in the `ignore` directory.
ignore/*
# This will recursively ignore all files in any subfolders of the `ignore` directory.
ignore/**/*
# (WARNING: Advanced Configuration) Boolean indicating whether to only convert CSV files which have a companion JSON configuration file into CSV-Ws.
config-required: false
# (WARNING: Advanced Configuration) Boolean indicating whether to output debug statements in order to help diagnose a fault or unexpected behaviour.
debug: false
```

![The create workflow file image shows creating and committing a new action.yaml file.](docs/images/create_workflow_file.png "Create Workflow File")

Make sure to configure the [`commit-outputs-to-branch`](#committing-outputs-to-out-folder) and [`publish-to-gh-pages`](#github-pages) configuration parameters to specify where you want the build outputs to be published.
Expand Down
282 changes: 50 additions & 232 deletions action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,18 @@ inputs:
description: Boolean indicating whether the outputs (i.e. out folder) should be published to the gh-pages branch and GitHub Pages (default is true).
default: true
required: false

on:
push:
branches:
- main
paths-to-exclude:
description: File paths which csv-to-csvw should avoid converting into CSV-Ws
default: ""
required: false
config-required:
description: "(WARNING: Advanced Configuration) Only convert CSV files which have a companion JSON configuration file into CSV-Ws."
default: false
required: false
debug:
description: "(WARNING: Advanced Configuration) Whether to output debug statements in order to help diagnose a fault or unexpected behaviour."
default: false
required: false

runs:
using: composite
Expand All @@ -44,12 +51,25 @@ runs:
run: ls -la $GITHUB_WORKSPACE
shell: bash

- name: Set GITHUB_PATH to Action's Repository
# Ensures that scripts we run are resolved relative to the action's repository.
run: echo "${{ github.action_path }}" >> $GITHUB_PATH
shell: bash

- name: Configure git
run: |
git config --global user.name "CSV-W from csv upload generator"
git pull
shell: bash

- name: Update gh-pages and clear branch
if: ${{ fromJSON(inputs.publish-to-gh-pages) == true }}
run: |
git checkout gh-pages || git checkout -b gh-pages
git pull
git checkout "$GITHUB_REF_NAME"
shell: bash

- name: Get added/modified/removed files
id: get-added-changed-removed-files
uses: jitterbit/get-changed-files@v1
Expand All @@ -58,188 +78,24 @@ runs:

- name: Build and inspect files
id: build-and-inspect-files
run: |
# Builds a CSV-W (via `csvcubed build`) for every added, modified or renamed
# CSV outside the out/ folder, pairing it with the same-named JSON config when
# one exists, then runs `csvcubed inspect` on each generated metadata file.
# Sets the `has_outputs` step output to true when at least one CSV was built.
#
# NOTE(review): the changed-file lists are substituted into the script text by
# the workflow expression engine before bash runs, so a file name containing a
# single quote breaks out of the quoting below. Passing the lists through the
# step's `env:` mapping would avoid that.
has_outputs=false

mapfile -d ',' -t detected_files < <(printf '%s,' '${{ steps.get-added-changed-removed-files.outputs.added_modified }}')
mapfile -d ',' -t renamed_files < <(printf '%s,' '${{ steps.get-added-changed-removed-files.outputs.renamed }}')
# Quoted so that file names containing whitespace or glob characters are
# appended as single, unmodified elements.
detected_files+=("${renamed_files[@]}")
echo "detected_files: ${detected_files[@]}"

processed_files=()
for file in "${detected_files[@]}"; do
  echo $'\n'
  echo "Detected file: ${file}"
  echo "======================"

  # A CSV and its JSON config are built together, so the partner file may
  # already have been handled by an earlier iteration.
  if [[ " ${processed_files[*]} " =~ " ${file} " ]]; then
    echo "File is already processed, hence ignoring it."
    continue
  fi

  root_file=false
  file_path="${file%/*}"
  # When the file is at the root there is no '/' to strip, so the expansion
  # above returns the file name itself; treat that as an empty path. The
  # right-hand side is quoted so it is compared literally, not as a pattern.
  if [[ "$file_path" == "$file" ]]; then
    root_file=true
    file_path=""
  fi

  file_without_extension="${file%.*}"
  file_name="${file_without_extension##*/}"
  file_extension="${file##*.}"

  # The top folder is the first path component,
  # e.g. csv when the path is csv/sub-folder/my-data.csv.
  if [[ $root_file == true ]]; then
    top_folder=""
  else
    top_folder="${file_path%%/*}"
  fi

  echo "---Extracting File Info"
  echo "file_path: ${file_path}"
  echo "file_without_extension: ${file_without_extension}"
  echo "file_name: ${file_name}"
  echo "file_extension: ${file_extension}"
  echo "top_folder: ${top_folder}"

  # The out/ folder is used for storing the outputs generated by the csvcubed
  # build and inspect commands. Hence, the user should not use this folder to
  # commit any inputs. Any inputs committed to this folder will not be processed.
  if [[ ($file_extension == "csv" || $file_extension == "json") && $top_folder != "out" ]]; then
    echo "---Processing File: ${file}"
    csv_file=""
    config_file=""

    if [[ $file_extension == "csv" ]]; then
      csv_file="$file"
      # A JSON file with the same base name next to the CSV is its config.
      potential_config_file="${file_without_extension}.json"
      if [[ -f $potential_config_file ]]; then
        config_file="$potential_config_file"
      fi
    elif [[ $file_extension == "json" ]]; then
      config_file="$file"
      potential_csv_file="${file_without_extension}.csv"
      echo "potential_csv_file: ${potential_csv_file}"
      if [[ -f $potential_csv_file ]]; then
        csv_file="$potential_csv_file"
      else
        # A JSON file without a companion CSV is not a config; csv_file stays
        # empty, so nothing is built for it below.
        config_file=NULL
      fi
    fi

    echo "csv_file for processing: ${csv_file}"
    echo "config_file for processing: ${config_file}"

    if [[ -f $csv_file ]]; then
      echo "---Producing Output Path"
      # Outputs mirror the input's folder structure underneath out/.
      if [[ $root_file == true ]]; then
        out_path="out/${file_name}/"
      else
        out_path="out/${file_path}/${file_name}/"
      fi
      echo "out_path: ${out_path}"

      echo "---Building CSV-W"
      echo "Building CSV-W"
      if [[ -f $config_file ]]; then
        echo "Config for ${csv_file} is available: ${config_file}"
        csvcubed build "$csv_file" -c "$config_file" --out "$out_path" --validation-errors-to-file
      else
        echo "Config for ${csv_file} is not available"
        csvcubed build "$csv_file" --out "$out_path" --validation-errors-to-file
      fi

      echo "---Inspecting CSV-W"
      # NUL-delimited so that paths containing whitespace survive intact.
      mapfile -d $'\0' inspectable_files < <(find "${GITHUB_WORKSPACE}/${out_path}" -name "*.csv-metadata.json" -type f -print0)
      for inspect_file in "${inspectable_files[@]}"; do
        echo "Inspecting file: ${inspect_file}"
        inspect_file_name="${inspect_file##*/}"
        inspect_output_file="${out_path}${inspect_file_name}_inspect_output.txt"
        csvcubed inspect "$inspect_file" > "$inspect_output_file"
      done

      # Record both halves of the pair so neither is built a second time.
      processed_files+=("$csv_file")
      if [[ -n $config_file ]]; then
        processed_files+=("$config_file")
      fi
      echo "processed_files: ${processed_files[@]}"

      has_outputs=true
      echo "---Finished Processing File: ${file}"
      echo "======================"
    fi
  fi
done

# Publish the step output once. The GITHUB_OUTPUT file replaces the deprecated
# `::set-output` workflow command; fall back to the command on runners that do
# not provide the file.
if [[ -n "${GITHUB_OUTPUT:-}" ]]; then
  echo "has_outputs=${has_outputs}" >> "$GITHUB_OUTPUT"
else
  echo "::set-output name=has_outputs::${has_outputs}"
fi
env:
FILES_ADDED_MODIFIED: ${{ steps.get-added-changed-removed-files.outputs.added_modified }}
FILES_RENAMED: ${{ steps.get-added-changed-removed-files.outputs.renamed }}
PATHS_TO_EXCLUDE_IN: ${{ inputs.paths-to-exclude }}
JSON_CONFIG_REQUIRED: ${{ inputs.config-required }}
COMMIT_OUTPUTS_TO_GH_PAGES: ${{ fromJSON(inputs.commit-outputs-to-branch) }}
DEBUG: ${{ fromJSON(inputs.debug) == true }}
run: build-and-inspect-files.sh
shell: bash

- name: Handle deleted files
id: process-deleted-files
run: |
# For every CSV removed outside the out/ folder, removes its same-named JSON
# config (if present) and its out/ output folder (if present) from git, commits
# each removal and pushes. Sets the `has_outputs` step output to true when at
# least one such CSV deletion was handled.
#
# NOTE(review): the removed-file list is substituted into the script text by
# the workflow expression engine before bash runs, so a file name containing a
# single quote breaks out of the quoting below. Passing the list through the
# step's `env:` mapping would avoid that.
has_outputs=false

mapfile -d ',' -t deleted_files < <(printf '%s,' '${{ steps.get-added-changed-removed-files.outputs.removed }}')

for file in "${deleted_files[@]}"; do
  echo $'\n'
  echo "---Handling Deletions for File: ${file}"

  root_file=false
  file_path="${file%/*}"
  # With no '/' in the name the expansion above returns the file name itself,
  # which means the file lived at the repository root. The right-hand side is
  # quoted so it is compared literally, not as a glob pattern.
  if [[ "$file_path" == "$file" ]]; then
    file_path=""
    root_file=true
  fi

  file_without_extension="${file%.*}"
  file_name="${file_without_extension##*/}"
  file_extension="${file##*.}"

  # The top folder is the first path component,
  # e.g. csv when the path is csv/sub-folder/my-data.csv.
  if [[ $root_file == true ]]; then
    top_folder=""
  else
    top_folder="${file_path%%/*}"
  fi

  echo "---Extracting Delete File Info"
  echo "file_path: ${file_path}"
  echo "file_without_extension: ${file_without_extension}"
  echo "file_name: ${file_name}"
  echo "file_extension: ${file_extension}"
  echo "top_folder: ${top_folder}"

  # Only the deletion of a csv outside the out folder triggers a clean-up.
  if [[ $file_extension != "csv" || $top_folder == "out" ]]; then
    echo "File is not a csv or a it is a file inside out folder, hence ignoring it."
    continue
  fi

  config_file="${file_without_extension}.json"
  # Outputs mirror the input's folder structure underneath out/.
  if [[ $root_file == true ]]; then
    out_folder="out/${file_name}/"
  else
    out_folder="out/${file_path}/${file_name}/"
  fi
  echo "config_file: ${config_file}"
  echo "out_folder: ${out_folder}"

  if [[ -f $config_file ]]; then
    echo "config file exists, hence deleting."
    git rm "$config_file"
    git commit -m "Deleted config file for file ${file} - $(date +'%d-%m-%Y at %H:%M:%S')"
  fi

  if [[ -d $out_folder ]]; then
    echo "outputs exist, hence deleting."
    git rm -r "$out_folder"
    git commit -m "Deleted outputs for file ${file} - $(date +'%d-%m-%Y at %H:%M:%S')"
  fi

  git push
  has_outputs=true
  echo "---Finished Handling Deletions for File: ${file}"
done

# Publish the step output once. The GITHUB_OUTPUT file replaces the deprecated
# `::set-output` workflow command; fall back to the command on runners that do
# not provide the file.
if [[ -n "${GITHUB_OUTPUT:-}" ]]; then
  echo "has_outputs=${has_outputs}" >> "$GITHUB_OUTPUT"
else
  echo "::set-output name=has_outputs::${has_outputs}"
fi
env:
FILES_REMOVED: ${{ steps.get-added-changed-removed-files.outputs.removed }}
JSON_CONFIG_REQUIRED: ${{ inputs.config-required }}
COMMIT_OUTPUTS_TO_GH_PAGES: ${{ fromJSON(inputs.commit-outputs-to-branch) }}
DEBUG: ${{ fromJSON(inputs.debug) == true }}
run: process-deleted-files.sh
shell: bash

- name: Publish CSV-Ws and logs to artefacts
Expand All @@ -250,61 +106,23 @@ runs:
path: out

- name: Commit generated CSV-Ws and logs to the repository
#if: ${{ fromJSON(steps.build-and-inspect-files.outputs.has_outputs) == true }}
if: ${{ (fromJSON(steps.build-and-inspect-files.outputs.has_outputs) == true) && (fromJSON(inputs.commit-outputs-to-branch) == true) }}
if: ${{ (fromJSON(steps.build-and-inspect-files.outputs.has_outputs) == true || fromJSON(steps.process-deleted-files.outputs.has_outputs)) && (fromJSON(inputs.commit-outputs-to-branch) == true) }}
run: |
echo "--Commiting Outputs to Main Branch"
if [[ -d "$RUNNER_TEMP/out" ]]
then
cp -r "$RUNNER_TEMP/out" .
fi
git add out/
git commit -m "Outputs generated from csv upload - $(date +'%d-%m-%Y at %H:%M:%S')"
git commit -m "CSV-W Updates - $(date +'%d-%m-%Y at %H:%M:%S')"
git push
shell: bash

- name: Publish CSV-Ws and logs to GitHub Pages
#if: ${{ fromJSON(steps.build-and-inspect-files.outputs.has_outputs) == true }}
if: ${{ (fromJSON(steps.build-and-inspect-files.outputs.has_outputs) == true) && (fromJSON(inputs.publish-to-gh-pages) == true) }}
run: |
# Publishes the generated outputs to GitHub Pages: switches to a gh-pages
# branch based on the current commit, writes an index.html listing every file
# found under an out/ path, commits, and force-pushes the branch.
echo "--Publishing Output to GitHub Pages"

# -B rather than -b: create the branch, or reset it to the current commit when
# a local gh-pages branch already exists, instead of failing. The branch is
# force-pushed below, so starting again from the current commit is intended.
git checkout -B gh-pages

repo_name=${GITHUB_REPOSITORY#*/}
username=${GITHUB_REPOSITORY_OWNER}
commit_id=${GITHUB_SHA}

# NUL-delimited so that paths containing whitespace are kept as single entries
# rather than being word-split.
mapfile -d '' -t out_files < <(find . -type f -path '*out/*' -print0)
# Comma-joined with a leading comma; the page script drops the resulting empty
# first entry with files.shift().
# NOTE(review): a path containing a comma or a double quote would still break
# the generated JavaScript string below.
processed_out_files=$(printf ",%s" "${out_files[@]}")

# An empty .nojekyll file; presumably present to stop GitHub Pages running
# Jekyll over the published files (TODO confirm).
touch .nojekyll

cat > index.html <<EOL
<!doctype html>
<html>
<head>
</head>
<body>
<h3>CSV-Ws generated are as below. The latest commit id is ${commit_id}.</h3>
<div id="files-container"></div>
<script type="text/javascript">
var html_str = "<ul>";
var files = "${processed_out_files}".split(',');
files.shift()
files.sort()
files.forEach(function(file) {
file = file.replace("./","")
link = "https://${username}.github.io/${repo_name}/"+file
html_str += "<li>"+"<a href='"+ link + "'>"+file+"</a></li>";
});
html_str += "</ul>";
document.getElementById("files-container").innerHTML = html_str;
</script>
</body>
</html>
EOL

git add .nojekyll
git add index.html
git add out/
git commit -a -m "Updating outputs in GitHub Pages - $(date +'%d-%m-%Y at %H:%M:%S')"
git push --set-upstream origin gh-pages -f
if: ${{ (fromJSON(steps.build-and-inspect-files.outputs.has_outputs) || fromJSON(steps.process-deleted-files.outputs.has_outputs)) && (fromJSON(inputs.publish-to-gh-pages) == true) }}
env:
DEBUG: ${{ fromJSON(inputs.debug) == true }}
run: publish-csvws-to-github-pages.sh
shell: bash

0 comments on commit 2557056

Please sign in to comment.