# GitHub Actions workflow for the csv-to-csvw action.
# (Originally captured from PR #204 — merge of branch '11-bug-fix' of
# github.com:GSS-Cogs/csv-to-csvw-action; page chrome removed.)
name: csv-to-csvw action

# Builds CSV-W outputs (via csvcubed) from csv/json files committed to the
# repository, commits the generated outputs to the out/ folder, uploads them
# as a workflow artefact, and publishes an index of them to GitHub Pages.
on:
  push:
    branches:
      # - main
      - 11-bug-fix

jobs:
  generate_csvw_from_csv_upload:
    name: Generate CSV-W from csv upload
    runs-on: ubuntu-latest
    steps:
      - name: Set up Python 3.9
        # v2 ran on the retired Node 12 runtime; v4 is the supported release.
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML cannot mis-type the version as a float (e.g. 3.10 -> 3.1).
          python-version: "3.9"

      - name: Install csvcubed
        run: pip install csvcubed
        shell: bash

      - name: Verify csvcubed installation
        run: csvcubed version
        shell: bash

      - name: Check out repository
        uses: actions/checkout@v3
        with:
          # Full history so git pull/push and branch creation behave normally.
          fetch-depth: 0

      - name: View working directory
        run: ls -la $GITHUB_WORKSPACE
        shell: bash

      - name: Configure git
        run: |
          git config --global user.name "CSV-W from csv upload generator"
          # user.email is required too: git refuses to commit with only user.name set.
          git config --global user.email "csv-to-csvw-action@users.noreply.github.com"
          git pull
        shell: bash

      - name: Get added/modified/removed files
        id: get-added-changed-removed-files
        uses: jitterbit/get-changed-files@v1
        with:
          format: "csv"

      - name: Build and inspect files
        id: build-and-inspect-files
        run: |
          # ::set-output is deprecated/disabled; step outputs go via $GITHUB_OUTPUT.
          echo "has_outputs=false" >> "$GITHUB_OUTPUT"
          mapfile -d ',' -t detected_files < <(printf '%s,' '${{ steps.get-added-changed-removed-files.outputs.added_modified }}')
          mapfile -d ',' -t renamed_files < <(printf '%s,' '${{ steps.get-added-changed-removed-files.outputs.renamed }}')
          # Quoted expansion so file names survive word splitting and globbing.
          detected_files+=("${renamed_files[@]}")
          echo "detected_files: ${detected_files[@]}"
          processed_files=()
          for file in "${detected_files[@]}"; do
            echo $'\n'
            echo "Detected file: ${file}"
            echo "======================"
            # If the file is already processed, it will be ignored.
            if [[ " ${processed_files[@]} " =~ " ${file} " ]]; then
              echo "File is already processed, hence ignoring it."
              continue
            fi
            root_file=false
            file_path="${file%/*}"
            # When the file is at the root, the above returns the file name. We need to make sure the file_path is set to '' when this is the case.
            if [[ $file_path == $file ]]; then
              root_file=true
              file_path=""
            fi
            file_without_extension="${file%.*}"
            file_name="${file_without_extension##*/}"
            file_extension="${file##*.}"
            # Detecting the top folder from the file path. E.g. csv/ is the top folder when the path is csv/sub-folder/my-data.csv
            if [[ $root_file == true ]]; then
              top_folder=""
            else
              top_folder=$(echo "$file_path" | cut -d "/" -f1)
            fi
            echo "---Extracting File Info"
            echo "file_path: ${file_path}"
            echo "file_without_extension: ${file_without_extension}"
            echo "file_name: ${file_name}"
            echo "file_extension: ${file_extension}"
            echo "top_folder: ${top_folder}"
            # The out/ folder is used for storing the outputs generated by csvcubed build and inspect commands. Hence, the user should not use this folder to commit any inputs. Any inputs committed to this folder will not be processed.
            if [[ ($file_extension == "csv" || $file_extension == "json") && $top_folder != "out" ]]; then
              echo "---Processing File: ${file}"
              csv_file=""
              config_file=""
              if [[ $file_extension == "csv" ]]; then
                csv_file=${file}
                # A sibling <name>.json on disk, if present, is this csv's config.
                # (The original looped over detected_files here, but the loop body
                # never used the loop variable — the filesystem check is enough.)
                potential_config_file="${file_without_extension}.json"
                if [[ -f $potential_config_file ]]; then
                  config_file=$potential_config_file
                fi
              elif [[ $file_extension == "json" ]]; then
                config_file=$file
                potential_csv_file="${file_without_extension}.csv"
                echo "potential_csv_file: ${potential_csv_file}"
                if [[ -f $potential_csv_file ]]; then
                  csv_file=$potential_csv_file
                else
                  config_file=NULL
                fi
              fi
              echo "csv_file for processing: ${csv_file}"
              echo "config_file for processing: ${config_file}"
              if [[ -f $csv_file ]]; then
                echo "---Producing Output Path"
                # Creating the out path to store outputs.
                if [[ $root_file == true ]]; then
                  out_path="out/${file_name}/"
                else
                  out_path="out/${file_path}/${file_name}/"
                fi
                echo "out_path: ${out_path}"
                echo "---Building CSV-W"
                echo "Building CSV-W"
                if [[ -f $config_file ]]; then
                  echo "Config for ${csv_file} is available: ${config_file}"
                  csvcubed build "$csv_file" -c "$config_file" --out "$out_path" --validation-errors-to-file
                else
                  echo "Config for ${csv_file} is not available"
                  csvcubed build "$csv_file" --out "$out_path" --validation-errors-to-file
                fi
                echo "---Inspecting CSV-W"
                # -t strips the NUL delimiters; without it every element keeps a
                # trailing NUL and the paths built from it below are corrupt.
                mapfile -d '' -t inspectable_files < <(find "${GITHUB_WORKSPACE}/${out_path}" -name "*.csv-metadata.json" -type f -print0)
                for inspect_file in "${inspectable_files[@]}"; do
                  echo "Inspecting file: ${inspect_file}"
                  inspect_file_name="${inspect_file##*/}"
                  inspect_output_file="${out_path}${inspect_file_name}_inspect_output.txt"
                  csvcubed inspect "$inspect_file" > "$inspect_output_file"
                done
                # Quote the appends (paths may contain spaces) but skip empties so
                # no blank entries pollute the processed_files membership check.
                if [[ -n $csv_file ]]; then
                  processed_files+=("$csv_file")
                fi
                if [[ -n $config_file ]]; then
                  processed_files+=("$config_file")
                fi
                echo "processed_files: ${processed_files[@]}"
                echo "has_outputs=true" >> "$GITHUB_OUTPUT"
                echo "---Finished Processing File: ${file}"
                echo "======================"
              fi
            fi
          done
        shell: bash

      - name: Handle deleted files
        id: process-deleted-files
        run: |
          echo "has_outputs=false" >> "$GITHUB_OUTPUT"
          mapfile -d ',' -t deleted_files < <(printf '%s,' '${{ steps.get-added-changed-removed-files.outputs.removed }}')
          for file in "${deleted_files[@]}"; do
            echo $'\n'
            echo "---Handling Deletions for File: ${file}"
            root_file=false
            file_path="${file%/*}"
            if [[ $file_path == $file ]]; then
              file_path=""
              root_file=true
            fi
            file_without_extension="${file%.*}"
            file_name="${file_without_extension##*/}"
            file_extension="${file##*.}"
            # Detecting the top folder from the file path. E.g. csv/ is the top folder when the path is csv/sub-folder/my-data.csv
            if [[ $root_file == true ]]; then
              top_folder=""
            else
              top_folder=$(echo "$file_path" | cut -d "/" -f1)
            fi
            echo "---Extracting Delete File Info"
            echo "file_path: ${file_path}"
            echo "file_without_extension: ${file_without_extension}"
            echo "file_name: ${file_name}"
            echo "file_extension: ${file_extension}"
            echo "top_folder: ${top_folder}"
            # Delete config and outputs when a csv outside the out folder is deleted.
            if [[ $file_extension != "csv" || $top_folder == "out" ]]; then
              echo "File is not a csv or it is a file inside the out folder, hence ignoring it."
              continue
            fi
            config_file="${file_without_extension}.json"
            if [[ $root_file == true ]]; then
              out_folder="out/${file_name}/"
            else
              out_folder="out/${file_path}/${file_name}/"
            fi
            echo "config_file: ${config_file}"
            echo "out_folder: ${out_folder}"
            if [[ -f $config_file ]]; then
              echo "config file exists, hence deleting."
              git rm "$config_file"
              git commit -m "Deleted config file for file ${file} - $(date +'%d-%m-%Y at %H:%M:%S')"
            fi
            if [[ -d $out_folder ]]; then
              echo "outputs exist, hence deleting."
              git rm -r "$out_folder"
              git commit -m "Deleted outputs for file ${file} - $(date +'%d-%m-%Y at %H:%M:%S')"
            fi
            git push
            echo "has_outputs=true" >> "$GITHUB_OUTPUT"
            echo "---Finished Handling Deletions for File: ${file}"
          done
        shell: bash

      - name: Publish CSV-Ws and logs to artefacts
        if: ${{ fromJSON(steps.build-and-inspect-files.outputs.has_outputs) == true }}
        uses: actions/upload-artifact@v3
        with:
          name: assets-for-download
          path: out

      - name: Commit generated CSV-Ws and logs to the repository
        if: ${{ fromJSON(steps.build-and-inspect-files.outputs.has_outputs) == true }}
        # if: ${{ (fromJSON(steps.build-and-inspect-files.outputs.has_outputs) == true) && (fromJSON(inputs.commit-outputs) == true) }}
        run: |
          echo "--Committing Outputs to Main Branch"
          git add out/
          git commit -m "Outputs generated from csv upload - $(date +'%d-%m-%Y at %H:%M:%S')"
          git push
        shell: bash

      - name: Publish CSV-Ws and logs to GitHub Pages
        if: ${{ fromJSON(steps.build-and-inspect-files.outputs.has_outputs) == true }}
        # if: ${{ (fromJSON(steps.build-and-inspect-files.outputs.has_outputs) == true) && (fromJSON(inputs.publish-to-gh-pages) == true) }}
        run: |
          echo "--Publishing Output to GitHub Pages"
          git checkout -b gh-pages
          # -f: the step shell runs with -e, so a missing file must not abort it.
          rm -rf LICENSE README.md .github/workflows
          repo_name=${GITHUB_REPOSITORY#*/}
          username=${GITHUB_REPOSITORY_OWNER}
          commit_id=${GITHUB_SHA}
          # NOTE(review): the unquoted $(find ...) word-splits on whitespace, so
          # paths containing spaces would break here — confirm inputs never do.
          mapfile -d ',' -t out_files < <(printf '%s,' $(find . -type f -path '*out/*'))
          processed_out_files=$(printf ",%s" "${out_files[@]}")
          touch .nojekyll
          # Unquoted heredoc delimiter so ${commit_id}/${username}/${repo_name}/
          # ${processed_out_files} expand into the generated page.
          cat > index.html <<EOL
          <!doctype html>
          <html>
          <head>
          </head>
          <body>
          <h3>CSV-Ws generated are as below. The latest commit id is ${commit_id}.</h3>
          <div id="files-container"></div>
          <script type="text/javascript">
          var html_str = "<ul>";
          var files = "${processed_out_files}".split(',');
          files.shift()
          files.sort()
          files.forEach(function(file) {
          file = file.replace("./","")
          link = "https://${username}.github.io/${repo_name}/"+file
          html_str += "<li>"+"<a href='"+ link + "'>"+file+"</a></li>";
          });
          html_str += "</ul>";
          document.getElementById("files-container").innerHTML = html_str;
          </script>
          </body>
          </html>
          EOL
          git add .nojekyll
          git add index.html
          git commit -a -m "Updating outputs in GitHub Pages - $(date +'%d-%m-%Y at %H:%M:%S')"
          git push --set-upstream origin gh-pages -f
        shell: bash