# Skip to content
#
# Merge branch '11-bug-fix' of github.com:GSS-Cogs/csv-to-csvw-action i… #205
#
# Merge branch '11-bug-fix' of github.com:GSS-Cogs/csv-to-csvw-action i…
#
# Merge branch '11-bug-fix' of github.com:GSS-Cogs/csv-to-csvw-action i… #205
#
# Workflow file for this run

name: csv-to-csvw action
on:
  push:
    branches:
      # Fix: with every branch commented out, `branches:` parsed as null,
      # which is not a valid push filter. The main branch is restored;
      # the test branch is kept as a commented alternative.
      - main
      # - 11-bug-fix
jobs:
  generate_csvw_from_csv_upload:
    name: Generate CSV-W from csv upload
    runs-on: ubuntu-latest
    steps:
      - name: Set up Python 3.9
        uses: actions/setup-python@v2
        with:
          # Quoted so YAML does not read the version as a float
          # (unquoted 3.10, for example, would become 3.1).
          python-version: "3.9"
      - name: Install csvcubed
        run: pip install csvcubed
        shell: bash
      - name: Verify csvcubed installation
        run: csvcubed version
        shell: bash
      - name: Check out repository
        uses: actions/checkout@v2
        with:
          # Full history so the later git pull/commit/push steps work.
          fetch-depth: 0
      - name: View working directory
        run: ls -la $GITHUB_WORKSPACE
        shell: bash
      - name: Configure git
        run: |
          git config --global user.name "CSV-W from csv upload generator"
          # Fix: git refuses to commit without user.email configured
          # ("Please tell me who you are"), and later steps run git commit.
          git config --global user.email "csv-to-csvw-action@users.noreply.github.com"
          git pull
        shell: bash
      - name: Get added/modified/removed files
        id: get-added-changed-removed-files
        uses: jitterbit/get-changed-files@v1
        with:
          format: "csv"
- name: Build and inspect files
id: build-and-inspect-files
run: |
echo "::set-output name=has_outputs::${{ toJSON(false) }}"
mapfile -d ',' -t detected_files < <(printf '%s,' '${{ steps.get-added-changed-removed-files.outputs.added_modified }}')
mapfile -d ',' -t renamed_files < <(printf '%s,' '${{ steps.get-added-changed-removed-files.outputs.renamed }}')
detected_files+=(${renamed_files[@]})
echo "detected_files: ${detected_files[@]}"
processed_files=()
for file in "${detected_files[@]}"; do
echo $'\n'
echo "Detected file: ${file}"
echo "======================"
# If the file is already processed, it will be ignored.
if [[ " ${processed_files[@]} " =~ " ${file} " ]]; then
echo "File is already processed, hence ignoring it."
continue
fi
root_file=false
file_path="${file%/*}"
# When the file is at the root, the above returns file name. We need to make sure the file_path is set to '' when this is the case.
if [[ $file_path == $file ]]; then
root_file=true
file_path=""
fi
file_without_extension="${file%.*}"
file_name="${file_without_extension##*/}"
file_extension="${file##*.}"
# Detecting the top folder from the file path. E.g. csv/ is the top folder when the path is csv/sub-folder/my-data.csv
if [[ $root_file == true ]]; then
top_folder=""
else
top_folder=$(echo "$file_path" | cut -d "/" -f1)
fi
echo "---Extracting File Info"
echo "file_path: ${file_path}"
echo "file_without_extension: ${file_without_extension}"
echo "file_name: ${file_name}"
echo "file_extension: ${file_extension}"
echo "top_folder: ${top_folder}"
# The out/ folder is used for storing the outputs generated by csvcubed build and inspect commands. Hence, the user should not use this folder to commit any inputs. Any inputs committed to this folder will not be procssed.
if [[ ($file_extension == "csv" || $file_extension == "json") && $top_folder != "out" ]]; then
echo "---Processing File: ${file}"
csv_file=""
config_file=""
if [[ $file_extension == "csv" ]]; then
csv_file=${file}
for file_secondary in "${detected_files[@]}"; do
potential_config_file="${file_without_extension}.json"
if [[ -f $potential_config_file ]]; then
config_file=$potential_config_file
fi
done
elif [[ $file_extension == "json" ]]; then
config_file=$file
potential_csv_file="${file_without_extension}.csv"
echo "potential_csv_file: ${potential_csv_file}"
if [[ -f $potential_csv_file ]]; then
csv_file=$potential_csv_file
else
config_file=NULL
fi
fi
echo "csv_file for processing: ${csv_file}"
echo "config_file for processing: ${config_file}"
if [[ -f $csv_file ]]; then
echo "---Producing Output Path"
# Creating the out path to store outputs.
if [[ $root_file == true ]]; then
out_path="out/${file_name}/"
else
out_path="out/${file_path}/${file_name}/"
fi
echo "out_path: ${out_path}"
echo "---Building CSV-W"
echo "Building CSV-W"
if [[ -f $config_file ]]; then
echo "Config for ${csv_file} is available: ${config_file}"
csvcubed build "$csv_file" -c "$config_file" --out "$out_path" --validation-errors-to-file
else
echo "Config for ${csv_file} is not available"
csvcubed build "$csv_file" --out "$out_path" --validation-errors-to-file
fi
echo "---Inspecting CSV-W"
mapfile -d $'\0' inspectable_files < <(find "${GITHUB_WORKSPACE}/${out_path}" -name "*.csv-metadata.json" -type f -print0)
for inspect_file in "${inspectable_files[@]}"; do
echo "Inspecting file: ${inspect_file}"
inspect_file_path="${inspect_file%/*}"
inspect_file_name="${inspect_file##*/}"
inspect_output_file="${out_path}${inspect_file_name}_inspect_output.txt"
csvcubed inspect "$inspect_file" > "$inspect_output_file"
done
processed_files+=($csv_file)
processed_files+=($config_file)
echo "processed_files: ${processed_files[@]}"
echo "::set-output name=has_outputs::${{ toJSON(true) }}"
echo "---Finished Processing File: ${file}"
echo "======================"
fi
fi
done
shell: bash
- name: Handle deleted files
id: process-deleted-files
run: |
echo "::set-output name=has_outputs::${{ toJSON(false) }}"
mapfile -d ',' -t deleted_files < <(printf '%s,' '${{ steps.get-added-changed-removed-files.outputs.removed }}')
for file in "${deleted_files[@]}"; do
echo $'\n'
echo "---Handling Deletions for File: ${file}"
root_file=false
file_path="${file%/*}"
if [[ $file_path == $file ]]; then
file_path=""
root_file=true
fi
file_without_extension="${file%.*}"
file_name="${file_without_extension##*/}"
file_extension="${file##*.}"
# Detecting the top folder from the file path. E.g. csv/ is the top folder when the path is csv/sub-folder/my-data.csv
if [[ $root_file == true ]]; then
top_folder=""
else
top_folder=$(echo "$file_path" | cut -d "/" -f1)
fi
echo "---Extracting Delete File Info"
echo "file_path: ${file_path}"
echo "file_without_extension: ${file_without_extension}"
echo "file_name: ${file_name}"
echo "file_extension: ${file_extension}"
echo "top_folder: ${top_folder}"
# Delete config and outputs when a csv outside the out folder is deleted.
if [[ $file_extension != "csv" || $top_folder == "out" ]]; then
echo "File is not a csv or a it is a file inside out folder, hence ignoring it."
continue
fi
config_file="${file_without_extension}.json"
if [[ $root_file == true ]]; then
out_folder="out/${file_name}/"
else
out_folder="out/${file_path}/${file_name}/"
fi
echo "config_file: ${config_file}"
echo "out_folder: ${out_folder}"
if [[ -f $config_file ]]; then
echo "config file exists, hence deleting."
git rm "$config_file"
git commit -m "Deleted config file for file ${file} - $(date +'%d-%m-%Y at %H:%M:%S')"
fi
if [[ -d $out_folder ]]; then
echo "outputs exist, hence deleting."
git rm -r "$out_folder"
git commit -m "Deleted outputs for file ${file} - $(date +'%d-%m-%Y at %H:%M:%S')"
fi
git push
echo "::set-output name=has_outputs::${{ toJSON(true) }}"
echo "---Finished Handling Deletions for File: ${file}"
done
shell: bash
- name: Publish CSV-Ws and logs to artefacts
if: ${{ fromJSON(steps.build-and-inspect-files.outputs.has_outputs) == true }}
uses: actions/upload-artifact@v2
with:
name: assets-for-download
path: out
- name: Commit generated CSV-Ws and logs to the repository
if: ${{ fromJSON(steps.build-and-inspect-files.outputs.has_outputs) == true }}
# if: ${{ (fromJSON(steps.build-and-inspect-inputs.outputs.has_outputs) == true) && (fromJSON(inputs.commit-outputs) == true) }}
run: |
echo "--Commiting Outputs to Main Branch"
git add out/
git commit -m "Outputs generated from csv upload - $(date +'%d-%m-%Y at %H:%M:%S')"
git push
shell: bash
- name: Publish CSV-Ws and logs to GitHub Pages
if: ${{ fromJSON(steps.build-and-inspect-files.outputs.has_outputs) == true }}
#if: ${{ (fromJSON(steps.build-and-inspect-inputs.outputs.has_outputs) == true) && (fromJSON(inputs.publish-to-gh-pages) == true) }}
run: |
echo "--Publishing Output to GitHub Pages"
git checkout -b gh-pages
rm -r LICENSE
rm -r README.md
rm -r .github/workflows
repo_name=${GITHUB_REPOSITORY#*/}
username=${GITHUB_REPOSITORY_OWNER}
commit_id=${GITHUB_SHA}
mapfile -d ',' -t out_files < <(printf '%s,' $(find . -type f -path '*out/*'))
processed_out_files=$(printf ",%s" "${out_files[@]}")
touch .nojekyll
touch index.html
cat > index.html <<EOL
<!doctype html>
<html>
<head>
</head>
<body>
<h3>CSV-Ws generated are as below. The latest commit id is ${commit_id}.</h3>
<div id="files-container"></div>
<script type="text/javascript">
var html_str = "<ul>";
var files = "${processed_out_files}".split(',');
files.shift()
files.sort()
files.forEach(function(file) {
file = file.replace("./","")
link = "https://${username}.github.io/${repo_name}/"+file
html_str += "<li>"+"<a href='"+ link + "'>"+file+"</a></li>";
});
html_str += "</ul>";
document.getElementById("files-container").innerHTML = html_str;
</script>
</body>
</html>
EOL
git add .nojekyll
git add index.html
git commit -a -m "Updating outputs in GitHub Pages - $(date +'%d-%m-%Y at %H:%M:%S')"
git push --set-upstream origin gh-pages -f
shell: bash