# Skip to content
#
# Merge branch '11-bug-fix' of github.com:GSS-Cogs/csv-to-csvw-action i… #205
#
# Merge branch '11-bug-fix' of github.com:GSS-Cogs/csv-to-csvw-action i…
#
# Merge branch '11-bug-fix' of github.com:GSS-Cogs/csv-to-csvw-action i… #205
#
# Workflow file for this run

name: csv-to-csvw action
on:
  push:
    branches:
      # Fix: with every branch commented out, `branches:` parsed as null,
      # which is not a valid push filter. The main branch is restored;
      # the test branch is kept as a commented alternative.
      - main
      # - 11-bug-fix
jobs:
  generate_csvw_from_csv_upload:
    name: Generate CSV-W from csv upload
    runs-on: ubuntu-latest
    steps:
      - name: Set up Python 3.9
        uses: actions/setup-python@v2
        with:
          # Quoted so YAML does not read the version as a float
          # (unquoted 3.10, for example, would become 3.1).
          python-version: "3.9"
      - name: Install csvcubed
        run: pip install csvcubed
        shell: bash
      - name: Verify csvcubed installation
        run: csvcubed version
        shell: bash
      - name: Check out repository
        uses: actions/checkout@v2
        with:
          # Full history so the later git pull/commit/push steps work.
          fetch-depth: 0
      - name: View working directory
        run: ls -la $GITHUB_WORKSPACE
        shell: bash
      - name: Configure git
        run: |
          git config --global user.name "CSV-W from csv upload generator"
          # Fix: git refuses to commit without user.email configured
          # ("Please tell me who you are"), and later steps run git commit.
          git config --global user.email "csv-to-csvw-action@users.noreply.github.com"
          git pull
        shell: bash
      - name: Get added/modified/removed files
        id: get-added-changed-removed-files
        uses: jitterbit/get-changed-files@v1
        with:
          format: "csv"
- name: Build and inspect files
id: build-and-inspect-files
run: |
echo "::set-output name=has_outputs::${{ toJSON(false) }}"
mapfile -d ',' -t detected_files < <(printf '%s,' '${{ steps.get-added-changed-removed-files.outputs.added_modified }}')
mapfile -d ',' -t renamed_files < <(printf '%s,' '${{ steps.get-added-changed-removed-files.outputs.renamed }}')
detected_files+=(${renamed_files[@]})
echo "detected_files: ${detected_files[@]}"
processed_files=()
for file in "${detected_files[@]}"; do
echo $'\n'
echo "Detected file: ${file}"
echo "======================"
# If the file is already processed, it will be ignored.
if [[ " ${processed_files[@]} " =~ " ${file} " ]]; then
echo "File is already processed, hence ignoring it."
continue
fi
root_file=false
file_path="${file%/*}"
# When the file is at the root, the above returns file name. We need to make sure the file_path is set to '' when this is the case.
if [[ $file_path == $file ]]; then
root_file=true
file_path=""
fi
file_without_extension="${file%.*}"
file_name="${file_without_extension##*/}"
file_extension="${file##*.}"
# Detecting the top folder from the file path. E.g. csv/ is the top folder when the path is csv/sub-folder/my-data.csv
if [[ $root_file == true ]]; then
top_folder=""
else
top_folder=$(echo "$file_path" | cut -d "/" -f1)
fi
echo "---Extracting File Info"
echo "file_path: ${file_path}"
echo "file_without_extension: ${file_without_extension}"
echo "file_name: ${file_name}"
echo "file_extension: ${file_extension}"
echo "top_folder: ${top_folder}"
# The out/ folder is used for storing the outputs generated by csvcubed build and inspect commands. Hence, the user should not use this folder to commit any inputs. Any inputs committed to this folder will not be procssed.
if [[ ($file_extension == "csv" || $file_extension == "json") && $top_folder != "out" ]]; then
echo "---Processing File: ${file}"
csv_file=""
config_file=""
if [[ $file_extension == "csv" ]]; then
csv_file=${file}
for file_secondary in "${detected_files[@]}"; do
potential_config_file="${file_without_extension}.json"
if [[ -f $potential_config_file ]]; then
config_file=$potential_config_file
fi
done
elif [[ $file_extension == "json" ]]; then
config_file=$file
potential_csv_file="${file_without_extension}.csv"
echo "potential_csv_file: ${potential_csv_file}"
if [[ -f $potential_csv_file ]]; then
csv_file=$potential_csv_file
else
config_file=NULL
fi
fi
echo "csv_file for processing: ${csv_file}"
echo "config_file for processing: ${config_file}"
if [[ -f $csv_file ]]; then
echo "---Producing Output Path"
# Creating the out path to store outputs.
if [[ $root_file == true ]]; then
out_path="out/${file_name}/"
else
out_path="out/${file_path}/${file_name}/"
fi
echo "out_path: ${out_path}"
echo "---Building CSV-W"
echo "Building CSV-W"
if [[ -f $config_file ]]; then
echo "Config for ${csv_file} is available: ${config_file}"
csvcubed build "$csv_file" -c "$config_file" --out "$out_path" --validation-errors-to-file
else
echo "Config for ${csv_file} is not available"
csvcubed build "$csv_file" --out "$out_path" --validation-errors-to-file
fi
echo "---Inspecting CSV-W"
mapfile -d $'\0' inspectable_files < <(find "${GITHUB_WORKSPACE}/${out_path}" -name "*.csv-metadata.json" -type f -print0)
for inspect_file in "${inspectable_files[@]}"; do
echo "Inspecting file: ${inspect_file}"
inspect_file_path="${inspect_file%/*}"
inspect_file_name="${inspect_file##*/}"
inspect_output_file="${out_path}${inspect_file_name}_inspect_output.txt"
csvcubed inspect "$inspect_file" > "$inspect_output_file"
done
processed_files+=($csv_file)
processed_files+=($config_file)
echo "processed_files: ${processed_files[@]}"
echo "::set-output name=has_outputs::${{ toJSON(true) }}"
echo "---Finished Processing File: ${file}"
echo "======================"
fi
fi
done
shell: bash
- name: Handle deleted files
id: process-deleted-files
run: |
echo "::set-output name=has_outputs::${{ toJSON(false) }}"
mapfile -d ',' -t deleted_files < <(printf '%s,' '${{ steps.get-added-changed-removed-files.outputs.removed }}')
for file in "${deleted_files[@]}"; do
echo $'\n'
echo "---Handling Deletions for File: ${file}"
root_file=false
file_path="${file%/*}"
if [[ $file_path == $file ]]; then
file_path=""
root_file=true
fi
file_without_extension="${file%.*}"
file_name="${file_without_extension##*/}"
file_extension="${file##*.}"
# Detecting the top folder from the file path. E.g. csv/ is the top folder when the path is csv/sub-folder/my-data.csv
if [[ $root_file == true ]]; then
top_folder=""
else
top_folder=$(echo "$file_path" | cut -d "/" -f1)
fi
echo "---Extracting Delete File Info"
echo "file_path: ${file_path}"
echo "file_without_extension: ${file_without_extension}"
echo "file_name: ${file_name}"
echo "file_extension: ${file_extension}"
echo "top_folder: ${top_folder}"
# Delete config and outputs when a csv outside the out folder is deleted.
if [[ $file_extension != "csv" || $top_folder == "out" ]]; then
echo "File is not a csv or a it is a file inside out folder, hence ignoring it."
continue
fi
config_file="${file_without_extension}.json"
if [[ $root_file == true ]]; then
out_folder="out/${file_name}/"
else
out_folder="out/${file_path}/${file_name}/"
fi
echo "config_file: ${config_file}"
echo "out_folder: ${out_folder}"
if [[ -f $config_file ]]; then
echo "config file exists, hence deleting."
git rm "$config_file"
git commit -m "Deleted config file for file ${file} - $(date +'%d-%m-%Y at %H:%M:%S')"
fi
if [[ -d $out_folder ]]; then
echo "outputs exist, hence deleting."
git rm -r "$out_folder"
git commit -m "Deleted outputs for file ${file} - $(date +'%d-%m-%Y at %H:%M:%S')"
fi
git push
echo "::set-output name=has_outputs::${{ toJSON(true) }}"
echo "---Finished Handling Deletions for File: ${file}"
done
shell: bash
- name: Publish CSV-Ws and logs to artefacts
if: ${{ fromJSON(steps.build-and-inspect-files.outputs.has_outputs) == true }}
uses: actions/upload-artifact@v2
with:
name: assets-for-download
path: out
- name: Commit generated CSV-Ws and logs to the repository
if: ${{ fromJSON(steps.build-and-inspect-files.outputs.has_outputs) == true }}
# if: ${{ (fromJSON(steps.build-and-inspect-inputs.outputs.has_outputs) == true) && (fromJSON(inputs.commit-outputs) == true) }}
run: |
echo "--Commiting Outputs to Main Branch"
git add out/
git commit -m "Outputs generated from csv upload - $(date +'%d-%m-%Y at %H:%M:%S')"
git push
shell: bash
- name: Publish CSV-Ws and logs to GitHub Pages
if: ${{ fromJSON(steps.build-and-inspect-files.outputs.has_outputs) == true }}
#if: ${{ (fromJSON(steps.build-and-inspect-inputs.outputs.has_outputs) == true) && (fromJSON(inputs.publish-to-gh-pages) == true) }}
run: |
echo "--Publishing Output to GitHub Pages"
git checkout -b gh-pages
rm -r LICENSE
rm -r README.md
rm -r .github/workflows
repo_name=${GITHUB_REPOSITORY#*/}
username=${GITHUB_REPOSITORY_OWNER}
commit_id=${GITHUB_SHA}
mapfile -d ',' -t out_files < <(printf '%s,' $(find . -type f -path '*out/*'))
processed_out_files=$(printf ",%s" "${out_files[@]}")
touch .nojekyll
touch index.html
cat > index.html <<EOL
<!doctype html>
<html>
<head>
</head>
<body>
<h3>CSV-Ws generated are as below. The latest commit id is ${commit_id}.</h3>
<div id="files-container"></div>
<script type="text/javascript">
var html_str = "<ul>";
var files = "${processed_out_files}".split(',');
files.shift()
files.sort()
files.forEach(function(file) {
file = file.replace("./","")
link = "https://${username}.github.io/${repo_name}/"+file
html_str += "<li>"+"<a href='"+ link + "'>"+file+"</a></li>";
});
html_str += "</ul>";
document.getElementById("files-container").innerHTML = html_str;
</script>
</body>
</html>
EOL
git add .nojekyll
git add index.html
git commit -a -m "Updating outputs in GitHub Pages - $(date +'%d-%m-%Y at %H:%M:%S')"
git push --set-upstream origin gh-pages -f
shell: bash