Skip to content

Pull and Validate Spreadsheet Data #491

Pull and Validate Spreadsheet Data

Pull and Validate Spreadsheet Data #491

Workflow file for this run

# Pulls the per-island sheets of one Google spreadsheet, merges and validates
# them, and commits the merged CSV back to the repository.
name: Pull and Validate Spreadsheet Data

on:
  # Allow manual runs from the Actions tab.
  workflow_dispatch: {}
  # Daily at 10:00 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: "0 10 * * *"

env:
  # Google Sheets document ID shared by every "Pulling ..." step.
  SPREADSHEET_ID: 1YGt_Y70oy6qc09ZZ7kip9DM2JGtRC2fAHxi6JXOIsSk

jobs:
Pull-And-Merge-Spreadsheet-Data:
  # Downloads each island's sheet as CSV into the runner temp directory,
  # merges the four files into hawaii-zoning-data.csv, and uploads that file
  # as a workflow artifact.
  runs-on: ubuntu-latest
  steps:
    # Each pull step exports one sheet (selected by its gid) of the
    # spreadsheet named by the workflow-level SPREADSHEET_ID. The shell expands
    # the variables inside the double-quoted URL.
    - name: Pulling Maui
      working-directory: ${{ runner.temp }}
      env:
        MAUI_SHEET_ID: '987737350'
        MAUI_FILE_NAME: 'maui.csv'
      run: |
        wget "https://docs.google.com/spreadsheets/d/${SPREADSHEET_ID}/export?format=csv&gid=${MAUI_SHEET_ID}" -O "$MAUI_FILE_NAME"
    - name: Pulling Honolulu (Oahu)
      working-directory: ${{ runner.temp }}
      env:
        HONOLULU_SHEET_ID: '448730928'
        HONOLULU_FILE_NAME: 'honolulu.csv'
      run: |
        wget "https://docs.google.com/spreadsheets/d/${SPREADSHEET_ID}/export?format=csv&gid=${HONOLULU_SHEET_ID}" -O "$HONOLULU_FILE_NAME"
    - name: Pulling Hawaii (Big Island)
      working-directory: ${{ runner.temp }}
      env:
        HAWAII_SHEET_ID: '835193382'
        HAWAII_FILE_NAME: 'hawaii.csv'
      run: |
        wget "https://docs.google.com/spreadsheets/d/${SPREADSHEET_ID}/export?format=csv&gid=${HAWAII_SHEET_ID}" -O "$HAWAII_FILE_NAME"
    - name: Pulling Kauai
      working-directory: ${{ runner.temp }}
      env:
        KAUAI_SHEET_ID: '1657052642'
        KAUAI_FILE_NAME: 'kauai.csv'
      run: |
        wget "https://docs.google.com/spreadsheets/d/${SPREADSHEET_ID}/export?format=csv&gid=${KAUAI_SHEET_ID}" -O "$KAUAI_FILE_NAME"
    - name: Merge Island CSVs
      working-directory: ${{ runner.temp }}
      run: |
        # Add a line break after every line that ends in a comma. The intent
        # (per the original author's note) is that each file ends on a fresh
        # line, so the next file appended after it starts on its own row.
        for island in hawaii honolulu kauai maui; do
          sed 's/,$/,\n/' "${island}.csv" > "${island}_insert_line.csv"
          rm "${island}.csv"
        done
        # The first two lines of the Maui sheet are written once
        # (presumably the shared header rows - confirm against the sheet).
        head -n 2 maui_insert_line.csv > hawaii-zoning-data.csv
        # Every file then contributes its rows from line 3 onward. The inputs
        # are listed explicitly: a *.csv glob would also match the output file
        # that head has just created and feed it back into itself.
        tail -n +3 -q \
          hawaii_insert_line.csv \
          honolulu_insert_line.csv \
          kauai_insert_line.csv \
          maui_insert_line.csv >> hawaii-zoning-data.csv
    - name: Upload Artifact
      # v1/v2 of the artifact actions have been retired by GitHub. Artifacts
      # uploaded with v4 must be fetched with download-artifact v4 as well.
      uses: actions/upload-artifact@v4
      with:
        name: hawaii-zoning-data.csv
        path: ${{ runner.temp }}/hawaii-zoning-data.csv
Validate-Data:
  # Runs csvclean over the merged CSV produced by the pull job. This is a
  # surface check only: it prints csvclean's findings to the job log.
  needs: Pull-And-Merge-Spreadsheet-Data
  runs-on: ubuntu-latest
  steps:
    - uses: actions/setup-python@v4
      with:
        python-version: '3.10'
    - name: Install csvkit
      # Pinned below 2.0: the validation step relies on `csvclean -n`
      # (dry run), and csvkit 2.0 reworked csvclean's command line
      # (NOTE(review): the -n/--dry-run flag appears to have been removed
      # there - confirm against the csvkit changelog before lifting the pin).
      run: |
        pip install 'csvkit<2'
    - name: Download Artifact
      # Must match the major version of the upload-artifact action used by
      # the job that produced the artifact (v4 artifacts need a v4 download).
      uses: actions/download-artifact@v4
      with:
        name: hawaii-zoning-data.csv
    # Not doing any real validation, just outputting csvclean results to
    # detect any surface problems.
    - name: Validate Data
      run: |
        csvclean -n hawaii-zoning-data.csv
Validate-CSV:
  # Runs the repository's own validation script
  # (data-pipeline/csv-validation/validation.py) after the csvclean check.
  runs-on: ubuntu-latest
  needs: Validate-Data
  steps:
    # The script and its requirements file live in the repository.
    - uses: actions/checkout@v3
    - name: Set up Python 3
      uses: actions/setup-python@v4
      with:
        # Quoted so YAML keeps it a string instead of the float 3.1.
        python-version: "3.10"
        cache: pip
    # Installs whatever requirements.txt lists (petl, going by the step name).
    - name: Install petl
      run: |
        pip install -r ./requirements.txt
      working-directory: data-pipeline/csv-validation
    - name: Run Validation
      run: |
        python validation.py
      working-directory: data-pipeline/csv-validation
Commit-Data:
  # Places the merged CSV artifact into data/ and commits it to the
  # repository when its content differs from what is already checked in.
  needs: Validate-CSV
  runs-on: ubuntu-latest
  # `git push` below uses the token set up by actions/checkout, which needs
  # write access to repository contents.
  permissions:
    contents: write
  steps:
    # Same major version as the checkout step in the Validate-CSV job.
    - uses: actions/checkout@v3
    - name: Download Artifact
      # Must match the major version of the upload-artifact action used by
      # the job that produced the artifact (v4 artifacts need a v4 download).
      uses: actions/download-artifact@v4
      with:
        name: hawaii-zoning-data.csv
    - name: Get current date
      # Exported through GITHUB_ENV so later steps can read it as $NOW.
      run: echo "NOW=$(date +'%Y-%m-%d')" >> "$GITHUB_ENV"
    - name: Move Artifact
      run: mv hawaii-zoning-data.csv data/
    - name: Commit to repo
      run: |
        git config --local user.email "github-actions@github.com"
        git config --local user.name "GitHub Actions"
        # Stage first, then test the index: only a change to the data file
        # itself should lead to a commit. Checking `git status` for the whole
        # tree could report unrelated changes and make `git commit` fail with
        # nothing staged.
        git add data/hawaii-zoning-data.csv
        if git diff --cached --quiet; then
          echo "no changes to commit"
          exit 0
        fi
        echo "there are changes to commit"
        git commit -m "Spreadsheet data update ${NOW}"
        git push