Skip to content

Commit

Permalink
CI: validate JSON & fix benchmark (#8567)
Browse files Browse the repository at this point in the history
* CI: validate JSON

* as GHA

* PT1.8

* 32g

Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com>
  • Loading branch information
2 people authored and awaelchli committed Aug 3, 2021
1 parent b6a5ad6 commit 3586f2e
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 67 deletions.
Original file line number Diff line number Diff line change
@@ -1,21 +1,19 @@
name: GPU Parity testing

on:
schedule:
- cron: "0 0 * * *" # At the end of every day
schedules:
- cron: "0 0 * * *" # At the end of every day
displayName: Daily midnight benchmark
branches:
include:
- "master"

jobs:
parity-test:
timeoutInMinutes: 120

cancelTimeoutInMinutes: 2

- job: benchmarks
timeoutInMinutes: "90"
cancelTimeoutInMinutes: "2"
pool: gridai-spot-pool

container:
# base ML image: mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.8-torch1.6"

image: "pytorchlightning/pytorch_lightning:base-cuda-py3.8-torch1.8"
options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=32g"
workspace:
clean: all

Expand Down
31 changes: 16 additions & 15 deletions .azure-pipelines/gpu-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,19 @@ trigger:
- '*'
branches:
include:
- master
- release/*
- refs/tags/*
- "master"
- "release/*"
- "refs/tags/*"
pr:
- master
- release/*
- "master"
- "release/*"

jobs:
- job: pytest
# how long to run the job before automatically cancelling
timeoutInMinutes: 45
timeoutInMinutes: "45"
# how much time to give 'run always even if cancelled tasks' before stopping them
cancelTimeoutInMinutes: 2
cancelTimeoutInMinutes: "2"

pool: gridai-spot-pool

Expand Down Expand Up @@ -92,14 +92,15 @@ jobs:
testRunTitle: '$(Agent.OS) - $(Build.DefinitionName) - Python $(python.version)'
condition: succeededOrFailed()

- task: PublishCodeCoverageResults@1
displayName: 'Publish coverage report'
inputs:
codeCoverageTool: 'cobertura'
summaryFileLocation: 'coverage.xml'
reportDirectory: '$(Build.SourcesDirectory)/htmlcov'
testRunTitle: '$(Agent.OS) - $(Build.BuildNumber)[$(Agent.JobName)] - Python $(python.version)'
condition: succeededOrFailed()
# todo: re-enable once the schema check passes; at the moment it also does not seem to have any effect
#- task: PublishCodeCoverageResults@2
# displayName: 'Publish coverage report'
# inputs:
# codeCoverageTool: 'Cobertura'
# summaryFileLocation: 'coverage.xml'
# reportDirectory: '$(Build.SourcesDirectory)/htmlcov'
# testRunTitle: '$(Agent.OS) - $(Build.BuildNumber)[$(Agent.JobName)] - Python $(python.version)'
# condition: succeededOrFailed()

- script: |
set -e
Expand Down
15 changes: 0 additions & 15 deletions .github/workflows/ci_dockers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -123,21 +123,6 @@ jobs:
push: false
timeout-minutes: 50

build-nvidia:
runs-on: ubuntu-20.04
# todo: temporarily skip as the base container does not fit to agent
if: false
steps:
- name: Checkout
uses: actions/checkout@v2

- name: Build NVIDIA Docker
uses: docker/build-push-action@v2
with:
file: dockers/nvidia/Dockerfile
push: false
timeout-minutes: 50

build-ipu:
runs-on: ubuntu-20.04
strategy:
Expand Down
24 changes: 24 additions & 0 deletions .github/workflows/ci_schema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Validates the repository's CI configuration files against published JSON schemas.
name: CI action schema
# Runs on every push (no branch filter) and on pull requests that target
# master or a release/* branch.
on:
  push: {}
  pull_request:
    branches: [master, "release/*"]

jobs:
  validate-schema:
    runs-on: ubuntu-20.04
    steps:
      - name: Checkout
        uses: actions/checkout@v2

      - name: Install pkg
        run: |
          pip install check-jsonschema

      # GitHub Actions workflow files are checked against the SchemaStore workflow schema.
      - name: GH Workflows
        run: |
          check-jsonschema .github/workflows/*.yml --schemafile "https://json.schemastore.org/github-workflow"

      # Azure Pipelines files are checked against the service schema pinned at
      # tag v1.188.1 of microsoft/azure-pipelines-vscode.
      - name: Azure Pipelines
        run: |
          check-jsonschema .azure-pipelines/*.yml --schemafile "https://raw.githubusercontent.com/microsoft/azure-pipelines-vscode/v1.188.1/service-schema.json"
24 changes: 0 additions & 24 deletions .github/workflows/events-nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -153,30 +153,6 @@ jobs:
tags: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}
timeout-minutes: 55

docker-NVIDIA:
runs-on: ubuntu-20.04
# todo: temporarily skip as the base container does not fit to agent
if: false
steps:
- name: Checkout
uses: actions/checkout@v2

# https://github.com/docker/setup-buildx-action
# Set up Docker Buildx - to use cache-from and cache-to argument of buildx command
- uses: docker/setup-buildx-action@v1
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}

- name: Publish NVIDIA to Docker Hub
uses: docker/build-push-action@v2
with:
file: dockers/nvidia/Dockerfile
tags: nvcr.io/pytorchlightning/pytorch_lightning:latest
timeout-minutes: 55

docker-IPU:
runs-on: ubuntu-20.04
strategy:
Expand Down

0 comments on commit 3586f2e

Please sign in to comment.