You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: .circleci/config.yml
+5-3
Original file line number
Diff line number
Diff line change
@@ -62,10 +62,11 @@ references:
62
62
# happened to the job in Kubernetes. If we try MAX_CHECKS times and
63
63
# still the job hasn't finished, give up and return the starting
64
64
# non-zero status code.
65
-
while [ $i -lt $MAX_CHECKS ]; do ((i++)); if kubectl get jobs $job_name -o jsonpath='Failed:{.status.failed}' | grep "Failed:1"; then status_code=1 && break; elif kubectl get jobs $job_name -o jsonpath='Succeeded:{.status.succeeded}' | grep "Succeeded:1" ; then status_code=0 && break; else echo "Job not finished yet"; fi; sleep 30; done && \
65
+
printf "Waiting for job to finish: " && \
66
+
while [ $i -lt $MAX_CHECKS ]; do ((i++)); if kubectl get jobs $job_name -o jsonpath='Failed:{.status.failed}' | grep "Failed:1"; then status_code=1 && break; elif kubectl get jobs $job_name -o jsonpath='Succeeded:{.status.succeeded}' | grep "Succeeded:1" ; then status_code=0 && break; else printf "."; fi; sleep $CHECK_SPEEP; done && \
66
67
echo "Done waiting. Job status code: $status_code" && \
echo "Waiting on kubernetes job: $job_name in cluster: $GKE_CLUSTER" && \
84
86
i=0 && \
85
-
# 30 checks spaced 30s apart = 900s total.
86
-
max_checks=30 && \
87
+
# Poll up to MAX_CHECKS times, CHECK_SPEEP seconds apart (e.g. 60 checks spaced 30s apart = 1800s total).
87
88
status_code=2 && \
88
89
# Check on the job periodically. Set the status code depending on what
89
-
# happened to the job in Kubernetes. If we try max_checks times and
90
+
# happened to the job in Kubernetes. If we try MAX_CHECKS times and
90
91
# still the job hasn't finished, give up and return the starting
91
92
# non-zero status code.
92
-
while [ $i -lt $max_checks ]; do ((i++)); if kubectl get jobs $job_name -o jsonpath='Failed:{.status.failed}' | grep "Failed:1"; then status_code=1 && break; elif kubectl get jobs $job_name -o jsonpath='Succeeded:{.status.succeeded}' | grep "Succeeded:1" ; then status_code=0 && break; else echo "Job not finished yet"; fi; sleep 30; done && \
93
+
printf "Waiting for job to finish: " && \
94
+
while [ $i -lt $MAX_CHECKS ]; do ((i++)); if kubectl get jobs $job_name -o jsonpath='Failed:{.status.failed}' | grep "Failed:1"; then status_code=1 && break; elif kubectl get jobs $job_name -o jsonpath='Succeeded:{.status.succeeded}' | grep "Succeeded:1" ; then status_code=0 && break; else printf "." ; fi; sleep $CHECK_SPEEP; done && \
93
95
echo "Done waiting. Job status code: $status_code" && \
Copy file name to clipboardExpand all lines: CHANGELOG.md
+2
Original file line number
Diff line number
Diff line change
@@ -38,6 +38,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
38
38
39
39
- Fixed `weights_save_path` getting ignored when `logger=False` is passed to Trainer ([#2681](https://github.com/PyTorchLightning/pytorch-lightning/pull/2681))
40
40
41
+
- Fixed TPU multi-core and Float16 ([#2632](https://github.com/PyTorchLightning/pytorch-lightning/pull/2632))
0 commit comments