Skip to content

Commit

Permalink
Merge branch 'develop' into lifecycle_managerment
Browse files Browse the repository at this point in the history
  • Loading branch information
typhoonzero committed Mar 6, 2018
2 parents 189e26f + 99a82a9 commit d8cf99e
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 7 deletions.
13 changes: 8 additions & 5 deletions docker/paddle_k8s
Original file line number Diff line number Diff line change
Expand Up @@ -65,20 +65,23 @@ start_fluid_process() {
task_index=""

stdbuf -oL python /root/k8s_tools.py wait_pods_running ${pserver_label} ${PSERVERS}
-stdbuf -oL python /root/k8s_tools.py wait_pods_running ${trainer_label} ${TRAINERS}

-ps_hosts=$(python /root/k8s_tools.py fetch_endpoints ${pserver_label} ${PADDLE_INIT_PORT})
-trainer_hosts=$(python /root/k8s_tools.py fetch_endpoints ${trainer_label} ${PADDLE_INIT_PORT})
+if [ "${TRAINING_ROLE}" == "TRAINER" ]; then
+stdbuf -oL python /root/k8s_tools.py wait_pods_running ${trainer_label} ${TRAINERS}
+fi

+export PADDLE_INIT_PSERVERS=$(python /root/k8s_tools.py fetch_ips ${pserver_label} ${PADDLE_INIT_PORT})

if [ "${TRAINING_ROLE}" == "TRAINER" ]; then
check_failed_cnt ${TRAINERS}
task_index=$(python /root/k8s_tools.py fetch_id ${trainer_label})
else
task_index=$(python /root/k8s_tools.py fetch_id ${pserver_label})
fi

+export PADDLE_INIT_TRAINER_ID=${task_index}

-stdbuf -oL sh -c "${ENTRY} --ps_hosts=${ps_hosts} --trainer_hosts=${trainer_hosts} \
---task_index=${task_index}"
+stdbuf -oL sh -c "${ENTRY}"
check_trainer_ret $?
}

Expand Down
2 changes: 1 addition & 1 deletion go/apis/paddlepaddle/v1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ type TrainerSpec struct {
MinInstance int `json:"min-instance"`
MaxInstance int `json:"max-instance"`
Resources apiv1.ResourceRequirements `json:"resources"`
-ReplicaSpec *batchv1.JobSpec `json:"replicaSpec"`
+ReplicaSpec *batchv1.Job `json:"replicaSpec"`
}

// TrainingJobPhase is the phase of TrainingJob
Expand Down
2 changes: 1 addition & 1 deletion go/apis/paddlepaddle/v1/zz_generated.deepcopy.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ func (in *TrainerSpec) DeepCopyInto(out *TrainerSpec) {
if *in == nil {
*out = nil
} else {
-*out = new(batch_v1.JobSpec)
+*out = new(batch_v1.Job)
(*in).DeepCopyInto(*out)
}
}
Expand Down

0 comments on commit d8cf99e

Please sign in to comment.