Skip to content

Commit

Permalink
Merge pull request #633 from typhoonzero/fix_fluid_entry
Browse files (browse the repository at this point in the history)
Fix fluid start entry
  • Loading branch information
typhoonzero committed Mar 6, 2018
2 parents 2a6aa52 + 789fd4e commit 2d2c23c
Showing 1 changed file with 8 additions and 5 deletions.
13 changes: 8 additions & 5 deletions docker/paddle_k8s
Original file line number Diff line number Diff line change
@@ -65,20 +65,23 @@ start_fluid_process() {
task_index=""

stdbuf -oL python /root/k8s_tools.py wait_pods_running ${pserver_label} ${PSERVERS}
stdbuf -oL python /root/k8s_tools.py wait_pods_running ${trainer_label} ${TRAINERS}

ps_hosts=$(python /root/k8s_tools.py fetch_endpoints ${pserver_label} ${PADDLE_INIT_PORT})
trainer_hosts=$(python /root/k8s_tools.py fetch_endpoints ${trainer_label} ${PADDLE_INIT_PORT})
if [ "${TRAINING_ROLE}" == "TRAINER" ]; then
stdbuf -oL python /root/k8s_tools.py wait_pods_running ${trainer_label} ${TRAINERS}
fi

export PADDLE_INIT_PSERVERS=$(python /root/k8s_tools.py fetch_ips ${pserver_label} ${PADDLE_INIT_PORT})

if [ "${TRAINING_ROLE}" == "TRAINER" ]; then
check_failed_cnt ${TRAINERS}
task_index=$(python /root/k8s_tools.py fetch_id ${trainer_label})
else
task_index=$(python /root/k8s_tools.py fetch_id ${pserver_label})
fi

export PADDLE_INIT_TRAINER_ID=${task_index}

stdbuf -oL sh -c "${ENTRY} --ps_hosts=${ps_hosts} --trainer_hosts=${trainer_hosts} \
--task_index=${task_index}"
stdbuf -oL sh -c "${ENTRY}"
check_trainer_ret $?
}

Expand Down

0 comments on commit 2d2c23c

Please sign in to comment.