Skip to content

Commit

Permalink
finish cdn_gpu script
Browse files Browse the repository at this point in the history
  • Loading branch information
dailinsubjam committed May 22, 2024
1 parent d9f37bc commit 305fe89
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 27 deletions.
26 changes: 13 additions & 13 deletions scripts/aws_ecs_benchmarks_cdn.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,27 +12,27 @@ keydb_address=redis://"$ip":6379

# Check if at least two arguments are provided
if [ $# -lt 2 ]; then
echo "Usage: $0 <REMOTE_USER> <REMOTE_HOST>"
echo "Usage: $0 <REMOTE_USER> <REMOTE_BROKER_HOST>"
exit 1
fi
REMOTE_USER="$1" #"sishan"
REMOTE_BROKER_HOST="$2" #"3.135.239.251"

# this is to prevent "Error: Too many open files (os error 24). Pausing for 500ms"
ulimit -n 65536
# # build to get the bin in advance, uncomment the following if built first time
# just async_std example validator-push-cdn -- http://localhost:4444 &
# # remember to sleep enough time if it's built first time
# sleep 3m
# for pid in $(ps -ef | grep "validator" | awk '{print $2}'); do kill -9 $pid; done
# build to get the bin in advance, uncomment the following if built first time
just async_std example_fixed_leader validator-push-cdn -- http://localhost:4444 &
# remember to sleep enough time if it's built first time
sleep 3m
for pid in $(ps -ef | grep "validator" | awk '{print $2}'); do kill -9 $pid; done

# # docker build and push
# docker build . -f ./docker/validator-cdn-local.Dockerfile -t ghcr.io/espressosystems/hotshot/validator-webserver:main-async-std
# docker push ghcr.io/espressosystems/hotshot/validator-webserver:main-async-std
# docker build and push
docker build . -f ./docker/validator-cdn-local.Dockerfile -t ghcr.io/espressosystems/hotshot/validator-webserver:main-async-std
docker push ghcr.io/espressosystems/hotshot/validator-webserver:main-async-std

# # ecs deploy
# ecs deploy --region us-east-2 hotshot hotshot_centralized -i centralized ghcr.io/espressosystems/hotshot/validator-webserver:main-async-std
# ecs deploy --region us-east-2 hotshot hotshot_centralized -c centralized ${orchestrator_url} # http://172.31.8.82:4444
# ecs deploy
ecs deploy --region us-east-2 hotshot hotshot_centralized -i centralized ghcr.io/espressosystems/hotshot/validator-webserver:main-async-std
ecs deploy --region us-east-2 hotshot hotshot_centralized -c centralized ${orchestrator_url} # http://172.31.8.82:4444

# start keydb
# docker run --rm -p 0.0.0.0:6379:6379 eqalpha/keydb &
Expand Down Expand Up @@ -93,7 +93,7 @@ EOF
--rounds ${rounds} \
--fixed_leader_for_gpuvid ${fixed_leader_for_gpuvid} \
--cdn_marshal_address ${cdn_marshal_address} \
--commit_sha cdn_simple_builder &
--commit_sha cdn_simple_builder_fixed_leader &
sleep 30

# start validators
Expand Down
41 changes: 28 additions & 13 deletions scripts/aws_ecs_benchmarks_cdn_gpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,19 @@ orchestrator_url=http://"$ip":4444
cdn_marshal_address="$ip":9000
keydb_address=redis://"$ip":6379

# Check if at least three arguments are provided
if [ $# -lt 3 ]; then
echo "Usage: $0 <REMOTE_USER> <REMOTE_BROKER_HOST> <REMOTE_GPU_HOST>"
exit 1
fi
REMOTE_USER="$1" #"sishan"
REMOTE_BROKER_HOST="$2" #"3.135.239.251"
REMOTE_GPU_HOST="$3" #"18.220.24.72"
# this is to prevent "Error: Too many open files (os error 24). Pausing for 500ms"
ulimit -n 65536

# build to get the bin in advance, uncomment the following if built first time
just async_std example validator-push-cdn -- http://localhost:4444 &
just async_std example_fixed_leader validator-push-cdn -- http://localhost:4444 &
# remember to sleep enough time if it's built first time
sleep 3m
for pid in $(ps -ef | grep "validator" | awk '{print $2}'); do kill -9 $pid; done
Expand Down Expand Up @@ -41,21 +52,21 @@ round_up() {
# total_nodes, da_committee_size, transactions_per_round, transaction_size = 100, 10, 1, 4096
# for iteration of assignment
# see `aws_ecs_benchmarks_webserver.sh` for an example
for total_nodes in 10 # 50 100 200 500 1000
for total_nodes in 10 50 100 200 500 1000
do
for da_committee_size in 5 # 10 50 100
for da_committee_size in 5 10 50 100
do
if [ $da_committee_size -le $total_nodes ]
then
for transactions_per_round in 1 # 10 50 100
for transactions_per_round in 1 10
do
for transaction_size in 1000000 # 100000 1000000 10000000 20000000 # 512 4096
for transaction_size in 100000 1000000 10000000 20000000
do
for fixed_leader_for_gpuvid in 1 # 5 10 50 100
for fixed_leader_for_gpuvid in 1 5 10
do
if [ $fixed_leader_for_gpuvid -le $da_committee_size ]
then
for rounds in 100 # 50
for rounds in 100
do
# server1: broker
echo -e "\e[35mGoing to start cdn-broker on local server\e[0m"
Expand Down Expand Up @@ -84,16 +95,20 @@ EOF
--rounds ${rounds} \
--fixed_leader_for_gpuvid ${fixed_leader_for_gpuvid} \
--cdn_marshal_address ${cdn_marshal_address} \
--commit_sha random_tx &
--commit_sha cdn_with_gpu &
sleep 30

# the leaders that need to run on the GPU must be started FIRST,
# then WAIT long enough for them to register with the orchestrator
# make sure you're able to access the remote nvidia gpu server
echo -e "\e[35mGoing to start leaders on remote gpu server\e[0m"
REMOTE_GPU_HOST="18.220.24.72"
COMMAND_GPU_LEADER="./HotShot/scripts/benchmarks_start_leader_gpu.sh ${fixed_leader_for_gpuvid} ${orchestrator_url}"
ssh $REMOTE_USER@$REMOTE_GPU_HOST "$COMMAND_GPU_LEADER exit"

ssh $REMOTE_USER@$REMOTE_GPU_HOST << EOF
cd HotShot
nohup bash scripts/benchmarks_start_leader_gpu.sh ${fixed_leader_for_gpuvid} ${orchestrator_url} > nohup.out 2>&1 &
exit
EOF

sleep 1m

# start validators
Expand All @@ -102,7 +117,7 @@ EOF
base=100
mul=$(echo "l($transaction_size * $transactions_per_round)/l($base)" | bc -l)
mul=$(round_up $mul)
sleep_time=$(( ($rounds + $total_nodes) * $mul ))
sleep_time=$(( ($rounds + $total_nodes / 2) * $mul ))
echo -e "\e[35msleep_time: $sleep_time\e[0m"
sleep $sleep_time

Expand All @@ -118,7 +133,7 @@ EOF
# shut down brokers
echo -e "\e[35mGoing to stop cdn-broker\e[0m"
killall -9 cdn-broker
ssh $REMOTE_USER@$REMOTE_BROKER_HOST "./HotShot/scripts/shutdown.sh exit"
ssh $REMOTE_USER@$REMOTE_BROKER_HOST "killall -9 cdn-broker && exit"
# remove brokers from keydb
# if keydb is password-protected, run `echo DEL brokers | keydb-cli -a THE_PASSWORD` instead, using the password you configured
echo DEL brokers | keydb-cli
Expand Down
4 changes: 3 additions & 1 deletion scripts/benchmarks_results/results_init_run.csv
Original file line number Diff line number Diff line change
Expand Up @@ -173,4 +173,6 @@ real_2_broker,10,10,1,10,100016,100,fixed-leader-election,Full,1,0,3,3097269,960
commit_sha,total_nodes,da_committee_size,fixed_leader_for_gpuvid,transactions_per_round,transaction_size,rounds,leader_election_type,partial_results,avg_latency_in_sec,minimum_latency_in_sec,maximum_latency_in_sec,throughput_bytes_per_sec,total_transactions_committed,total_time_elapsed_in_sec,total_num_views,failed_num_views
real_2_broker,1000,10,1,1,1000016,100,fixed-leader-election,Half,140,13,363,2636913,1416,537,100,0
commit_sha,total_nodes,da_committee_size,fixed_leader_for_gpuvid,transactions_per_round,transaction_size,rounds,leader_election_type,partial_results,avg_latency_in_sec,minimum_latency_in_sec,maximum_latency_in_sec,throughput_bytes_per_sec,total_transactions_committed,total_time_elapsed_in_sec,total_num_views,failed_num_views
real_2_broker,100,10,1,1,10000016,100,fixed-leader-election,Half,27,11,68,585586,26,444,102,2
real_2_broker,100,10,1,1,10000016,100,fixed-leader-election,Half,27,11,68,585586,26,444,102,2
commit_sha,total_nodes,da_committee_size,fixed_leader_for_gpuvid,transactions_per_round,transaction_size,rounds,leader_election_type,partial_results,avg_latency_in_sec,minimum_latency_in_sec,maximum_latency_in_sec,throughput_bytes_per_sec,total_transactions_committed,total_time_elapsed_in_sec,total_num_views,failed_num_views
cdn_with_gpu,10,5,1,1,1000016,100,fixed-leader-election,Full,1,0,4,3310397,96,29,100,0

0 comments on commit 305fe89

Please sign in to comment.