diff --git a/.github/workflows/run-simulators.yml b/.github/workflows/run-simulators.yml
index 885e386a4..3b04f79df 100644
--- a/.github/workflows/run-simulators.yml
+++ b/.github/workflows/run-simulators.yml
@@ -10,13 +10,52 @@ jobs:
     runs-on: ubuntu-latest
     concurrency:
       group: sim
+    # Exposed so that stop_ec2_instance can detach and delete the volume created below
+    outputs:
+      volume_id: ${{ steps.create_volume_step.outputs.volume_id }}
+    env:
+      INSTANCE_ID: ${{ secrets.AWS_EC2_INSTANCE_ID }}
+      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+      AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }}
     steps:
+      - name: Create Volume from Latest Snapshot and Attach to Instance
+        id: create_volume_step
+        run: |
+          # Retrieve the latest snapshot ID
+          LATEST_SNAPSHOT_ID=$(aws ec2 describe-snapshots --owner-ids self --query 'Snapshots | sort_by(@, &StartTime) | [-1].SnapshotId' --output text)
+          # With --output text an empty query result is printed as the literal string "None"
+          if [[ -z "$LATEST_SNAPSHOT_ID" || "$LATEST_SNAPSHOT_ID" == "None" ]]; then
+            echo "No snapshot found to create the volume from."
+            exit 1
+          fi
+          echo "Checking availability for snapshot: $LATEST_SNAPSHOT_ID"
+
+          # Wait for the snapshot to complete
+          aws ec2 wait snapshot-completed --snapshot-ids "$LATEST_SNAPSHOT_ID"
+          echo "Snapshot is ready."
+
+          # A volume can only be attached to an instance in the same Availability Zone,
+          # so look the zone up from the instance instead of hard-coding it
+          availability_zone=$(aws ec2 describe-instances --instance-ids "$INSTANCE_ID" --query 'Reservations[0].Instances[0].Placement.AvailabilityZone' --output text)
+
+          # Create a new volume from the latest snapshot
+          volume_id=$(aws ec2 create-volume --snapshot-id "$LATEST_SNAPSHOT_ID" --availability-zone "$availability_zone" --volume-type gp3 --size 400 --throughput 250 --query "VolumeId" --output text)
+          echo "Created volume with ID: $volume_id"
+
+          # Set volume_id as output
+          echo "volume_id=$volume_id" >> "$GITHUB_OUTPUT"
+          cat "$GITHUB_OUTPUT"
+
+          # Wait until the volume is available
+          aws ec2 wait volume-available --volume-ids "$volume_id"
+          echo "Volume is now available"
+
+          # Attach the volume to the instance
+          aws ec2 attach-volume --volume-id "$volume_id" --instance-id "$INSTANCE_ID" --device /dev/sda1
+          echo "Volume $volume_id attached to instance $INSTANCE_ID as /dev/sda1"
+
       - name: Start EC2 Instance
-        env:
-          INSTANCE_ID: ${{ secrets.AWS_EC2_INSTANCE_ID }}
-          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }}
         run: |
           # Get the instance state
           instance_state=$(aws ec2 describe-instances --instance-ids $INSTANCE_ID | jq -r '.Reservations[].Instances[].State.Name')
@@ -27,7 +66,7 @@ jobs:
             sleep 10
             instance_state=$(aws ec2 describe-instances --instance-ids $INSTANCE_ID | jq -r '.Reservations[].Instances[].State.Name')
           done
-          
+
           # Check if instance state is "stopped"
           if [[ "$instance_state" == "stopped" ]]; then
             echo "Instance is stopped, starting it..."
@@ -42,34 +81,17 @@ jobs:
             exit 1
           fi
 
-          # wait for status checks to pass
-          TIMEOUT=300 # Timeout in seconds
-          START_TIME=$(date +%s)
-          END_TIME=$((START_TIME + TIMEOUT))
-          while true; do
-            response=$(aws ec2 describe-instance-status --instance-ids $INSTANCE_ID)
-            system_status=$(echo "$response" | jq -r '.InstanceStatuses[0].SystemStatus.Status')
-            instance_status=$(echo "$response" | jq -r '.InstanceStatuses[0].InstanceStatus.Status')
-
-            if [[ "$system_status" == "ok" && "$instance_status" == "ok" ]]; then
-              echo "Both SystemStatus and InstanceStatus are 'ok'"
-              exit 0
-            fi
-
-            CURRENT_TIME=$(date +%s)
-            if [[ "$CURRENT_TIME" -ge "$END_TIME" ]]; then
-              echo "Timeout: Both SystemStatus and InstanceStatus have not reached 'ok' state within $TIMEOUT seconds."
-              exit 1
-            fi
-
-            sleep 10 # Check status every 10 seconds
-          done
+          # Wait for instance status checks to pass
+          echo "Waiting for instance status checks to pass..."
+          aws ec2 wait instance-status-ok --instance-ids "$INSTANCE_ID"
+          echo "Instance is now ready for use."
+
 
   check_simulator_version_updates:
     name: check_simulator_version_updates
     runs-on: ubuntu-latest
     needs: start_ec2_instance
-    steps: 
+    steps:
       - name: Check for Simulator Version Updates
         env:
           PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
@@ -109,11 +131,11 @@ jobs:
             echo "NVIDIA Driver is not set"
             exit 1
           fi
-          ' 
+          '
       - name: NVIDIA Driver is not set
         if: ${{ failure() }}
         run: |
-          echo "NVIDIA SMI is not working, please run the steps here on the instance:" 
+          echo "NVIDIA SMI is not working, please run the steps here on the instance:"
           echo "https://scenic-lang.atlassian.net/wiki/spaces/KAN/pages/2785287/Setting+Up+AWS+VM?parentProduct=JSW&initialAllowedFeatures=byline-contributors.byline-extensions.page-comments.delete.page-reactions.inline-comments.non-licensed-share&themeState=dark%253Adark%2520light%253Alight%2520spacing%253Aspacing%2520colorMode%253Alight&locale=en-US#Install-NVIDIA-Drivers"
 
   run_carla_simulators:
@@ -128,17 +150,17 @@ jobs:
           USER_NAME: ${{secrets.SSH_USERNAME}}
         run: |
           echo "$PRIVATE_KEY" > private_key && chmod 600 private_key
-          ssh -o StrictHostKeyChecking=no -i private_key ${USER_NAME}@${HOSTNAME} '
+          ssh -o StrictHostKeyChecking=no -o ServerAliveInterval=60 -o ServerAliveCountMax=3 -i private_key ${USER_NAME}@${HOSTNAME} '
           cd /home/ubuntu/actions/Scenic &&
           source venv/bin/activate &&
           carla_versions=($(find /software -maxdepth 1 -type d -name 'carla*')) &&
           for version in "${carla_versions[@]}"; do
-            echo "============================= CARLA $version =============================" 
+            echo "============================= CARLA $version ============================="
             export CARLA_ROOT="$version"
             pytest tests/simulators/carla
           done
           '
-  
+
   run_webots_simulators:
     name: run_webots_simulators
     runs-on: ubuntu-latest
@@ -164,39 +186,62 @@ jobs:
           done
           kill %1
           '
-  
+
   stop_ec2_instance:
     name: stop_ec2_instance
     runs-on: ubuntu-latest
-    needs: [run_carla_simulators, run_webots_simulators]
-    steps: 
+    needs: [start_ec2_instance, check_simulator_version_updates, check_nvidia_smi, run_carla_simulators, run_webots_simulators]
+    if: always()
+    env:
+      VOLUME_ID: ${{ needs.start_ec2_instance.outputs.volume_id }}
+      INSTANCE_ID: ${{ secrets.AWS_EC2_INSTANCE_ID }}
+      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+      AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }}
+    steps:
       - name: Stop EC2 Instance
-        env:
-          INSTANCE_ID: ${{ secrets.AWS_EC2_INSTANCE_ID }}
-          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
-          AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }}
         run: |
-          # Get the instance state
+          # Get the instance state and stop it if running
           instance_state=$(aws ec2 describe-instances --instance-ids $INSTANCE_ID | jq -r '.Reservations[].Instances[].State.Name')
-
-          # If the machine is pending wait for it to fully start
-          while [ "$instance_state" == "pending" ]; do
-            echo "Instance is pending startup, waiting for it to fully start..."
-            sleep 10
-            instance_state=$(aws ec2 describe-instances --instance-ids $INSTANCE_ID | jq -r '.Reservations[].Instances[].State.Name')
-          done
-
-          # Check if instance state is "stopped"
+
+          # This job runs with always(), so the instance may still be booting;
+          # wait until it is running before asking it to stop
+          if [[ "$instance_state" == "pending" ]]; then
+            echo "Instance is pending, waiting for it to reach the running state..."
+            aws ec2 wait instance-running --instance-ids "$INSTANCE_ID"
+            instance_state="running"
+          fi
+
           if [[ "$instance_state" == "running" ]]; then
-              echo "Instance is running, stopping it..."
-              aws ec2 stop-instances --instance-ids $INSTANCE_ID
+            echo "Instance is running, stopping it..."
+            aws ec2 stop-instances --instance-ids "$INSTANCE_ID"
+            aws ec2 wait instance-stopped --instance-ids "$INSTANCE_ID"
+            echo "Instance has stopped."
           elif [[ "$instance_state" == "stopping" ]]; then
-              echo "Instance is stopping..."
+            # A stop is already in progress; wait for it so the volume can be detached afterwards
+            echo "Instance is stopping, waiting for it to stop..."
+            aws ec2 wait instance-stopped --instance-ids "$INSTANCE_ID"
+            echo "Instance has stopped."
           elif [[ "$instance_state" == "stopped" ]]; then
-              echo "Instance is already stopped..."
-              exit 0
+            echo "Instance is already stopped."
           else
-              echo "Unknown instance state: $instance_state"
-              exit 1
+            echo "Unexpected instance state: $instance_state"
+            exit 1
           fi
+
+      - name: Detach Volume
+        # Clean up even when stopping the instance failed, but only if a volume was created
+        if: ${{ always() && env.VOLUME_ID != '' }}
+        run: |
+          # Detach the volume
+          aws ec2 detach-volume --volume-id "$VOLUME_ID"
+          aws ec2 wait volume-available --volume-ids "$VOLUME_ID"
+          echo "Volume $VOLUME_ID detached."
+
+      - name: Delete Volume
+        # Still attempt the deletion when an earlier step failed, so the cleanup is never skipped silently
+        if: ${{ always() && env.VOLUME_ID != '' }}
+        run: |
+          # Delete the per-run volume created by start_ec2_instance
+          aws ec2 delete-volume --volume-id "$VOLUME_ID"
+          echo "Volume $VOLUME_ID deleted."
diff --git a/tests/simulators/carla/test_actions.py b/tests/simulators/carla/test_actions.py
index f0aede475..7914ad04a 100644
--- a/tests/simulators/carla/test_actions.py
+++ b/tests/simulators/carla/test_actions.py
@@ -43,19 +43,21 @@ def getCarlaSimulator(getAssetPath):
             f"bash {CARLA_ROOT}/CarlaUE4.sh -RenderOffScreen", shell=True
         )
 
-        for _ in range(30):
+        for _ in range(180):
             if isCarlaServerRunning():
                 break
             time.sleep(1)
+        else:
+            pytest.fail("Unable to connect to CARLA.")
 
-        # Extra 5 seconds to ensure server startup
-        time.sleep(5)
+        # Extra 10 seconds to ensure server startup
+        time.sleep(10)
 
     base = getAssetPath("maps/CARLA")
 
     def _getCarlaSimulator(town):
         path = os.path.join(base, f"{town}.xodr")
-        simulator = CarlaSimulator(map_path=path, carla_map=town)
+        simulator = CarlaSimulator(map_path=path, carla_map=town, timeout=180)
         return simulator, town, path
 
     yield _getCarlaSimulator
@@ -76,7 +78,7 @@ def test_throttle(getCarlaSimulator):
         behavior DriveWithThrottle():
             while True:
                 take SetThrottleAction(1)
-        
+
         ego = new Car at (369, -326), with behavior DriveWithThrottle
         record ego.speed as CarSpeed
         terminate after 5 steps
@@ -109,8 +111,8 @@ def test_brake(getCarlaSimulator):
             do DriveWithThrottle() for 2 steps
             do Brake() for 6 steps
 
-        ego = new Car at (369, -326), 
-            with blueprint 'vehicle.toyota.prius', 
+        ego = new Car at (369, -326),
+            with blueprint 'vehicle.toyota.prius',
             with behavior DriveThenBrake
         record final ego.speed as CarSpeed
         terminate after 8 steps