Skip to content

Commit

Permalink
fix: Windows EC2 runner SSM agent stopped working (#567)
Browse files Browse the repository at this point in the history
I am not sure when this started happening, but EC2Launch does not start SSM Agent until the user data script finishes executing. Because our user data script keeps running until the instance shuts itself down, that never happens. Fix this by starting SSM Agent manually from the user data script.

Fixes #564
  • Loading branch information
kichik committed May 13, 2024
1 parent f2f854e commit f08da20
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 3 deletions.
4 changes: 4 additions & 0 deletions src/providers/ec2.ts
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ $runnerTokenPath="{}"
$labels="{}"
$registrationURL="{}"
# EC2Launch only starts ssm agent after user data is done, so we need to start it ourselves (it is disabled by default)
Set-Service -StartupType Manual AmazonSSMAgent
Start-Service AmazonSSMAgent
Start-Job -ScriptBlock {
while (1) {
aws stepfunctions send-task-heartbeat --task-token "$using:TASK_TOKEN"
Expand Down
4 changes: 2 additions & 2 deletions test/default.integ.snapshot/github-runners-test.assets.json
Original file line number Diff line number Diff line change
Expand Up @@ -209,15 +209,15 @@
}
}
},
"7e31d5548fbce527f8c0e4c0bab79dc8ae721e24c4f2934f30d81301929ae02d": {
"c7adc373a2e617c07b79256b0990e1a6ed3b10843f562ffcc8a783769346dce1": {
"source": {
"path": "github-runners-test.template.json",
"packaging": "file"
},
"destinations": {
"current_account-current_region": {
"bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}",
"objectKey": "7e31d5548fbce527f8c0e4c0bab79dc8ae721e24c4f2934f30d81301929ae02d.json",
"objectKey": "c7adc373a2e617c07b79256b0990e1a6ed3b10843f562ffcc8a783769346dce1.json",
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}"
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16123,7 +16123,7 @@
{
"Ref": "EC2Linuxarm64AMIRootDevice3046D37D"
},
"\",\"Ebs\":{\"DeleteOnTermination\":true,\"VolumeSize\":30}}]}},\"ec2, windows, x64 data\":{\"Type\":\"Pass\",\"ResultPath\":\"$.ec2\",\"Parameters\":{\"userdataTemplate\":\"<powershell>\\n$TASK_TOKEN = \\\"{}\\\"\\n$logGroupName=\\\"{}\\\"\\n$runnerNamePath=\\\"{}\\\"\\n$githubDomainPath=\\\"{}\\\"\\n$ownerPath=\\\"{}\\\"\\n$repoPath=\\\"{}\\\"\\n$runnerTokenPath=\\\"{}\\\"\\n$labels=\\\"{}\\\"\\n$registrationURL=\\\"{}\\\"\\n\\nStart-Job -ScriptBlock \\\\{\\n while (1) \\\\{\\n aws stepfunctions send-task-heartbeat --task-token \\\"$using:TASK_TOKEN\\\"\\n sleep 60\\n \\\\}\\n\\\\}\\nfunction setup_logs () \\\\{\\n echo \\\"\\\\{\\n `\\\"logs`\\\": \\\\{\\n `\\\"log_stream_name`\\\": `\\\"unknown`\\\",\\n `\\\"logs_collected`\\\": \\\\{\\n `\\\"files`\\\": \\\\{\\n `\\\"collect_list`\\\": [\\n \\\\{\\n `\\\"file_path`\\\": `\\\"/actions/runner.log`\\\",\\n `\\\"log_group_name`\\\": `\\\"$logGroupName`\\\",\\n `\\\"log_stream_name`\\\": `\\\"$runnerNamePath`\\\",\\n `\\\"timezone`\\\": `\\\"UTC`\\\"\\n \\\\}\\n ]\\n \\\\}\\n \\\\}\\n \\\\}\\n \\\\}\\\" | Out-File -Encoding ASCII $Env:TEMP/log.conf\\n & \\\"C:/Program Files/Amazon/AmazonCloudWatchAgent/amazon-cloudwatch-agent-ctl.ps1\\\" -a fetch-config -m ec2 -s -c file:$Env:TEMP/log.conf\\n\\\\}\\nfunction action () \\\\{\\n cd /actions\\n $RunnerVersion = Get-Content RUNNER_VERSION -Raw\\n if ($RunnerVersion -eq \\\"latest\\\") \\\\{ $RunnerFlags = \\\"\\\" \\\\} else \\\\{ $RunnerFlags = \\\"--disableupdate\\\" \\\\}\\n ./config.cmd --unattended --url \\\"$\\\\{registrationUrl\\\\}\\\" --token \\\"$\\\\{runnerTokenPath\\\\}\\\" --ephemeral --work _work --labels \\\"$\\\\{labels\\\\},cdkghr:started:$(Get-Date -UFormat +%s)\\\" $RunnerFlags --name \\\"$\\\\{runnerNamePath\\\\}\\\" 2>&1 | Out-File -Encoding ASCII -Append /actions/runner.log\\n\\n if ($LASTEXITCODE -ne 0) \\\\{ return 1 \\\\}\\n ./run.cmd 2>&1 | Out-File -Encoding ASCII -Append /actions/runner.log\\n if ($LASTEXITCODE -ne 0) \\\\{ return 2 
\\\\}\\n\\n $STATUS = Select-String -Path './_diag/*.log' -Pattern 'finish job request for job [0-9a-f\\\\-]+ with result: (.*)' | %\\\\{$_.Matches.Groups[1].Value\\\\} | Select-Object -Last 1\\n\\n if ($STATUS) \\\\{\\n echo \\\"CDKGHA JOB DONE $\\\\{labels\\\\} $STATUS\\\" | Out-File -Encoding ASCII -Append /actions/runner.log\\n \\\\}\\n\\n return 0\\n\\\\}\\nsetup_logs\\n$r = action\\nif ($r -eq 0) \\\\{\\n aws stepfunctions send-task-success --task-token \\\"$TASK_TOKEN\\\" --task-output '\\\\{ \\\\}'\\n\\\\} else \\\\{\\n aws stepfunctions send-task-failure --task-token \\\"$TASK_TOKEN\\\"\\n\\\\}\\nStart-Sleep -Seconds 10 # give cloudwatch agent its default 5 seconds buffer duration to upload logs\\nStop-Computer -ComputerName localhost -Force\\n</powershell>\\n\"},\"Next\":\"ec2, windows, x64 subnet1\"},\"ec2, windows, x64 subnet1\":{\"End\":true,\"Catch\":[{\"ErrorEquals\":[\"Ec2.Ec2Exception\",\"States.Timeout\"],\"ResultPath\":\"$.lastSubnetError\",\"Next\":\"ec2, windows, x64 subnet2\"}],\"Type\":\"Task\",\"Comment\":\"",
"\",\"Ebs\":{\"DeleteOnTermination\":true,\"VolumeSize\":30}}]}},\"ec2, windows, x64 data\":{\"Type\":\"Pass\",\"ResultPath\":\"$.ec2\",\"Parameters\":{\"userdataTemplate\":\"<powershell>\\n$TASK_TOKEN = \\\"{}\\\"\\n$logGroupName=\\\"{}\\\"\\n$runnerNamePath=\\\"{}\\\"\\n$githubDomainPath=\\\"{}\\\"\\n$ownerPath=\\\"{}\\\"\\n$repoPath=\\\"{}\\\"\\n$runnerTokenPath=\\\"{}\\\"\\n$labels=\\\"{}\\\"\\n$registrationURL=\\\"{}\\\"\\n\\n# EC2Launch only starts ssm agent after user data is done, so we need to start it ourselves (it is disabled by default)\\nSet-Service -StartupType Manual AmazonSSMAgent\\nStart-Service AmazonSSMAgent\\n\\nStart-Job -ScriptBlock \\\\{\\n while (1) \\\\{\\n aws stepfunctions send-task-heartbeat --task-token \\\"$using:TASK_TOKEN\\\"\\n sleep 60\\n \\\\}\\n\\\\}\\nfunction setup_logs () \\\\{\\n echo \\\"\\\\{\\n `\\\"logs`\\\": \\\\{\\n `\\\"log_stream_name`\\\": `\\\"unknown`\\\",\\n `\\\"logs_collected`\\\": \\\\{\\n `\\\"files`\\\": \\\\{\\n `\\\"collect_list`\\\": [\\n \\\\{\\n `\\\"file_path`\\\": `\\\"/actions/runner.log`\\\",\\n `\\\"log_group_name`\\\": `\\\"$logGroupName`\\\",\\n `\\\"log_stream_name`\\\": `\\\"$runnerNamePath`\\\",\\n `\\\"timezone`\\\": `\\\"UTC`\\\"\\n \\\\}\\n ]\\n \\\\}\\n \\\\}\\n \\\\}\\n \\\\}\\\" | Out-File -Encoding ASCII $Env:TEMP/log.conf\\n & \\\"C:/Program Files/Amazon/AmazonCloudWatchAgent/amazon-cloudwatch-agent-ctl.ps1\\\" -a fetch-config -m ec2 -s -c file:$Env:TEMP/log.conf\\n\\\\}\\nfunction action () \\\\{\\n cd /actions\\n $RunnerVersion = Get-Content RUNNER_VERSION -Raw\\n if ($RunnerVersion -eq \\\"latest\\\") \\\\{ $RunnerFlags = \\\"\\\" \\\\} else \\\\{ $RunnerFlags = \\\"--disableupdate\\\" \\\\}\\n ./config.cmd --unattended --url \\\"$\\\\{registrationUrl\\\\}\\\" --token \\\"$\\\\{runnerTokenPath\\\\}\\\" --ephemeral --work _work --labels \\\"$\\\\{labels\\\\},cdkghr:started:$(Get-Date -UFormat +%s)\\\" $RunnerFlags --name \\\"$\\\\{runnerNamePath\\\\}\\\" 2>&1 | Out-File -Encoding 
ASCII -Append /actions/runner.log\\n\\n if ($LASTEXITCODE -ne 0) \\\\{ return 1 \\\\}\\n ./run.cmd 2>&1 | Out-File -Encoding ASCII -Append /actions/runner.log\\n if ($LASTEXITCODE -ne 0) \\\\{ return 2 \\\\}\\n\\n $STATUS = Select-String -Path './_diag/*.log' -Pattern 'finish job request for job [0-9a-f\\\\-]+ with result: (.*)' | %\\\\{$_.Matches.Groups[1].Value\\\\} | Select-Object -Last 1\\n\\n if ($STATUS) \\\\{\\n echo \\\"CDKGHA JOB DONE $\\\\{labels\\\\} $STATUS\\\" | Out-File -Encoding ASCII -Append /actions/runner.log\\n \\\\}\\n\\n return 0\\n\\\\}\\nsetup_logs\\n$r = action\\nif ($r -eq 0) \\\\{\\n aws stepfunctions send-task-success --task-token \\\"$TASK_TOKEN\\\" --task-output '\\\\{ \\\\}'\\n\\\\} else \\\\{\\n aws stepfunctions send-task-failure --task-token \\\"$TASK_TOKEN\\\"\\n\\\\}\\nStart-Sleep -Seconds 10 # give cloudwatch agent its default 5 seconds buffer duration to upload logs\\nStop-Computer -ComputerName localhost -Force\\n</powershell>\\n\"},\"Next\":\"ec2, windows, x64 subnet1\"},\"ec2, windows, x64 subnet1\":{\"End\":true,\"Catch\":[{\"ErrorEquals\":[\"Ec2.Ec2Exception\",\"States.Timeout\"],\"ResultPath\":\"$.lastSubnetError\",\"Next\":\"ec2, windows, x64 subnet2\"}],\"Type\":\"Task\",\"Comment\":\"",
{
"Ref": "VpcPublicSubnet1Subnet5C2D37C4"
},
Expand Down

0 comments on commit f08da20

Please sign in to comment.