diff --git a/.azure-pipelines/steps/run.yml b/.azure-pipelines/steps/run.yml index 49bac629e7285..4875e2c6754a4 100644 --- a/.azure-pipelines/steps/run.yml +++ b/.azure-pipelines/steps/run.yml @@ -11,6 +11,12 @@ steps: - checkout: self fetchDepth: 2 +# Spawn a background process to collect CPU usage statistics which we'll upload +# at the end of the build. See the comments in the script here for more +# information. +- bash: python src/ci/cpu-usage-over-time.py &> cpu-usage.csv & + displayName: "Collect CPU-usage statistics in the background" + - bash: printenv | sort displayName: Show environment variables @@ -142,3 +148,13 @@ steps: AWS_SECRET_ACCESS_KEY: $(AWS_SECRET_ACCESS_KEY) condition: and(succeeded(), or(eq(variables.DEPLOY, '1'), eq(variables.DEPLOY_ALT, '1'))) displayName: Upload artifacts + +# Upload CPU usage statistics that we've been gathering this whole time. Always +# execute this step in case we want to inspect failed builds, but don't let +# errors here ever fail the build since this is just informational. +- bash: aws s3 cp --acl public-read cpu-usage.csv s3://$DEPLOY_BUCKET/rustc-builds/$BUILD_SOURCEVERSION/cpu-$SYSTEM_JOBNAME.csv + env: + AWS_SECRET_ACCESS_KEY: $(AWS_SECRET_ACCESS_KEY) + condition: contains(variables, 'AWS_SECRET_ACCESS_KEY') + continueOnError: true + displayName: Upload CPU usage statistics diff --git a/src/ci/cpu-usage-over-time.py b/src/ci/cpu-usage-over-time.py new file mode 100644 index 0000000000000..78427a6360a9f --- /dev/null +++ b/src/ci/cpu-usage-over-time.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python +# ignore-tidy-linelength + +# This is a small script that we use on CI to collect CPU usage statistics of +# our builders. By seeing graphs of CPU usage over time we hope to correlate +# that with possible improvements to Rust's own build system, ideally diagnosing +# that either builders are always fully using their CPU resources or they're +# idle for long stretches of time. +# +# This script is relatively simple, but it's platform specific. Each platform +# (OSX/Windows/Linux) has a different way of calculating the current state of +# CPU at a point in time. We then compare two captured states to determine the +# percentage of time spent in one state versus another. The state capturing is +# all platform-specific but the loop at the bottom is the cross platform part +# that executes everywhere. +# +# # Viewing statistics +# +# All builders will upload their CPU statistics as CSV files to our S3 buckets. +# These URLS look like: +# +# https://$bucket.s3.amazonaws.com/rustc-builds/$commit/cpu-$builder.csv +# +# for example +# +# https://rust-lang-ci2.s3.amazonaws.com/rustc-builds/68baada19cd5340f05f0db15a3e16d6671609bcc/cpu-x86_64-apple.csv +# +# Each CSV file has two columns. The first is the timestamp of the measurement +# and the second column is the % of idle cpu time in that time slice. Ideally +# the second column is always zero. +# +# Once you've downloaded a file there's various ways to plot it and visualize +# it. For command line usage you can use a script like so: +# +# set timefmt '%Y-%m-%dT%H:%M:%S' +# set xdata time +# set ylabel "Idle CPU %" +# set xlabel "Time" +# set datafile sep ',' +# set term png +# set output "printme.png" +# set grid +# builder = "i686-apple" +# plot "cpu-".builder.".csv" using 1:2 with lines title builder +# +# Executed as `gnuplot < ./foo.plot` it will generate a graph called +# `printme.png` which you can then open up. If you know how to improve this +# script or the viewing process that would be much appreciated :) (or even if +# you know how to automate it!) + +import datetime +import sys +import time + +if sys.platform == 'linux2': + class State: + def __init__(self): + with open('/proc/stat', 'r') as file: + data = file.readline().split() + if data[0] != 'cpu': + raise Exception('did not start with "cpu"') + self.user = int(data[1]) + self.nice = int(data[2]) + self.system = int(data[3]) + self.idle = int(data[4]) + self.iowait = int(data[5]) + self.irq = int(data[6]) + self.softirq = int(data[7]) + self.steal = int(data[8]) + self.guest = int(data[9]) + self.guest_nice = int(data[10]) + + def idle_since(self, prev): + user = self.user - prev.user + nice = self.nice - prev.nice + system = self.system - prev.system + idle = self.idle - prev.idle + iowait = self.iowait - prev.iowait + irq = self.irq - prev.irq + softirq = self.softirq - prev.softirq + steal = self.steal - prev.steal + guest = self.guest - prev.guest + guest_nice = self.guest_nice - prev.guest_nice + total = user + nice + system + idle + iowait + irq + softirq + steal + guest + guest_nice + return float(idle) / float(total) * 100 + +elif sys.platform == 'win32': + from ctypes.wintypes import DWORD + from ctypes import Structure, windll, WinError, GetLastError, byref + + class FILETIME(Structure): + _fields_ = [ + ("dwLowDateTime", DWORD), + ("dwHighDateTime", DWORD), + ] + + class State: + def __init__(self): + idle, kernel, user = FILETIME(), FILETIME(), FILETIME() + + success = windll.kernel32.GetSystemTimes( + byref(idle), + byref(kernel), + byref(user), + ) + + assert success, WinError(GetLastError())[1] + + self.idle = (idle.dwHighDateTime << 32) | idle.dwLowDateTime + self.kernel = (kernel.dwHighDateTime << 32) | kernel.dwLowDateTime + self.user = (user.dwHighDateTime << 32) | user.dwLowDateTime + + def idle_since(self, prev): + idle = self.idle - prev.idle + user = self.user - prev.user + kernel = self.kernel - prev.kernel + return float(idle) / float(user + kernel) * 100 + +elif sys.platform == 'darwin': + from ctypes import * + libc = cdll.LoadLibrary('/usr/lib/libc.dylib') + + PROESSOR_CPU_LOAD_INFO = c_int(2) + CPU_STATE_USER = 0 + CPU_STATE_SYSTEM = 1 + CPU_STATE_IDLE = 2 + CPU_STATE_NICE = 3 + c_int_p = POINTER(c_int) + + class State: + def __init__(self): + num_cpus_u = c_uint(0) + cpu_info = c_int_p() + cpu_info_cnt = c_int(0) + err = libc.host_processor_info( + libc.mach_host_self(), + PROESSOR_CPU_LOAD_INFO, + byref(num_cpus_u), + byref(cpu_info), + byref(cpu_info_cnt), + ) + assert err == 0 + self.user = 0 + self.system = 0 + self.idle = 0 + self.nice = 0 + cur = 0 + while cur < cpu_info_cnt.value: + self.user += cpu_info[cur + CPU_STATE_USER] + self.system += cpu_info[cur + CPU_STATE_SYSTEM] + self.idle += cpu_info[cur + CPU_STATE_IDLE] + self.nice += cpu_info[cur + CPU_STATE_NICE] + cur += num_cpus_u.value + + def idle_since(self, prev): + user = self.user - prev.user + system = self.system - prev.system + idle = self.idle - prev.idle + nice = self.nice - prev.nice + return float(idle) / float(user + system + idle + nice) * 100.0 + +else: + print('unknown platform', sys.platform) + sys.exit(1) + +cur_state = State(); +print("Time,Idle") +while True: + time.sleep(1); + next_state = State(); + now = datetime.datetime.utcnow().isoformat() + idle = next_state.idle_since(cur_state) + print("%s,%s" % (now, idle)) + sys.stdout.flush() + cur_state = next_state