Skip to content

Commit

Permalink
Initialize LLVM time trace profiler on each code generation thread
Browse files Browse the repository at this point in the history
In https://reviews.llvm.org/D71059 LLVM 11, the time trace profiler was
extended to support multiple threads.

`timeTraceProfilerInitialize` creates a thread local profiler instance.
When a thread finishes `timeTraceProfilerFinishThread` moves a thread
local instance into a global collection of instances. Finally when all
codegen work is complete `timeTraceProfilerWrite` writes data from the
current thread local instance and the instances in global collection
of instances.

Previously, the profiler was intialized on a single thread only. Since
this thread performs no code generation on its own, the resulting
profile was empty.

Update LLVM codegen to initialize & finish time trace profiler on each
code generation thread.
  • Loading branch information
tmiasko committed Nov 5, 2021
1 parent d22dd65 commit 5a09e12
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 56 deletions.
49 changes: 49 additions & 0 deletions compiler/rustc_codegen_llvm/src/lib.rs
Expand Up @@ -76,6 +76,27 @@ mod value;
#[derive(Clone)]
pub struct LlvmCodegenBackend(());

struct TimeTraceProfiler {
enabled: bool,
}

impl TimeTraceProfiler {
fn new(enabled: bool) -> Self {
if enabled {
unsafe { llvm::LLVMTimeTraceProfilerInitialize() }
}
TimeTraceProfiler { enabled }
}
}

impl Drop for TimeTraceProfiler {
fn drop(&mut self) {
if self.enabled {
unsafe { llvm::LLVMTimeTraceProfilerFinishThread() }
}
}
}

impl ExtraBackendMethods for LlvmCodegenBackend {
fn new_metadata(&self, tcx: TyCtxt<'_>, mod_name: &str) -> ModuleLlvm {
ModuleLlvm::new_metadata(tcx, mod_name)
Expand Down Expand Up @@ -119,6 +140,34 @@ impl ExtraBackendMethods for LlvmCodegenBackend {
fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str> {
llvm_util::tune_cpu(sess)
}

fn spawn_thread<F, T>(time_trace: bool, f: F) -> std::thread::JoinHandle<T>
where
F: FnOnce() -> T,
F: Send + 'static,
T: Send + 'static,
{
std::thread::spawn(move || {
let _profiler = TimeTraceProfiler::new(time_trace);
f()
})
}

fn spawn_named_thread<F, T>(
time_trace: bool,
name: String,
f: F,
) -> std::io::Result<std::thread::JoinHandle<T>>
where
F: FnOnce() -> T,
F: Send + 'static,
T: Send + 'static,
{
std::thread::Builder::new().name(name).spawn(move || {
let _profiler = TimeTraceProfiler::new(time_trace);
f()
})
}
}

impl WriteBackendMethods for LlvmCodegenBackend {
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_codegen_llvm/src/llvm/ffi.rs
Expand Up @@ -1737,6 +1737,8 @@ extern "C" {

pub fn LLVMTimeTraceProfilerInitialize();

pub fn LLVMTimeTraceProfilerFinishThread();

pub fn LLVMTimeTraceProfilerFinish(FileName: *const c_char);

pub fn LLVMAddAnalysisPasses(T: &'a TargetMachine, PM: &PassManager<'a>);
Expand Down
5 changes: 0 additions & 5 deletions compiler/rustc_codegen_llvm/src/llvm_util.rs
Expand Up @@ -113,11 +113,6 @@ unsafe fn configure_llvm(sess: &Session) {
}

if sess.opts.debugging_opts.llvm_time_trace {
// time-trace is not thread safe and running it in parallel will cause seg faults.
if !sess.opts.debugging_opts.no_parallel_llvm {
bug!("`-Z llvm-time-trace` requires `-Z no-parallel-llvm")
}

llvm::LLVMTimeTraceProfilerInitialize();
}

Expand Down
102 changes: 51 additions & 51 deletions compiler/rustc_codegen_ssa/src/back/write.rs
Expand Up @@ -310,6 +310,7 @@ pub struct CodegenContext<B: WriteBackendMethods> {
pub no_landing_pads: bool,
pub save_temps: bool,
pub fewer_names: bool,
pub time_trace: bool,
pub exported_symbols: Option<Arc<ExportedSymbols>>,
pub opts: Arc<config::Options>,
pub crate_types: Vec<CrateType>,
Expand Down Expand Up @@ -1039,6 +1040,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
no_landing_pads: sess.panic_strategy() == PanicStrategy::Abort,
fewer_names: sess.fewer_names(),
save_temps: sess.opts.cg.save_temps,
time_trace: sess.opts.debugging_opts.llvm_time_trace,
opts: Arc::new(sess.opts.clone()),
prof: sess.prof.clone(),
exported_symbols,
Expand Down Expand Up @@ -1198,7 +1200,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
// Each LLVM module is automatically sent back to the coordinator for LTO if
// necessary. There's already optimizations in place to avoid sending work
// back to the coordinator if LTO isn't requested.
return thread::spawn(move || {
return B::spawn_thread(cgcx.time_trace, move || {
let mut worker_id_counter = 0;
let mut free_worker_ids = Vec::new();
let mut get_worker_id = |free_worker_ids: &mut Vec<usize>| {
Expand Down Expand Up @@ -1615,59 +1617,57 @@ fn start_executing_work<B: ExtraBackendMethods>(
pub struct WorkerFatalError;

fn spawn_work<B: ExtraBackendMethods>(cgcx: CodegenContext<B>, work: WorkItem<B>) {
let builder = thread::Builder::new().name(work.short_description());
builder
.spawn(move || {
// Set up a destructor which will fire off a message that we're done as
// we exit.
struct Bomb<B: ExtraBackendMethods> {
coordinator_send: Sender<Box<dyn Any + Send>>,
result: Option<Result<WorkItemResult<B>, FatalError>>,
worker_id: usize,
}
impl<B: ExtraBackendMethods> Drop for Bomb<B> {
fn drop(&mut self) {
let worker_id = self.worker_id;
let msg = match self.result.take() {
Some(Ok(WorkItemResult::Compiled(m))) => {
Message::Done::<B> { result: Ok(m), worker_id }
}
Some(Ok(WorkItemResult::NeedsLink(m))) => {
Message::NeedsLink::<B> { module: m, worker_id }
}
Some(Ok(WorkItemResult::NeedsFatLTO(m))) => {
Message::NeedsFatLTO::<B> { result: m, worker_id }
}
Some(Ok(WorkItemResult::NeedsThinLTO(name, thin_buffer))) => {
Message::NeedsThinLTO::<B> { name, thin_buffer, worker_id }
}
Some(Err(FatalError)) => {
Message::Done::<B> { result: Err(Some(WorkerFatalError)), worker_id }
}
None => Message::Done::<B> { result: Err(None), worker_id },
};
drop(self.coordinator_send.send(Box::new(msg)));
}
B::spawn_named_thread(cgcx.time_trace, work.short_description(), move || {
// Set up a destructor which will fire off a message that we're done as
// we exit.
struct Bomb<B: ExtraBackendMethods> {
coordinator_send: Sender<Box<dyn Any + Send>>,
result: Option<Result<WorkItemResult<B>, FatalError>>,
worker_id: usize,
}
impl<B: ExtraBackendMethods> Drop for Bomb<B> {
fn drop(&mut self) {
let worker_id = self.worker_id;
let msg = match self.result.take() {
Some(Ok(WorkItemResult::Compiled(m))) => {
Message::Done::<B> { result: Ok(m), worker_id }
}
Some(Ok(WorkItemResult::NeedsLink(m))) => {
Message::NeedsLink::<B> { module: m, worker_id }
}
Some(Ok(WorkItemResult::NeedsFatLTO(m))) => {
Message::NeedsFatLTO::<B> { result: m, worker_id }
}
Some(Ok(WorkItemResult::NeedsThinLTO(name, thin_buffer))) => {
Message::NeedsThinLTO::<B> { name, thin_buffer, worker_id }
}
Some(Err(FatalError)) => {
Message::Done::<B> { result: Err(Some(WorkerFatalError)), worker_id }
}
None => Message::Done::<B> { result: Err(None), worker_id },
};
drop(self.coordinator_send.send(Box::new(msg)));
}
}

let mut bomb = Bomb::<B> {
coordinator_send: cgcx.coordinator_send.clone(),
result: None,
worker_id: cgcx.worker,
};
let mut bomb = Bomb::<B> {
coordinator_send: cgcx.coordinator_send.clone(),
result: None,
worker_id: cgcx.worker,
};

// Execute the work itself, and if it finishes successfully then flag
// ourselves as a success as well.
//
// Note that we ignore any `FatalError` coming out of `execute_work_item`,
// as a diagnostic was already sent off to the main thread - just
// surface that there was an error in this worker.
bomb.result = {
let _prof_timer = work.start_profiling(&cgcx);
Some(execute_work_item(&cgcx, work))
};
})
.expect("failed to spawn thread");
// Execute the work itself, and if it finishes successfully then flag
// ourselves as a success as well.
//
// Note that we ignore any `FatalError` coming out of `execute_work_item`,
// as a diagnostic was already sent off to the main thread - just
// surface that there was an error in this worker.
bomb.result = {
let _prof_timer = work.start_profiling(&cgcx);
Some(execute_work_item(&cgcx, work))
};
})
.expect("failed to spawn thread");
}

enum SharedEmitterMessage {
Expand Down
22 changes: 22 additions & 0 deletions compiler/rustc_codegen_ssa/src/traits/backend.rs
Expand Up @@ -142,4 +142,26 @@ pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Se
) -> TargetMachineFactoryFn<Self>;
fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str;
fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str>;

fn spawn_thread<F, T>(_time_trace: bool, f: F) -> std::thread::JoinHandle<T>
where
F: FnOnce() -> T,
F: Send + 'static,
T: Send + 'static,
{
std::thread::spawn(f)
}

fn spawn_named_thread<F, T>(
_time_trace: bool,
name: String,
f: F,
) -> std::io::Result<std::thread::JoinHandle<T>>
where
F: FnOnce() -> T,
F: Send + 'static,
T: Send + 'static,
{
std::thread::Builder::new().name(name).spawn(f)
}
}
4 changes: 4 additions & 0 deletions compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp
Expand Up @@ -75,6 +75,10 @@ extern "C" void LLVMTimeTraceProfilerInitialize() {
/* ProcName */ "rustc");
}

extern "C" void LLVMTimeTraceProfilerFinishThread() {
timeTraceProfilerFinishThread();
}

extern "C" void LLVMTimeTraceProfilerFinish(const char* FileName) {
StringRef FN(FileName);
std::error_code EC;
Expand Down

0 comments on commit 5a09e12

Please sign in to comment.