Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix profiling on OS X #4159

Merged
merged 5 commits into from
Aug 28, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion Make.inc
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,12 @@ endif
# Pick what gets linked for stack unwinding:
#   - a system-provided libunwind, when USE_SYSTEM_LIBUNWIND is 1;
#   - the bundled libosxunwind static archive on Darwin;
#   - the bundled libunwind static archives everywhere else.
ifeq ($(USE_SYSTEM_LIBUNWIND), 1)
LIBUNWIND=-lunwind-generic -lunwind
else ifeq ($(OS),Darwin)
LIBUNWIND=$(BUILD)/lib/libosxunwind.a
else
LIBUNWIND=$(BUILD)/lib/libunwind-generic.a $(BUILD)/lib/libunwind.a
endif

ifeq ($(USE_SYSTEM_LLVM), 1)
LLVM_CONFIG ?= llvm-config
Expand Down Expand Up @@ -353,7 +357,7 @@ INSTALL_NAME_CMD = install_name_tool -id $(INSTALL_NAME_ID_DIR)
INSTALL_NAME_CHANGE_CMD = install_name_tool -change
RPATH = -Wl,-rpath,'@executable_path/../$(JL_LIBDIR)' -Wl,-rpath,'@executable_path/../$(JL_PRIVATE_LIBDIR)'
SHLIB_EXT = dylib
OSLIBS += -ldl -Wl,-w -framework CoreFoundation -framework CoreServices
OSLIBS += -ldl -Wl,-w -framework CoreFoundation -framework CoreServices $(LIBUNWIND)
WHOLE_ARCHIVE = -Xlinker -all_load
NO_WHOLE_ARCHIVE =
JLDFLAGS =
Expand Down
5 changes: 3 additions & 2 deletions base/profile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,9 @@ end

# Number of backtrace "steps" that are triggered by taking the backtrace, e.g., inside profile_bt
# May be platform-specific?
@unix_only const btskip = 2
@windows_only const btskip = 0
#@unix_only const btskip = 2
#@windows_only const btskip = 0
const btskip = 0

## Print as a flat list
# Counts the number of times each line appears, at any nesting level
Expand Down
1 change: 1 addition & 0 deletions deps/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
/libgrisu.so
/lapack-*
/libunwind-*
/libosxunwind-*
/lighttpd-*
/llvm-*
/mpfr-*
Expand Down
37 changes: 36 additions & 1 deletion deps/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ ifeq ($(OS), Linux)
STAGE1_DEPS += unwind
else ifeq ($(OS), FreeBSD)
STAGE1_DEPS += unwind
else ifeq ($(OS), Darwin)
STAGE1_DEPS += osxunwind
endif
endif

Expand Down Expand Up @@ -139,7 +141,7 @@ install: $(addprefix install-, $(LIBS))
cleanall: $(addprefix clean-, $(LIBS))
distclean: $(addprefix distclean-, $(LIBS))
rm -rf $(BUILD)
getall: get-llvm get-readline get-uv get-pcre get-double-conversion get-openlibm get-random get-openblas get-lapack get-fftw get-suitesparse get-arpack get-unwind get-gmp get-mpfr get-zlib get-patchelf
getall: get-llvm get-readline get-uv get-pcre get-double-conversion get-openlibm get-random get-openblas get-lapack get-fftw get-suitesparse get-arpack get-unwind get-osxunwind get-gmp get-mpfr get-zlib get-patchelf

## PATHS ##
DIRS = $(addprefix $(BUILD)/,lib include bin share etc)
Expand Down Expand Up @@ -1279,6 +1281,39 @@ compile-unwind: $(LIBUNWIND_TARGET_SOURCE)
check-unwind: libunwind-$(UNWIND_VER)/checked
install-unwind: $(LIBUNWIND_TARGET_OBJ)

## OS X Unwind ##

# Variables forwarded to the libosxunwind sub-make. The library is built
# unoptimized with full debug info (-ggdb3 -O0).
OSXUNWIND_FLAGS = ARCH="$(ARCH)" CC="$(CC)" FC="$(FC)" AR="$(AR)" OS="$(OS)" USECLANG=$(USECLANG) USEGCC=$(USEGCC) CFLAGS="-ggdb3 -O0" CXXFLAGS="-ggdb3 -O0" SFLAGS="-ggdb3"

# Installed shared library, and the same library inside the unpacked source tree.
OSXUNWIND_OBJ_TARGET = $(BUILD)/lib/libosxunwind.$(SHLIB_EXT)
OSXUNWIND_OBJ_SOURCE = libosxunwind-$(OSXUNWIND_VER)/libosxunwind.$(SHLIB_EXT)

# Download the release tarball.
libosxunwind-$(OSXUNWIND_VER).tar.gz:
	$(JLDOWNLOAD) $@ https://github.com/JuliaLang/libosxunwind/archive/v$(OSXUNWIND_VER).tar.gz

# Unpack the tarball; the upstream Makefile stands in for "sources are present".
# touch -c refreshes its timestamp so it is newer than the tarball.
libosxunwind-$(OSXUNWIND_VER)/Makefile: libosxunwind-$(OSXUNWIND_VER).tar.gz
	tar xfz $<
	touch -c $@

# Build the library in its own source tree.
$(OSXUNWIND_OBJ_SOURCE): libosxunwind-$(OSXUNWIND_VER)/Makefile
	$(MAKE) -C libosxunwind-$(OSXUNWIND_VER) $(OSXUNWIND_FLAGS)
	touch -c $@

# Install the static archive, the shared library and the headers into $(BUILD),
# then set the shared library's install name. $(BUILD)/lib is order-only: it
# must exist, but its timestamp must not trigger a reinstall.
$(OSXUNWIND_OBJ_TARGET): $(OSXUNWIND_OBJ_SOURCE) | $(BUILD)/lib
	cp libosxunwind-$(OSXUNWIND_VER)/libosxunwind.a $(BUILD)/lib/libosxunwind.a
	cp $< $@
	cp -R libosxunwind-$(OSXUNWIND_VER)/include/* $(BUILD)/include
	$(INSTALL_NAME_CMD)libosxunwind.$(SHLIB_EXT) $@

# Remove build products. Errors are ignored (leading '-') because the source
# tree or the installed files may not exist yet.
# Fixed: this previously removed $(OPENLIBM_OBJ_TARGET) (copy-paste slip), which
# deleted openlibm's library and left libosxunwind installed. The static archive
# copied by the install rule is removed here as well.
clean-osxunwind:
	-$(MAKE) -C libosxunwind-$(OSXUNWIND_VER) distclean $(OSXUNWIND_FLAGS)
	-rm $(OSXUNWIND_OBJ_TARGET) $(BUILD)/lib/libosxunwind.a
distclean-osxunwind: clean-osxunwind

# Standard per-dependency stage targets.
get-osxunwind: libosxunwind-$(OSXUNWIND_VER)/Makefile
configure-osxunwind: get-osxunwind
compile-osxunwind: $(OSXUNWIND_OBJ_SOURCE)
check-osxunwind: compile-osxunwind
install-osxunwind: $(OSXUNWIND_OBJ_TARGET)

## GMP ##

Expand Down
1 change: 1 addition & 0 deletions deps/Versions.make
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ ARPACK_VER = 3.1.3
FFTW_VER = 3.3.3
SUITESPARSE_VER = 4.2.1
UNWIND_VER = 1.1
OSXUNWIND_VER = 0.0.1-rc3
GMP_VER=5.1.1
MPFR_VER=3.1.2
ZLIB_VER = 1.2.8
Expand Down
12 changes: 10 additions & 2 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,9 @@ void darwin_stack_overflow_handler(unw_context_t *uc)
#define HANDLE_MACH_ERROR(msg, retval) \
if (retval!=KERN_SUCCESS) { mach_error(msg ":", (retval)); jl_exit(1); }

extern kern_return_t profiler_segv_handler(mach_port_t,mach_port_t,mach_port_t,exception_type_t,exception_data_t,mach_msg_type_number_t);
extern volatile mach_port_t mach_profiler_thread;

//exc_server uses dlsym to find symbol
DLLEXPORT kern_return_t catch_exception_raise
(mach_port_t exception_port,
Expand All @@ -562,7 +565,12 @@ DLLEXPORT kern_return_t catch_exception_raise
kern_return_t ret;
//memset(&state,0,sizeof(x86_thread_state64_t));
//memset(&exc_state,0,sizeof(x86_exception_state64_t));
if (thread == mach_profiler_thread)
{
return profiler_segv_handler(exception_port,thread,task,exception,code,code_count);
}
ret = thread_get_state(thread,x86_EXCEPTION_STATE64,(thread_state_t)&exc_state,&exc_count);
HANDLE_MACH_ERROR("thread_get_state(1)",ret);
uint64_t fault_addr = exc_state.__faultvaddr;
if (
#ifdef COPY_STACKS
Expand All @@ -576,7 +584,7 @@ DLLEXPORT kern_return_t catch_exception_raise
)
{
ret = thread_get_state(thread,x86_THREAD_STATE64,(thread_state_t)&state,&count);
HANDLE_MACH_ERROR("thread_get_state",ret);
HANDLE_MACH_ERROR("thread_get_state(2)",ret);
old_state = state;
// memset(&state,0,sizeof(x86_thread_state64_t));
// Setup libunwind information
Expand All @@ -603,7 +611,7 @@ DLLEXPORT kern_return_t catch_exception_raise
return KERN_SUCCESS;
}
else {
return -309;
return KERN_INVALID_ARGUMENT;
}
}

Expand Down
4 changes: 4 additions & 0 deletions src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -1234,6 +1234,10 @@ extern ptrint_t bt_data[MAX_BT_SIZE+1];
extern size_t bt_size;
DLLEXPORT size_t rec_backtrace(ptrint_t *data, size_t maxsize);
DLLEXPORT size_t rec_backtrace_ctx(ptrint_t *data, size_t maxsize, bt_context_t ctx);
#ifdef _OS_DARWIN_
size_t rec_backtrace_ctx_dwarf(ptrint_t *data, size_t maxsize, bt_context_t ctx);
#endif


//IO objects
extern DLLEXPORT uv_stream_t *jl_uv_stdin;
Expand Down
196 changes: 194 additions & 2 deletions src/profile.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <stdlib.h>
#include <stddef.h>
#include <stdio.h>
#include "julia.h"

static volatile ptrint_t* bt_data_prof = NULL;
Expand Down Expand Up @@ -81,9 +82,200 @@ DLLEXPORT void profile_stop_timer(void) {
}
#else
#include <signal.h>
#if defined (__APPLE__) || defined(__FreeBSD___)
#if defined (__APPLE__)
//
// BSD/OSX
// OS X
//
#include <mach/mach_traps.h>
#include <mach/task.h>
#include <mach/mig_errors.h>
#include <mach/clock.h>
#include <mach/clock_types.h>
#include <mach/clock_reply.h>
#include <assert.h>

// Abort-on-failure check for Mach calls: if retval is not KERN_SUCCESS, report
// it through mach_error() with msg (a string literal, since ":" is appended by
// literal concatenation) and terminate via jl_exit(1).
// NOTE(review): this expands to a bare `if (...) { ... }`, not a
// do { } while (0) statement, and several call sites in this file invoke it
// without a trailing semicolon. Do not use it as the body of an unbraced
// if/else, and do not rewrap it without fixing those call sites.
#define HANDLE_MACH_ERROR(msg, retval) \
if (retval!=KERN_SUCCESS) { mach_error(msg ":", (retval)); jl_exit(1); }

// pthread handle of the listener thread created by profile_start_timer.
static pthread_t profiler_thread;
// Mach port of the thread that is sampled; set from mach_thread_self() on the
// first profile_start_timer call.
static mach_port_t main_thread;
// Clock service obtained from host_get_clock_service; used to arm alarms.
clock_serv_t clk;
// Non-zero once the one-time setup in profile_start_timer has run.
static int profile_started = 0;
// Receive port on which clock alarm messages are delivered to the listener.
static mach_port_t profile_port = 0;
// Non-zero while the listener should keep re-arming the alarm.
volatile static int running = 0;
// Unwind state shared between mach_profile_listener and profiler_segv_handler:
//   -2  not currently unwinding (a fault on the profiler thread is a real segfault)
//    0  unwinding with rec_backtrace_ctx (first attempt)
//    1  first attempt faulted; retry with rec_backtrace_ctx_dwarf
//   -1  the retry faulted as well; give up on this sample
volatile static int forceDwarf = -2;
// Mach port of the listener thread; read by catch_exception_raise to route
// faults raised on that thread to profiler_segv_handler.
volatile mach_port_t mach_profiler_thread = 0;
// Relative delay passed to clock_alarm for each sample.
mach_timespec_t timerprof;
// Context captured with unw_getcontext before each unwind attempt; restored by
// profiler_segv_handler if the attempt faults.
static unw_context_t profiler_uc;

/*
 * Handle a memory fault raised on the profiler listener thread.
 *
 * If no unwind is in progress (forceDwarf == -2), the fault is not ours and
 * KERN_INVALID_ARGUMENT is returned so it is treated as a regular segfault.
 * Otherwise forceDwarf is advanced (0 -> 1 on the first fault, anything else
 * -> -1) and the faulting thread's registers are replaced with the contents of
 * profiler_uc, so that when the thread resumes it continues from the point
 * where mach_profile_listener captured that context.
 *
 * Only `thread` is used; the remaining parameters exist to mirror the
 * catch_exception_raise signature.
 *
 * Returns KERN_SUCCESS when the thread state was rewritten. A failing
 * thread_get_state/thread_set_state terminates the process through
 * HANDLE_MACH_ERROR.
 */
kern_return_t profiler_segv_handler
                (mach_port_t                          exception_port,
                 mach_port_t                                  thread,
                 mach_port_t                                    task,
                 exception_type_t                          exception,
                 exception_data_t                               code,
                 mach_msg_type_number_t                   code_count)
{
    assert(thread == mach_profiler_thread);
    x86_thread_state64_t state;

    // Not currently unwinding. Raise regular segfault
    if (forceDwarf == -2)
        return KERN_INVALID_ARGUMENT;

    // First fault: ask the listener to retry with DWARF info.
    // Any later fault: tell it to give up on this sample.
    if (forceDwarf == 0)
        forceDwarf = 1;
    else
        forceDwarf = -1;

    unsigned int count = MACHINE_THREAD_STATE_COUNT;

    // The result used to be ignored; on failure `state` would be uninitialised
    // and garbage segment/flag values would be written back to the thread.
    kern_return_t ret = thread_get_state(thread,x86_THREAD_STATE64,(thread_state_t)&state,&count);
    HANDLE_MACH_ERROR("thread_get_state",ret);

    // don't change cs fs gs rflags
    uint64_t cs = state.__cs;
    uint64_t fs = state.__fs;
    uint64_t gs = state.__gs;
    uint64_t rflags = state.__rflags;

    // NOTE(review): assumes unw_context_t begins with a layout compatible with
    // x86_thread_state64_t; confirm against libosxunwind.
    memcpy(&state,&profiler_uc,sizeof(x86_thread_state64_t));

    state.__cs = cs;
    state.__fs = fs;
    state.__gs = gs;
    state.__rflags = rflags;

    ret = thread_set_state(thread,x86_THREAD_STATE64,(thread_state_t)&state,count);
    HANDLE_MACH_ERROR("thread_set_state",ret);

    return KERN_SUCCESS;
}

/*
 * Body of the profiler listener thread (started by profile_start_timer).
 *
 * Publishes its own Mach thread port in mach_profiler_thread, then loops
 * forever: block on profile_port until an alarm message arrives, and, while
 * there is room in the sample buffer, suspend main_thread, record one
 * backtrace into bt_data_prof, append a 0 terminator, resume main_thread and,
 * if `running` is still set, re-arm the alarm.
 *
 * arg is unused. The function never returns.
 *
 * NOTE(review): the malloc result is not checked before it is dereferenced.
 * NOTE(review): the alarm is only re-armed inside the "buffer has room"
 * branch, so once the buffer is full no further alarms are scheduled from
 * here; confirm that this is intended.
 */
void * mach_profile_listener(void *arg)
{
(void) arg;
int max_size = 512;
mach_profiler_thread = mach_thread_self();
mig_reply_error_t *bufRequest = (mig_reply_error_t *) malloc(max_size);
while (1) {
// Block until the next alarm message is delivered to profile_port.
kern_return_t ret = mach_msg(&bufRequest->Head, MACH_RCV_MSG,
0, max_size, profile_port,
MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
HANDLE_MACH_ERROR("mach_msg",ret)
if (bt_size_cur < bt_size_max) {
kern_return_t ret;
// Suspend the thread so we may safely sample it
ret = thread_suspend(main_thread);
HANDLE_MACH_ERROR("thread_suspend",ret);

// Do the actual sampling
unsigned int count = MACHINE_THREAD_STATE_COUNT;
x86_thread_state64_t state;

// Get the state of the suspended thread
ret = thread_get_state(main_thread,x86_THREAD_STATE64,(thread_state_t)&state,&count);
HANDLE_MACH_ERROR("thread_get_state",ret);

// Initialize the unwind context with the suspend thread's state
unw_context_t uc;
memset(&uc,0,sizeof(unw_context_t));
memcpy(&uc,&state,sizeof(x86_thread_state64_t));

/*
 * Unfortunately compact unwind info is incorrectly generated for quite a number of
 * libraries by quite a large number of compilers. We can fall back to DWARF unwind info
 * in some cases, but in quite a number of cases (especially libraries not compiled in debug
 * mode), only the compact unwind info may be available. Even more unfortunately, there is no
 * way to detect such bogus compact unwind info (other than noticing the resulting segfault).
 * What we do here is ugly, but necessary until the compact unwind info situation improves.
 * We try to use the compact unwind info and if that results in a segfault, we retry with DWARF info.
 * Note that in a small number of cases this may result in bogus stack traces, but at least the topmost
 * entry will always be correct, and the number of cases in which this is an issue is rather small.
 * Other than that, this implementation is not incorrect as the other thread is paused while we are profiling
 * and during stack unwinding we only ever read memory, but never write it.
 */

// profiler_uc is used as a jump buffer (like setjmp/longjmp): if unwinding
// faults, profiler_segv_handler updates forceDwarf and restores this thread
// to the context captured by unw_getcontext below, so execution continues
// at the forceDwarf checks a second (or third) time. That is why the
// branches are chained with else-if: each pass takes exactly one of them.
forceDwarf = 0;
unw_getcontext(&profiler_uc);

// The -1 in the size argument keeps one slot free for the 0 terminator
// written after the branches.
if (forceDwarf == 0) {
// Save the backtrace
bt_size_cur += rec_backtrace_ctx((ptrint_t*)bt_data_prof+bt_size_cur, bt_size_max-bt_size_cur-1, &uc);
} else if(forceDwarf == 1) {
Copy link
Sponsor Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i don't think you want else here

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do. It jumps back to unw_getcontext on segv.

Copy link
Sponsor Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh, i see you are using unw_getcontext/unw_setcontext profiler_uc as a setjmp/longjmp jmp_buf. so this is correct. perhaps a comment here may be useful?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought that was covered by the if that results in a segfault, we retry with DWARF info.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but I guess I can add a comment explicitly mentioning it being a jump buffer.

bt_size_cur += rec_backtrace_ctx_dwarf((ptrint_t*)bt_data_prof+bt_size_cur, bt_size_max-bt_size_cur-1, &uc);
} else if (forceDwarf == -1) {
JL_PRINTF(JL_STDERR, "Warning: Profiler attempt to access an invalid memory location\n");
}

forceDwarf = -2;

// Mark the end of this block with 0
bt_data_prof[bt_size_cur] = 0;
bt_size_cur++;

// We're done! Resume the thread.
ret = thread_resume(main_thread);
HANDLE_MACH_ERROR("thread_resume",ret)

if (running) {
// Reset the alarm
ret = clock_alarm(clk, TIME_RELATIVE, timerprof, profile_port);
HANDLE_MACH_ERROR("clock_alarm",ret)
}
}
}
}

DLLEXPORT int profile_start_timer(void)
{
kern_return_t ret;
if (!profile_started)
{
mach_port_t self = mach_task_self();
main_thread = mach_thread_self();

ret = host_get_clock_service(mach_host_self(), SYSTEM_CLOCK, (clock_serv_t *)&clk);
HANDLE_MACH_ERROR("host_get_clock_service", ret);

ret = mach_port_allocate(self,MACH_PORT_RIGHT_RECEIVE,&profile_port);
HANDLE_MACH_ERROR("mach_port_allocate",ret);

// Alright, create a thread to serve as the listener for exceptions
pthread_attr_t attr;
if (pthread_attr_init(&attr) != 0)
{
JL_PRINTF(JL_STDERR, "pthread_attr_init failed");
jl_exit(1);
}
pthread_attr_setdetachstate(&attr,PTHREAD_CREATE_DETACHED);
if (pthread_create(&profiler_thread,&attr,mach_profile_listener,NULL) != 0)
{
JL_PRINTF(JL_STDERR, "pthread_create failed");
jl_exit(1);
}
pthread_attr_destroy(&attr);

profile_started = 1;
}

timerprof.tv_sec = 0;
timerprof.tv_nsec = nsecprof;

running = 1;
ret = clock_alarm(clk, TIME_RELATIVE, timerprof, profile_port);
HANDLE_MACH_ERROR("clock_alarm",ret)

return 0;
}

// Stop sampling by clearing `running`, which makes the listener thread stop
// re-arming the alarm after its next sample. An alarm that is already armed is
// not cancelled here, and the listener thread itself keeps existing.
DLLEXPORT void profile_stop_timer(void)
{
running = 0;
}

// Fixed: the guard tested `__FreeBSD___` (three trailing underscores), which no
// compiler defines, so this branch could never be selected. The macro
// predefined on FreeBSD is `__FreeBSD__`.
#elif defined(__FreeBSD__)
//
// BSD
//
#include <sys/time.h>
struct itimerval timerprof;
Expand Down
Loading