Skip to content

Commit

Permalink
Merge pull request #4159 from JuliaLang/kf/profile
Browse files Browse the repository at this point in the history
Fix profiling on OS X
  • Loading branch information
Keno committed Aug 28, 2013
2 parents 7b427b5 + 9c56286 commit 924fb0f
Show file tree
Hide file tree
Showing 9 changed files with 277 additions and 10 deletions.
6 changes: 5 additions & 1 deletion Make.inc
Original file line number Diff line number Diff line change
Expand Up @@ -213,8 +213,12 @@ endif
# Select what to pass to the linker for libunwind.
# USE_SYSTEM_LIBUNWIND=1: link by name (-l flags) and let the linker locate the libraries.
ifeq ($(USE_SYSTEM_LIBUNWIND), 1)
LIBUNWIND=-lunwind-generic -lunwind
else
# Otherwise link static archives by path from $(BUILD)/lib:
# libosxunwind on Darwin, the two libunwind archives on every other OS.
ifeq ($(OS),Darwin)
LIBUNWIND=$(BUILD)/lib/libosxunwind.a
else
LIBUNWIND=$(BUILD)/lib/libunwind-generic.a $(BUILD)/lib/libunwind.a
endif
endif

ifeq ($(USE_SYSTEM_LLVM), 1)
LLVM_CONFIG ?= llvm-config
Expand Down Expand Up @@ -353,7 +357,7 @@ INSTALL_NAME_CMD = install_name_tool -id $(INSTALL_NAME_ID_DIR)
INSTALL_NAME_CHANGE_CMD = install_name_tool -change
RPATH = -Wl,-rpath,'@executable_path/../$(JL_LIBDIR)' -Wl,-rpath,'@executable_path/../$(JL_PRIVATE_LIBDIR)'
SHLIB_EXT = dylib
OSLIBS += -ldl -Wl,-w -framework CoreFoundation -framework CoreServices
OSLIBS += -ldl -Wl,-w -framework CoreFoundation -framework CoreServices $(LIBUNWIND)
WHOLE_ARCHIVE = -Xlinker -all_load
NO_WHOLE_ARCHIVE =
JLDFLAGS =
Expand Down
5 changes: 3 additions & 2 deletions base/profile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,9 @@ end

# Number of backtrace "steps" that are triggered by taking the backtrace, e.g., inside profile_bt
# May be platform-specific?
@unix_only const btskip = 2
@windows_only const btskip = 0
#@unix_only const btskip = 2
#@windows_only const btskip = 0
const btskip = 0

## Print as a flat list
# Counts the number of times each line appears, at any nesting level
Expand Down
1 change: 1 addition & 0 deletions deps/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
/libgrisu.so
/lapack-*
/libunwind-*
/libosxunwind-*
/lighttpd-*
/llvm-*
/mpfr-*
Expand Down
37 changes: 36 additions & 1 deletion deps/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ ifeq ($(OS), Linux)
STAGE1_DEPS += unwind
else ifeq ($(OS), FreeBSD)
STAGE1_DEPS += unwind
else ifeq ($(OS), Darwin)
STAGE1_DEPS += osxunwind
endif
endif

Expand Down Expand Up @@ -139,7 +141,7 @@ install: $(addprefix install-, $(LIBS))
cleanall: $(addprefix clean-, $(LIBS))
distclean: $(addprefix distclean-, $(LIBS))
rm -rf $(BUILD)
getall: get-llvm get-readline get-uv get-pcre get-double-conversion get-openlibm get-random get-openblas get-lapack get-fftw get-suitesparse get-arpack get-unwind get-gmp get-mpfr get-zlib get-patchelf
getall: get-llvm get-readline get-uv get-pcre get-double-conversion get-openlibm get-random get-openblas get-lapack get-fftw get-suitesparse get-arpack get-unwind get-osxunwind get-gmp get-mpfr get-zlib get-patchelf

## PATHS ##
DIRS = $(addprefix $(BUILD)/,lib include bin share etc)
Expand Down Expand Up @@ -1279,6 +1281,39 @@ compile-unwind: $(LIBUNWIND_TARGET_SOURCE)
check-unwind: libunwind-$(UNWIND_VER)/checked
install-unwind: $(LIBUNWIND_TARGET_OBJ)

## OS X Unwind ##

OSXUNWIND_FLAGS = ARCH="$(ARCH)" CC="$(CC)" FC="$(FC)" AR="$(AR)" OS="$(OS)" USECLANG=$(USECLANG) USEGCC=$(USEGCC) CFLAGS="-ggdb3 -O0" CXXFLAGS="-ggdb3 -O0" SFLAGS="-ggdb3"

OSXUNWIND_OBJ_TARGET = $(BUILD)/lib/libosxunwind.$(SHLIB_EXT)
OSXUNWIND_OBJ_SOURCE = libosxunwind-$(OSXUNWIND_VER)/libosxunwind.$(SHLIB_EXT)

# Download the release tarball for the pinned OSXUNWIND_VER from GitHub.
libosxunwind-$(OSXUNWIND_VER).tar.gz:
$(JLDOWNLOAD) $@ https://github.com/JuliaLang/libosxunwind/archive/v$(OSXUNWIND_VER).tar.gz

# Unpack the tarball. The extracted Makefile stands in for "sources are present";
# it is touched afterwards so it is newer than the tarball it was extracted from.
libosxunwind-$(OSXUNWIND_VER)/Makefile: libosxunwind-$(OSXUNWIND_VER).tar.gz
tar xfz $<
touch -c $@

# Build the library in its own directory via a recursive make, passing the
# toolchain and flags collected in OSXUNWIND_FLAGS.
$(OSXUNWIND_OBJ_SOURCE): libosxunwind-$(OSXUNWIND_VER)/Makefile
$(MAKE) -C libosxunwind-$(OSXUNWIND_VER) $(OSXUNWIND_FLAGS)
touch -c $@
# Install into $(BUILD): besides the shared library that is this rule's target,
# the recipe also copies the static archive and the headers, then sets the
# install name of the shared library.
# NOTE(review): only $(BUILD)/lib is listed as an order-only prerequisite, yet the
# recipe also writes into $(BUILD)/include -- confirm that directory is guaranteed
# to exist by the time this runs.
$(OSXUNWIND_OBJ_TARGET): $(OSXUNWIND_OBJ_SOURCE) | $(BUILD)/lib
cp libosxunwind-$(OSXUNWIND_VER)/libosxunwind.a $(BUILD)/lib/libosxunwind.a
cp $< $@
cp -R libosxunwind-$(OSXUNWIND_VER)/include/* $(BUILD)/include
$(INSTALL_NAME_CMD)libosxunwind.$(SHLIB_EXT) $@

# Remove the libosxunwind build products: run the sub-project's own distclean,
# then delete the copies installed under $(BUILD)/lib (the shared library and
# the static archive, both of which the install rule puts there).
# Previously this removed $(OPENLIBM_OBJ_TARGET), i.e. openlibm's library, and
# left the osxunwind libraries behind.
# Both commands are prefixed with '-' so that cleaning a tree that was never
# built (no source directory, nothing installed) is not an error.
clean-osxunwind:
	-$(MAKE) -C libosxunwind-$(OSXUNWIND_VER) distclean $(OSXUNWIND_FLAGS)
	-rm -f $(OSXUNWIND_OBJ_TARGET) $(BUILD)/lib/libosxunwind.a
distclean-osxunwind: clean-osxunwind

get-osxunwind: libosxunwind-$(OSXUNWIND_VER)/Makefile
configure-osxunwind: get-osxunwind
compile-osxunwind: $(OSXUNWIND_OBJ_SOURCE)
check-osxunwind: compile-osxunwind
install-osxunwind: $(OSXUNWIND_OBJ_TARGET)

## GMP ##

Expand Down
1 change: 1 addition & 0 deletions deps/Versions.make
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ ARPACK_VER = 3.1.3
FFTW_VER = 3.3.3
SUITESPARSE_VER = 4.2.1
UNWIND_VER = 1.1
OSXUNWIND_VER = 0.0.1-rc3
GMP_VER=5.1.1
MPFR_VER=3.1.2
ZLIB_VER = 1.2.8
Expand Down
12 changes: 10 additions & 2 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,9 @@ void darwin_stack_overflow_handler(unw_context_t *uc)
#define HANDLE_MACH_ERROR(msg, retval) \
if (retval!=KERN_SUCCESS) { mach_error(msg ":", (retval)); jl_exit(1); }

extern kern_return_t profiler_segv_handler(mach_port_t,mach_port_t,mach_port_t,exception_type_t,exception_data_t,mach_msg_type_number_t);
extern volatile mach_port_t mach_profiler_thread;

//exc_server uses dlsym to find symbol
DLLEXPORT kern_return_t catch_exception_raise
(mach_port_t exception_port,
Expand All @@ -562,7 +565,12 @@ DLLEXPORT kern_return_t catch_exception_raise
kern_return_t ret;
//memset(&state,0,sizeof(x86_thread_state64_t));
//memset(&exc_state,0,sizeof(x86_exception_state64_t));
if (thread == mach_profiler_thread)
{
return profiler_segv_handler(exception_port,thread,task,exception,code,code_count);
}
ret = thread_get_state(thread,x86_EXCEPTION_STATE64,(thread_state_t)&exc_state,&exc_count);
HANDLE_MACH_ERROR("thread_get_state(1)",ret);
uint64_t fault_addr = exc_state.__faultvaddr;
if (
#ifdef COPY_STACKS
Expand All @@ -576,7 +584,7 @@ DLLEXPORT kern_return_t catch_exception_raise
)
{
ret = thread_get_state(thread,x86_THREAD_STATE64,(thread_state_t)&state,&count);
HANDLE_MACH_ERROR("thread_get_state",ret);
HANDLE_MACH_ERROR("thread_get_state(2)",ret);
old_state = state;
// memset(&state,0,sizeof(x86_thread_state64_t));
// Setup libunwind information
Expand All @@ -603,7 +611,7 @@ DLLEXPORT kern_return_t catch_exception_raise
return KERN_SUCCESS;
}
else {
return -309;
return KERN_INVALID_ARGUMENT;
}
}

Expand Down
4 changes: 4 additions & 0 deletions src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -1234,6 +1234,10 @@ extern ptrint_t bt_data[MAX_BT_SIZE+1];
extern size_t bt_size;
DLLEXPORT size_t rec_backtrace(ptrint_t *data, size_t maxsize);
DLLEXPORT size_t rec_backtrace_ctx(ptrint_t *data, size_t maxsize, bt_context_t ctx);
#ifdef _OS_DARWIN_
size_t rec_backtrace_ctx_dwarf(ptrint_t *data, size_t maxsize, bt_context_t ctx);
#endif


//IO objects
extern DLLEXPORT uv_stream_t *jl_uv_stdin;
Expand Down
196 changes: 194 additions & 2 deletions src/profile.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <stdlib.h>
#include <stddef.h>
#include <stdio.h>
#include "julia.h"

static volatile ptrint_t* bt_data_prof = NULL;
Expand Down Expand Up @@ -81,9 +82,200 @@ DLLEXPORT void profile_stop_timer(void) {
}
#else
#include <signal.h>
#if defined (__APPLE__) || defined(__FreeBSD___)
#if defined (__APPLE__)
//
// BSD/OSX
// OS X
//
#include <mach/mach_traps.h>
#include <mach/task.h>
#include <mach/mig_errors.h>
#include <mach/clock.h>
#include <mach/clock_types.h>
#include <mach/clock_reply.h>
#include <assert.h>

#define HANDLE_MACH_ERROR(msg, retval) \
if (retval!=KERN_SUCCESS) { mach_error(msg ":", (retval)); jl_exit(1); }

// pthread handle of the listener thread created in profile_start_timer
static pthread_t profiler_thread;
// Mach port of the thread that first called profile_start_timer; this is the
// thread that gets suspended and sampled by the listener
static mach_port_t main_thread;
// Clock service obtained from host_get_clock_service(SYSTEM_CLOCK); used to arm alarms
clock_serv_t clk;
// Nonzero once the one-time setup (clock, port, listener thread) has been done
static int profile_started = 0;
// Receive port on which the clock alarms are delivered to the listener thread
static mach_port_t profile_port = 0;
// Nonzero while the listener should re-arm the alarm after each sample;
// cleared by profile_stop_timer
volatile static int running = 0;
// Unwinding state shared between the listener and profiler_segv_handler:
//   -2  not unwinding (a fault on the listener thread is a genuine crash)
//    0  unwinding, first attempt via rec_backtrace_ctx
//    1  first attempt faulted; retry via rec_backtrace_ctx_dwarf
//   -1  the retry faulted as well; give up on this sample
volatile static int forceDwarf = -2;
// Mach port of the listener thread (set by the listener itself on startup);
// profiler_segv_handler asserts that a faulting thread matches it
volatile mach_port_t mach_profiler_thread = 0;
// Sampling interval passed to clock_alarm
mach_timespec_t timerprof;
// Context captured with unw_getcontext right before each unwind attempt;
// profiler_segv_handler restores the listener thread's registers from it
static unw_context_t profiler_uc;

/*
 * Handle a memory fault raised on the profiler listener thread.
 *
 * While the listener is unwinding the sampled thread's stack (forceDwarf != -2)
 * a fault is treated as "bad unwind info" rather than a crash: the unwinding
 * state is advanced (0 -> 1 on the first fault, anything else -> -1) and the
 * listener thread's registers are reset to the context saved in profiler_uc,
 * so it resumes right after its unw_getcontext call and re-dispatches on
 * forceDwarf.
 *
 * Returns KERN_INVALID_ARGUMENT when no unwind is in progress (the fault is
 * not ours to handle) and KERN_SUCCESS once the thread state has been reset.
 * A failing Mach call terminates the process via HANDLE_MACH_ERROR.
 *
 * exception_port, task, exception, code and code_count are unused here.
 */
kern_return_t profiler_segv_handler
                (mach_port_t exception_port,
                 mach_port_t thread,
                 mach_port_t task,
                 exception_type_t exception,
                 exception_data_t code,
                 mach_msg_type_number_t code_count)
{
    assert(thread == mach_profiler_thread);
    x86_thread_state64_t state;
    kern_return_t ret;

    // Not currently unwinding. Raise regular segfault
    if (forceDwarf == -2)
        return KERN_INVALID_ARGUMENT;

    // First fault: ask for a retry with DWARF info. Any later fault: give up.
    if (forceDwarf == 0)
        forceDwarf = 1;
    else
        forceDwarf = -1;

    unsigned int count = MACHINE_THREAD_STATE_COUNT;

    // The result must be checked: on failure `state` would be uninitialized and
    // the cs/fs/gs/rflags values saved below would be garbage that is then
    // written back into the thread by thread_set_state.
    ret = thread_get_state(thread,x86_THREAD_STATE64,(thread_state_t)&state,&count);
    HANDLE_MACH_ERROR("thread_get_state",ret);

    // don't change cs fs gs rflags
    uint64_t cs = state.__cs;
    uint64_t fs = state.__fs;
    uint64_t gs = state.__gs;
    uint64_t rflags = state.__rflags;

    // Overwrite the register state with the saved unwind context.
    // NOTE(review): this relies on unw_context_t starting with the same layout
    // as x86_thread_state64_t -- confirm against the libosxunwind headers.
    memcpy(&state,&profiler_uc,sizeof(x86_thread_state64_t));

    state.__cs = cs;
    state.__fs = fs;
    state.__gs = gs;
    state.__rflags = rflags;

    ret = thread_set_state(thread,x86_THREAD_STATE64,(thread_state_t)&state,count);
    HANDLE_MACH_ERROR("thread_set_state",ret);

    return KERN_SUCCESS;
}

void * mach_profile_listener(void *arg)
{
(void) arg;
int max_size = 512;
mach_profiler_thread = mach_thread_self();
mig_reply_error_t *bufRequest = (mig_reply_error_t *) malloc(max_size);
while (1) {
kern_return_t ret = mach_msg(&bufRequest->Head, MACH_RCV_MSG,
0, max_size, profile_port,
MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
HANDLE_MACH_ERROR("mach_msg",ret)
if (bt_size_cur < bt_size_max) {
kern_return_t ret;
// Suspend the thread so we may safely sample it
ret = thread_suspend(main_thread);
HANDLE_MACH_ERROR("thread_suspend",ret);

// Do the actual sampling
unsigned int count = MACHINE_THREAD_STATE_COUNT;
x86_thread_state64_t state;

// Get the state of the suspended thread
ret = thread_get_state(main_thread,x86_THREAD_STATE64,(thread_state_t)&state,&count);
HANDLE_MACH_ERROR("thread_get_state",ret);

// Initialize the unwind context with the suspend thread's state
unw_context_t uc;
memset(&uc,0,sizeof(unw_context_t));
memcpy(&uc,&state,sizeof(x86_thread_state64_t));

/*
* Unfortunately compact unwind info is incorrectly generated for quite a number of
* libraries by quite a large number of compilers. We can fall back to DWARF unwind info
* in some cases, but in quite a number of cases (especially libraries not compiled in debug
* mode, only the compact unwind info may be available). Even more unfortunately, there is no
* way to detect such bogus compact unwind info (other than noticing the resulting segfault).
* What we do here is ugly, but necessary until the compact unwind info situation improves.
* We try to use the compact unwind info and if that results in a segfault, we retry with DWARF info.
* Note that in a small number of cases this may result in bogus stack traces, but at least the topmost
* entry will always be correct, and the number of cases in which this is an issue is rather small.
* Other than that, this implementation is not incorrect as the other thread is paused while we are profiling
* and during stack unwinding we only ever read memory, but never write it.
*/

forceDwarf = 0;
unw_getcontext(&profiler_uc);

if (forceDwarf == 0) {
// Save the backtrace
bt_size_cur += rec_backtrace_ctx((ptrint_t*)bt_data_prof+bt_size_cur, bt_size_max-bt_size_cur-1, &uc);
} else if(forceDwarf == 1) {
bt_size_cur += rec_backtrace_ctx_dwarf((ptrint_t*)bt_data_prof+bt_size_cur, bt_size_max-bt_size_cur-1, &uc);
} else if (forceDwarf == -1) {
JL_PRINTF(JL_STDERR, "Warning: Profiler attempt to access an invalid memory location\n");
}

forceDwarf = -2;

// Mark the end of this block with 0
bt_data_prof[bt_size_cur] = 0;
bt_size_cur++;

// We're done! Resume the thread.
ret = thread_resume(main_thread);
HANDLE_MACH_ERROR("thread_resume",ret)

if (running) {
// Reset the alarm
ret = clock_alarm(clk, TIME_RELATIVE, timerprof, profile_port);
HANDLE_MACH_ERROR("clock_alarm",ret)
}
}
}
}

/*
 * Start (or restart) periodic sampling of the calling thread.
 *
 * On the first call this performs the one-time setup: it records the calling
 * thread as the thread to sample, obtains the system clock service, allocates
 * the port that alarms are delivered on, and spawns the detached listener
 * thread. Every call then programs the sampling interval from nsecprof and
 * arms the first alarm.
 *
 * Returns 0. Any failure during setup or when arming the alarm terminates the
 * process.
 */
DLLEXPORT int profile_start_timer(void)
{
    kern_return_t ret;
    if (!profile_started)
    {
        mach_port_t self = mach_task_self();
        main_thread = mach_thread_self();

        ret = host_get_clock_service(mach_host_self(), SYSTEM_CLOCK, (clock_serv_t *)&clk);
        HANDLE_MACH_ERROR("host_get_clock_service", ret);

        ret = mach_port_allocate(self,MACH_PORT_RIGHT_RECEIVE,&profile_port);
        HANDLE_MACH_ERROR("mach_port_allocate",ret);

        // Alright, create a thread to serve as the listener for exceptions
        pthread_attr_t attr;
        if (pthread_attr_init(&attr) != 0)
        {
            JL_PRINTF(JL_STDERR, "pthread_attr_init failed");
            jl_exit(1);
        }
        // Detached: the listener runs for the rest of the process and is never joined
        pthread_attr_setdetachstate(&attr,PTHREAD_CREATE_DETACHED);
        if (pthread_create(&profiler_thread,&attr,mach_profile_listener,NULL) != 0)
        {
            JL_PRINTF(JL_STDERR, "pthread_create failed");
            jl_exit(1);
        }
        pthread_attr_destroy(&attr);

        profile_started = 1;
    }

    // Split the interval into whole seconds and the sub-second remainder.
    // Storing the full nanosecond count in tv_nsec (with tv_sec fixed at 0)
    // yields an invalid mach_timespec_t for any interval of one second or
    // more, which makes clock_alarm fail and the process exit.
    // NOTE(review): assumes nsecprof is an integer count of nanoseconds --
    // confirm against its declaration.
    timerprof.tv_sec = nsecprof / 1000000000;
    timerprof.tv_nsec = nsecprof % 1000000000;

    running = 1;
    ret = clock_alarm(clk, TIME_RELATIVE, timerprof, profile_port);
    HANDLE_MACH_ERROR("clock_alarm",ret);

    return 0;
}

/*
 * Stop periodic sampling.
 *
 * Only clears the `running` flag: the listener thread re-arms the alarm after
 * a sample only while that flag is set, so sampling winds down by itself.
 * An alarm that is already pending is not cancelled, so one more sample may
 * still be recorded after this returns.
 */
DLLEXPORT void profile_stop_timer(void)
{
running = 0;
}

#elif defined(__FreeBSD___)
//
// BSD
//
#include <sys/time.h>
struct itimerval timerprof;
Expand Down
Loading

0 comments on commit 924fb0f

Please sign in to comment.