Profile on OS X through a Mach timer thread and libosxunwind.

What this patch does, file by file:

  Make.inc            When the bundled libunwind is used and OS is Darwin,
                      LIBUNWIND points at $(BUILD)/lib/libosxunwind.a, and
                      $(LIBUNWIND) is appended to the OSLIBS line that sits
                      next to the install_name_tool / dylib settings.
  base/profile.jl     btskip becomes 0 unconditionally; the former per-platform
                      definitions are kept as comments.
  deps/               New "osxunwind" dependency (OSXUNWIND_VER = 0.0.1-rc3):
                      download, unpack, build, install and clean rules.
                      clean-osxunwind removes the two library files that the
                      $(OSXUNWIND_OBJ_TARGET) rule installs.
  src/init.c          catch_exception_raise forwards exceptions raised on
                      mach_profiler_thread to profiler_segv_handler, checks the
                      first thread_get_state call, and returns
                      KERN_INVALID_ARGUMENT instead of -309 for faults it does
                      not handle.
  src/julia.h         Declares rec_backtrace_ctx_dwarf under _OS_DARWIN_.
  src/profile.c       OS X gets its own branch: a detached pthread waits on a
                      Mach port armed with clock_alarm, suspends main_thread,
                      copies its register state into an unw_context_t, records
                      a backtrace and resumes the thread.  If unwinding faults,
                      profiler_segv_handler restores the context saved by
                      unw_getcontext and the sample is retried through
                      rec_backtrace_ctx_dwarf; a second fault only prints a
                      warning.  profiler_segv_handler checks the result of
                      thread_get_state before reusing the fetched registers.
  src/task.c          rec_backtrace_ctx records the current frame before the
                      first unw_step; rec_backtrace_ctx_dwarf is the same loop
                      started with unw_init_local_dwarf.

Review notes:

  * The header names on the bare "#include" lines of src/profile.c are absent
    from this copy of the patch and are left exactly as found; restore them
    from the original commit before applying.
  * NOTE(review): "__FreeBSD___" (three trailing underscores) is carried over
    unchanged from the line this patch replaces; it looks like a typo for
    "__FreeBSD__" - confirm before relying on the BSD branch.
  * Leading whitespace inside the hunks was reconstructed (tabs for make
    recipes, four spaces for C); use a whitespace-tolerant apply if the
    target tree differs.

diff --git a/Make.inc b/Make.inc
index 39bade4f4dcd2..9d5ea3aefd951 100644
--- a/Make.inc
+++ b/Make.inc
@@ -213,8 +213,12 @@ endif
 ifeq ($(USE_SYSTEM_LIBUNWIND), 1)
 LIBUNWIND=-lunwind-generic -lunwind
 else
+ifeq ($(OS),Darwin)
+LIBUNWIND=$(BUILD)/lib/libosxunwind.a
+else
 LIBUNWIND=$(BUILD)/lib/libunwind-generic.a $(BUILD)/lib/libunwind.a
 endif
+endif
 
 ifeq ($(USE_SYSTEM_LLVM), 1)
 LLVM_CONFIG ?= llvm-config
@@ -353,7 +357,7 @@ INSTALL_NAME_CMD = install_name_tool -id $(INSTALL_NAME_ID_DIR)
 INSTALL_NAME_CHANGE_CMD = install_name_tool -change
 RPATH = -Wl,-rpath,'@executable_path/../$(JL_LIBDIR)' -Wl,-rpath,'@executable_path/../$(JL_PRIVATE_LIBDIR)'
 SHLIB_EXT = dylib
-OSLIBS += -ldl -Wl,-w -framework CoreFoundation -framework CoreServices
+OSLIBS += -ldl -Wl,-w -framework CoreFoundation -framework CoreServices $(LIBUNWIND)
 WHOLE_ARCHIVE = -Xlinker -all_load
 NO_WHOLE_ARCHIVE =
 JLDFLAGS =
diff --git a/base/profile.jl b/base/profile.jl
index b97d7f70250f5..947cba596fb3e 100644
--- a/base/profile.jl
+++ b/base/profile.jl
@@ -111,8 +111,9 @@ end
 
 # Number of backtrace "steps" that are triggered by taking the backtrace, e.g., inside profile_bt
 # May be platform-specific?
-@unix_only const btskip = 2
-@windows_only const btskip = 0
+#@unix_only const btskip = 2
+#@windows_only const btskip = 0
+const btskip = 0
 
 ## Print as a flat list
 # Counts the number of times each line appears, at any nesting level
diff --git a/deps/.gitignore b/deps/.gitignore
index 5777032df532c..59b4f07235b1e 100644
--- a/deps/.gitignore
+++ b/deps/.gitignore
@@ -14,6 +14,7 @@
 /libgrisu.so
 /lapack-*
 /libunwind-*
+/libosxunwind-*
 /lighttpd-*
 /llvm-*
 /mpfr-*
diff --git a/deps/Makefile b/deps/Makefile
index e84a9af5d5555..efec7f206328f 100644
--- a/deps/Makefile
+++ b/deps/Makefile
@@ -35,6 +35,8 @@ ifeq ($(OS), Linux)
 STAGE1_DEPS += unwind
 else ifeq ($(OS), FreeBSD)
 STAGE1_DEPS += unwind
+else ifeq ($(OS), Darwin)
+STAGE1_DEPS += osxunwind
 endif
 endif
 
@@ -139,7 +141,7 @@ install: $(addprefix install-, $(LIBS))
 cleanall: $(addprefix clean-, $(LIBS))
 distclean: $(addprefix distclean-, $(LIBS))
 	rm -rf $(BUILD)
-getall: get-llvm get-readline get-uv get-pcre get-double-conversion get-openlibm get-random get-openblas get-lapack get-fftw get-suitesparse get-arpack get-unwind get-gmp get-mpfr get-zlib get-patchelf
+getall: get-llvm get-readline get-uv get-pcre get-double-conversion get-openlibm get-random get-openblas get-lapack get-fftw get-suitesparse get-arpack get-unwind get-osxunwind get-gmp get-mpfr get-zlib get-patchelf
 
 ## PATHS ##
 DIRS = $(addprefix $(BUILD)/,lib include bin share etc)
@@ -1279,6 +1281,39 @@ compile-unwind: $(LIBUNWIND_TARGET_SOURCE)
 check-unwind: libunwind-$(UNWIND_VER)/checked
 install-unwind: $(LIBUNWIND_TARGET_OBJ)
 
+## OS X Unwind ##
+
+OSXUNWIND_FLAGS = ARCH="$(ARCH)" CC="$(CC)" FC="$(FC)" AR="$(AR)" OS="$(OS)" USECLANG=$(USECLANG) USEGCC=$(USEGCC) CFLAGS="-ggdb3 -O0" CXXFLAGS="-ggdb3 -O0" SFLAGS="-ggdb3"
+
+OSXUNWIND_OBJ_TARGET = $(BUILD)/lib/libosxunwind.$(SHLIB_EXT)
+OSXUNWIND_OBJ_SOURCE = libosxunwind-$(OSXUNWIND_VER)/libosxunwind.$(SHLIB_EXT)
+
+libosxunwind-$(OSXUNWIND_VER).tar.gz:
+	$(JLDOWNLOAD) $@ https://github.com/JuliaLang/libosxunwind/archive/v$(OSXUNWIND_VER).tar.gz
+
+libosxunwind-$(OSXUNWIND_VER)/Makefile: libosxunwind-$(OSXUNWIND_VER).tar.gz
+	tar xfz $<
+	touch -c $@
+
+$(OSXUNWIND_OBJ_SOURCE): libosxunwind-$(OSXUNWIND_VER)/Makefile
+	$(MAKE) -C libosxunwind-$(OSXUNWIND_VER) $(OSXUNWIND_FLAGS)
+	touch -c $@
+$(OSXUNWIND_OBJ_TARGET): $(OSXUNWIND_OBJ_SOURCE) | $(BUILD)/lib
+	cp libosxunwind-$(OSXUNWIND_VER)/libosxunwind.a $(BUILD)/lib/libosxunwind.a
+	cp $< $@
+	cp -R libosxunwind-$(OSXUNWIND_VER)/include/* $(BUILD)/include
+	$(INSTALL_NAME_CMD)libosxunwind.$(SHLIB_EXT) $@
+
+clean-osxunwind:
+	-$(MAKE) -C libosxunwind-$(OSXUNWIND_VER) distclean $(OSXUNWIND_FLAGS)
+	-rm $(OSXUNWIND_OBJ_TARGET) $(BUILD)/lib/libosxunwind.a
+distclean-osxunwind: clean-osxunwind
+
+get-osxunwind: libosxunwind-$(OSXUNWIND_VER)/Makefile
+configure-osxunwind: get-osxunwind
+compile-osxunwind: $(OSXUNWIND_OBJ_SOURCE)
+check-osxunwind: compile-osxunwind
+install-osxunwind: $(OSXUNWIND_OBJ_TARGET)
 
 ## GMP ##
 
diff --git a/deps/Versions.make b/deps/Versions.make
index c0a80653baa38..b3ff216ec936f 100644
--- a/deps/Versions.make
+++ b/deps/Versions.make
@@ -14,6 +14,7 @@ ARPACK_VER = 3.1.3
 FFTW_VER = 3.3.3
 SUITESPARSE_VER = 4.2.1
 UNWIND_VER = 1.1
+OSXUNWIND_VER = 0.0.1-rc3
 GMP_VER=5.1.1
 MPFR_VER=3.1.2
 ZLIB_VER = 1.2.8
diff --git a/src/init.c b/src/init.c
index 94c289b2c9345..15e5b2dde33c9 100644
--- a/src/init.c
+++ b/src/init.c
@@ -546,6 +546,9 @@ void darwin_stack_overflow_handler(unw_context_t *uc)
 #define HANDLE_MACH_ERROR(msg, retval) \
     if (retval!=KERN_SUCCESS) { mach_error(msg ":", (retval)); jl_exit(1); }
 
+extern kern_return_t profiler_segv_handler(mach_port_t,mach_port_t,mach_port_t,exception_type_t,exception_data_t,mach_msg_type_number_t);
+extern volatile mach_port_t mach_profiler_thread;
+
 //exc_server uses dlsym to find symbol
 DLLEXPORT kern_return_t catch_exception_raise
     (mach_port_t exception_port,
@@ -562,7 +565,12 @@ DLLEXPORT kern_return_t catch_exception_raise
     kern_return_t ret;
     //memset(&state,0,sizeof(x86_thread_state64_t));
     //memset(&exc_state,0,sizeof(x86_exception_state64_t));
+    if (thread == mach_profiler_thread)
+    {
+        return profiler_segv_handler(exception_port,thread,task,exception,code,code_count);
+    }
     ret = thread_get_state(thread,x86_EXCEPTION_STATE64,(thread_state_t)&exc_state,&exc_count);
+    HANDLE_MACH_ERROR("thread_get_state(1)",ret);
     uint64_t fault_addr = exc_state.__faultvaddr;
     if (
 #ifdef COPY_STACKS
@@ -576,7 +584,7 @@ DLLEXPORT kern_return_t catch_exception_raise
        )
     {
         ret = thread_get_state(thread,x86_THREAD_STATE64,(thread_state_t)&state,&count);
-        HANDLE_MACH_ERROR("thread_get_state",ret);
+        HANDLE_MACH_ERROR("thread_get_state(2)",ret);
         old_state = state;
         // memset(&state,0,sizeof(x86_thread_state64_t));
         // Setup libunwind information
@@ -603,7 +611,7 @@ DLLEXPORT kern_return_t catch_exception_raise
         return KERN_SUCCESS;
     }
     else {
-        return -309;
+        return KERN_INVALID_ARGUMENT;
     }
 }
 
diff --git a/src/julia.h b/src/julia.h
index 39cc2eeaf9fe9..0f03b44800e54 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -1234,6 +1234,10 @@ extern ptrint_t bt_data[MAX_BT_SIZE+1];
 extern size_t bt_size;
 DLLEXPORT size_t rec_backtrace(ptrint_t *data, size_t maxsize);
 DLLEXPORT size_t rec_backtrace_ctx(ptrint_t *data, size_t maxsize, bt_context_t ctx);
+#ifdef _OS_DARWIN_
+size_t rec_backtrace_ctx_dwarf(ptrint_t *data, size_t maxsize, bt_context_t ctx);
+#endif
+
 
 //IO objects
 extern DLLEXPORT uv_stream_t *jl_uv_stdin;
diff --git a/src/profile.c b/src/profile.c
index b9bea2ed4a9d6..a79b27b6f68ed 100644
--- a/src/profile.c
+++ b/src/profile.c
@@ -1,5 +1,6 @@
 #include
 #include
+#include
 #include "julia.h"
 
 static volatile ptrint_t* bt_data_prof = NULL;
@@ -81,9 +82,200 @@ DLLEXPORT void profile_stop_timer(void) {
 }
 #else
 #include
-#if defined (__APPLE__) || defined(__FreeBSD___)
+#if defined (__APPLE__)
 //
-// BSD/OSX
+// OS X
+//
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define HANDLE_MACH_ERROR(msg, retval) \
+    if (retval!=KERN_SUCCESS) { mach_error(msg ":", (retval)); jl_exit(1); }
+
+static pthread_t profiler_thread;
+static mach_port_t main_thread;
+clock_serv_t clk;
+static int profile_started = 0;
+static mach_port_t profile_port = 0;
+volatile static int running = 0;
+volatile static int forceDwarf = -2;
+volatile mach_port_t mach_profiler_thread = 0;
+mach_timespec_t timerprof;
+static unw_context_t profiler_uc;
+
+kern_return_t profiler_segv_handler
+    (mach_port_t exception_port,
+     mach_port_t thread,
+     mach_port_t task,
+     exception_type_t exception,
+     exception_data_t code,
+     mach_msg_type_number_t code_count)
+{
+    assert(thread == mach_profiler_thread);
+    x86_thread_state64_t state;
+
+    // Not currently unwinding. Raise regular segfault
+    if (forceDwarf == -2)
+        return KERN_INVALID_ARGUMENT;
+
+    if (forceDwarf == 0)
+        forceDwarf = 1;
+    else
+        forceDwarf = -1;
+
+    unsigned int count = MACHINE_THREAD_STATE_COUNT;
+
+    kern_return_t ret = thread_get_state(thread,x86_THREAD_STATE64,(thread_state_t)&state,&count);
+    HANDLE_MACH_ERROR("thread_get_state",ret);
+    // don't change cs fs gs rflags
+    uint64_t cs = state.__cs;
+    uint64_t fs = state.__fs;
+    uint64_t gs = state.__gs;
+    uint64_t rflags = state.__rflags;
+
+    memcpy(&state,&profiler_uc,sizeof(x86_thread_state64_t));
+
+    state.__cs = cs;
+    state.__fs = fs;
+    state.__gs = gs;
+    state.__rflags = rflags;
+
+    ret = thread_set_state(thread,x86_THREAD_STATE64,(thread_state_t)&state,count);
+    HANDLE_MACH_ERROR("thread_set_state",ret);
+
+    return KERN_SUCCESS;
+}
+
+void * mach_profile_listener(void *arg)
+{
+    (void) arg;
+    int max_size = 512;
+    mach_profiler_thread = mach_thread_self();
+    mig_reply_error_t *bufRequest = (mig_reply_error_t *) malloc(max_size);
+    while (1) {
+        kern_return_t ret = mach_msg(&bufRequest->Head, MACH_RCV_MSG,
+                                     0, max_size, profile_port,
+                                     MACH_MSG_TIMEOUT_NONE, MACH_PORT_NULL);
+        HANDLE_MACH_ERROR("mach_msg",ret)
+        if (bt_size_cur < bt_size_max) {
+            kern_return_t ret;
+            // Suspend the thread so we may safely sample it
+            ret = thread_suspend(main_thread);
+            HANDLE_MACH_ERROR("thread_suspend",ret);
+
+            // Do the actual sampling
+            unsigned int count = MACHINE_THREAD_STATE_COUNT;
+            x86_thread_state64_t state;
+
+            // Get the state of the suspended thread
+            ret = thread_get_state(main_thread,x86_THREAD_STATE64,(thread_state_t)&state,&count);
+            HANDLE_MACH_ERROR("thread_get_state",ret);
+
+            // Initialize the unwind context with the suspend thread's state
+            unw_context_t uc;
+            memset(&uc,0,sizeof(unw_context_t));
+            memcpy(&uc,&state,sizeof(x86_thread_state64_t));
+
+            /*
+             * Unfortunately compact unwind info is incorrectly generated for quite a number of
+             * libraries by quite a large number of compilers. We can fall back to DWARF unwind info
+             * in some cases, but in quite a number of cases (especially libraries not compiled in debug
+             * mode, only the compact unwind info may be available). Even more unfortunately, there is no
+             * way to detect such bogus compact unwind info (other than noticing the resulting segfault).
+             * What we do here is ugly, but necessary until the compact unwind info situation improves.
+             * We try to use the compact unwind info and if that results in a segfault, we retry with DWARF info.
+             * Note that in a small number of cases this may result in bogus stack traces, but at least the topmost
+             * entry will always be correct, and the number of cases in which this is an issue is rather small.
+             * Other than that, this implementation is not incorrect as the other thread is paused while we are profiling
+             * and during stack unwinding we only ever read memory, but never write it.
+             */
+
+            forceDwarf = 0;
+            unw_getcontext(&profiler_uc);
+
+            if (forceDwarf == 0) {
+                // Save the backtrace
+                bt_size_cur += rec_backtrace_ctx((ptrint_t*)bt_data_prof+bt_size_cur, bt_size_max-bt_size_cur-1, &uc);
+            } else if(forceDwarf == 1) {
+                bt_size_cur += rec_backtrace_ctx_dwarf((ptrint_t*)bt_data_prof+bt_size_cur, bt_size_max-bt_size_cur-1, &uc);
+            } else if (forceDwarf == -1) {
+                JL_PRINTF(JL_STDERR, "Warning: Profiler attempt to access an invalid memory location\n");
+            }
+
+            forceDwarf = -2;
+
+            // Mark the end of this block with 0
+            bt_data_prof[bt_size_cur] = 0;
+            bt_size_cur++;
+
+            // We're done! Resume the thread.
+            ret = thread_resume(main_thread);
+            HANDLE_MACH_ERROR("thread_resume",ret)
+
+            if (running) {
+                // Reset the alarm
+                ret = clock_alarm(clk, TIME_RELATIVE, timerprof, profile_port);
+                HANDLE_MACH_ERROR("clock_alarm",ret)
+            }
+        }
+    }
+}
+
+DLLEXPORT int profile_start_timer(void)
+{
+    kern_return_t ret;
+    if (!profile_started)
+    {
+        mach_port_t self = mach_task_self();
+        main_thread = mach_thread_self();
+
+        ret = host_get_clock_service(mach_host_self(), SYSTEM_CLOCK, (clock_serv_t *)&clk);
+        HANDLE_MACH_ERROR("host_get_clock_service", ret);
+
+        ret = mach_port_allocate(self,MACH_PORT_RIGHT_RECEIVE,&profile_port);
+        HANDLE_MACH_ERROR("mach_port_allocate",ret);
+
+        // Alright, create a thread to serve as the listener for exceptions
+        pthread_attr_t attr;
+        if (pthread_attr_init(&attr) != 0)
+        {
+            JL_PRINTF(JL_STDERR, "pthread_attr_init failed");
+            jl_exit(1);
+        }
+        pthread_attr_setdetachstate(&attr,PTHREAD_CREATE_DETACHED);
+        if (pthread_create(&profiler_thread,&attr,mach_profile_listener,NULL) != 0)
+        {
+            JL_PRINTF(JL_STDERR, "pthread_create failed");
+            jl_exit(1);
+        }
+        pthread_attr_destroy(&attr);
+
+        profile_started = 1;
+    }
+
+    timerprof.tv_sec = 0;
+    timerprof.tv_nsec = nsecprof;
+
+    running = 1;
+    ret = clock_alarm(clk, TIME_RELATIVE, timerprof, profile_port);
+    HANDLE_MACH_ERROR("clock_alarm",ret)
+
+    return 0;
+}
+
+DLLEXPORT void profile_stop_timer(void)
+{
+    running = 0;
+}
+
+#elif defined(__FreeBSD___)
+//
+// BSD
 //
 #include
 struct itimerval timerprof;
diff --git a/src/task.c b/src/task.c
index f243946cb3e09..aca384e57e3cb 100644
--- a/src/task.c
+++ b/src/task.c
@@ -558,14 +558,35 @@ DLLEXPORT size_t rec_backtrace_ctx(ptrint_t *data, size_t maxsize, unw_context_t
     size_t n=0;
 
     unw_init_local(&cursor, uc);
-    while (unw_step(&cursor) > 0 && n < maxsize) {
+    do {
+        if (n >= maxsize)
+            break;
         if (unw_get_reg(&cursor, UNW_REG_IP, &ip) < 0) {
             break;
         }
         data[n++] = ip;
-    }
+    } while (unw_step(&cursor) > 0);
     return n;
 }
+#ifdef _OS_DARWIN_
+size_t rec_backtrace_ctx_dwarf(ptrint_t *data, size_t maxsize, unw_context_t *uc)
+{
+    unw_cursor_t cursor;
+    unw_word_t ip;
+    size_t n=0;
+
+    unw_init_local_dwarf(&cursor, uc);
+    do {
+        if (n >= maxsize)
+            break;
+        if (unw_get_reg(&cursor, UNW_REG_IP, &ip) < 0) {
+            break;
+        }
+        data[n++] = ip;
+    } while (unw_step(&cursor) > 0);
+    return n;
+}
+#endif
 #endif
 
 static void record_backtrace(void)