From 8d0fcdb9f9325725ca9b7cc4c28f674ec772064f Mon Sep 17 00:00:00 2001 From: DokFaust Date: Tue, 29 May 2018 17:08:41 +0200 Subject: [PATCH] Added the capability of looking up JIT symbols for perf Moved Oprofile Wrapper, implemented RegisterJITEventListener and NotifyFinalized Added PerfJITEventListener as an LLVM Patch Added IntelJITEventListener Support Added LLVM Patch, removed redundant Oprofile code, wipe blanklines, Register OProfile JITEvent, fixed oprofile compile flag, rebased llvm 6.0 checksums Added Perf profiling support as an LLVM patch. --- .gitignore | 2 + Make.inc | 9 + deps/llvm.mk | 6 + .../llvm-D44892-Perf-integration.patch | 677 ++++++++++++++++++ deps/patches/llvm-OProfile-line-num.patch | 48 ++ src/codegen.cpp | 16 + src/init.c | 10 + src/jitlayers.cpp | 12 +- src/jitlayers.h | 4 +- src/julia_internal.h | 3 + 10 files changed, 785 insertions(+), 2 deletions(-) create mode 100644 deps/patches/llvm-D44892-Perf-integration.patch create mode 100644 deps/patches/llvm-OProfile-line-num.patch diff --git a/.gitignore b/.gitignore index 38b13e7b5741f..fb15ec0dbb5a2 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ /dist-extras /julia /usr +/oprofile_data /usr-staging /Make.user /julia-* @@ -24,4 +25,5 @@ *.jl.mem *.ji +/perf* .DS_Store diff --git a/Make.inc b/Make.inc index 8cb2c1014fa5a..8e0df61ce9fe2 100644 --- a/Make.inc +++ b/Make.inc @@ -72,6 +72,9 @@ endif # Set to 1 to enable profiling with OProfile USE_OPROFILE_JITEVENTS ?= 0 +# Set to 1 to enable profiling with perf +USE_PERF_JITEVENTS ?= 0 + # libc++ is standard on OS X 10.9, but not for earlier releases USE_LIBCPP := 0 @@ -1067,6 +1070,12 @@ ifeq ($(DISABLE_LIBUNWIND), 1) JCPPFLAGS += -DJL_DISABLE_LIBUNWIND endif +# perf +ifeq ($(USE_PERF_JITEVENTS), 1) +JCPPFLAGS += -DJL_USE_PERF_JITEVENTS +endif + + # Intel libraries ifeq ($(USE_INTEL_LIBM), 1) diff --git a/deps/llvm.mk b/deps/llvm.mk index 3b14b5e8dc7de..86954a682f007 100644 --- a/deps/llvm.mk +++ b/deps/llvm.mk @@ -98,6 +98,10 @@ 
ifeq ($(USE_OPROFILE_JITEVENTS), 1) LLVM_CMAKE += -DLLVM_USE_OPROFILE:BOOL=ON endif # USE_OPROFILE_JITEVENTS +ifeq ($(USE_PERF_JITEVENTS), 1) + LLVM_CMAKE += -DLLVM_USE_PERF:BOOL=ON +endif # USE_PERF_JITEVENTS + ifeq ($(BUILD_LLDB),1) ifeq ($(USECLANG),0) LLVM_CXXFLAGS += -std=c++0x @@ -494,6 +498,8 @@ $(eval $(call LLVM_PATCH,llvm-rL332302)) # remove for 7.0 $(eval $(call LLVM_PATCH,llvm-rL332694)) # remove for 7.0 $(eval $(call LLVM_PATCH,llvm-rL327898)) # remove for 7.0 $(eval $(call LLVM_PATCH,llvm-6.0-DISABLE_ABI_CHECKS)) +$(eval $(call LLVM_PATCH,llvm-OProfile-line-num)) +$(eval $(call LLVM_PATCH,llvm-D44892-Perf-integration)) endif # LLVM_VER # Remove hardcoded OS X requirements in compilter-rt cmake build diff --git a/deps/patches/llvm-D44892-Perf-integration.patch b/deps/patches/llvm-D44892-Perf-integration.patch new file mode 100644 index 0000000000000..6d072ba6128d9 --- /dev/null +++ b/deps/patches/llvm-D44892-Perf-integration.patch @@ -0,0 +1,677 @@ +From 45bc0f0badbdbabaed7d204757c2aad7ab49a3fe Mon Sep 17 00:00:00 2001 +From: DokFaust +Date: Mon, 11 Jun 2018 12:59:42 +0200 +Subject: [PATCH] PerfJITEventListener integration, requires compile flag + LLVM_USE_PERF + +--- + CMakeLists.txt | 13 + + include/llvm/Config/config.h.cmake | 3 + + include/llvm/Config/llvm-config.h.cmake | 3 + + .../llvm/ExecutionEngine/JITEventListener.h | 9 + + lib/ExecutionEngine/CMakeLists.txt | 4 + + lib/ExecutionEngine/LLVMBuild.txt | 2 +- + lib/ExecutionEngine/Orc/LLVMBuild.txt | 2 +- + .../PerfJITEvents/CMakeLists.txt | 5 + + .../PerfJITEvents/LLVMBuild.txt | 23 + + .../PerfJITEvents/PerfJITEventListener.cpp | 492 ++++++++++++++++++ + 10 files changed, 554 insertions(+), 2 deletions(-) + create mode 100644 lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt + create mode 100644 lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt + create mode 100644 lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 
f8da6cf9211..fb92c825a46 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -426,6 +426,16 @@ if( LLVM_USE_OPROFILE ) + endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) + endif( LLVM_USE_OPROFILE ) + ++option(LLVM_USE_PERF ++ "Use perf JIT interface to inform perf about JIT code" OFF) ++ ++# If enabled, verify we are on a platform that supports perf. ++if( LLVM_USE_PERF ) ++ if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) ++ message(FATAL_ERROR "perf support is available on Linux only.") ++ endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) ++endif( LLVM_USE_PERF ) ++ + set(LLVM_USE_SANITIZER "" CACHE STRING + "Define the sanitizer used to build binaries and tests.") + set(LLVM_LIB_FUZZING_ENGINE "" CACHE PATH +@@ -634,6 +644,9 @@ endif (LLVM_USE_INTEL_JITEVENTS) + if (LLVM_USE_OPROFILE) + set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} OProfileJIT) + endif (LLVM_USE_OPROFILE) ++if (LLVM_USE_PERF) ++ set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} PerfJITEvents) ++endif (LLVM_USE_PERF) + + message(STATUS "Constructing LLVMBuild project information") + execute_process( +diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake +index 940f8420304..17787ed779b 100644 +--- a/include/llvm/Config/config.h.cmake ++++ b/include/llvm/Config/config.h.cmake +@@ -377,6 +377,9 @@ + /* Define if we have the oprofile JIT-support library */ + #cmakedefine01 LLVM_USE_OPROFILE + ++/* Define if we have the perf JIT-support library */ ++#cmakedefine01 LLVM_USE_PERF ++ + /* LLVM version information */ + #cmakedefine LLVM_VERSION_INFO "${LLVM_VERSION_INFO}" + +diff --git a/include/llvm/Config/llvm-config.h.cmake b/include/llvm/Config/llvm-config.h.cmake +index 4daa00f3bc4..8d9c3b24d52 100644 +--- a/include/llvm/Config/llvm-config.h.cmake ++++ b/include/llvm/Config/llvm-config.h.cmake +@@ -65,6 +65,9 @@ + /* Define if we have the oprofile JIT-support library */ + #cmakedefine01 LLVM_USE_OPROFILE + ++/* Define if we have the perf JIT-support library */ 
++#cmakedefine01 LLVM_USE_PERF ++ + /* Major version of the LLVM API */ + #define LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR} + +diff --git a/include/llvm/ExecutionEngine/JITEventListener.h b/include/llvm/ExecutionEngine/JITEventListener.h +index ff7840f00a4..1cc2c423a8b 100644 +--- a/include/llvm/ExecutionEngine/JITEventListener.h ++++ b/include/llvm/ExecutionEngine/JITEventListener.h +@@ -115,6 +115,15 @@ public: + } + #endif // USE_OPROFILE + ++#if LLVM_USE_PERF ++ static JITEventListener *createPerfJITEventListener(); ++#else ++ static JITEventListener *createPerfJITEventListener() ++ { ++ return nullptr; ++ } ++#endif //USE_PERF ++ + private: + virtual void anchor(); + }; +diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt +index 84b34919e44..893d113a685 100644 +--- a/lib/ExecutionEngine/CMakeLists.txt ++++ b/lib/ExecutionEngine/CMakeLists.txt +@@ -30,3 +30,7 @@ endif( LLVM_USE_OPROFILE ) + if( LLVM_USE_INTEL_JITEVENTS ) + add_subdirectory(IntelJITEvents) + endif( LLVM_USE_INTEL_JITEVENTS ) ++ ++if( LLVM_USE_PERF ) ++ add_subdirectory(PerfJITEvents) ++endif( LLVM_USE_PERF ) +diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt +index 9d29a41f504..b6e1bda6a51 100644 +--- a/lib/ExecutionEngine/LLVMBuild.txt ++++ b/lib/ExecutionEngine/LLVMBuild.txt +@@ -16,7 +16,7 @@ + ;===------------------------------------------------------------------------===; + + [common] +-subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc ++subdirectories = Interpreter MCJIT RuntimeDyld IntelJITEvents OProfileJIT Orc PerfJITEvents + + [component_0] + type = Library +diff --git a/lib/ExecutionEngine/Orc/LLVMBuild.txt b/lib/ExecutionEngine/Orc/LLVMBuild.txt +index 8f05172e77a..ef4ae64e823 100644 +--- a/lib/ExecutionEngine/Orc/LLVMBuild.txt ++++ b/lib/ExecutionEngine/Orc/LLVMBuild.txt +@@ -19,4 +19,4 @@ + type = Library + name = OrcJIT + parent = ExecutionEngine +-required_libraries = Core ExecutionEngine 
Object RuntimeDyld Support TransformUtils ++required_libraries = Core ExecutionEngine Object RuntimeDyld Support TransformUtils +diff --git a/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt b/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt +new file mode 100644 +index 00000000000..136cc429d02 +--- /dev/null ++++ b/lib/ExecutionEngine/PerfJITEvents/CMakeLists.txt +@@ -0,0 +1,5 @@ ++add_llvm_library(LLVMPerfJITEvents ++ PerfJITEventListener.cpp ++ ) ++ ++add_dependencies(LLVMPerfJITEvents LLVMCodeGen) +diff --git a/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt b/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt +new file mode 100644 +index 00000000000..b1958a69260 +--- /dev/null ++++ b/lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt +@@ -0,0 +1,23 @@ ++;===- ./lib/ExecutionEngine/PerfJITEvents/LLVMBuild.txt ----------------*- Conf -*--===; ++; ++; The LLVM Compiler Infrastructure ++; ++; This file is distributed under the University of Illinois Open Source ++; License. See LICENSE.TXT for details. ++; ++;===------------------------------------------------------------------------===; ++; ++; This is an LLVMBuild description file for the components in this subdirectory. 
++; ++; For more information on the LLVMBuild system, please see: ++; ++; http://llvm.org/docs/LLVMBuild.html ++; ++;===------------------------------------------------------------------------===; ++ ++[component_0] ++type = OptionalLibrary ++name = PerfJITEvents ++parent = ExecutionEngine ++required_libraries = CodeGen Core DebugInfoDWARF ExecutionEngine Object Support TransformUtils ++ +diff --git a/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp +new file mode 100644 +index 00000000000..c2b97dd59f3 +--- /dev/null ++++ b/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp +@@ -0,0 +1,492 @@ ++//===-- PerfJITEventListener.cpp - Tell Linux's perf about JITted code ----===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines a JITEventListener object that tells perf about JITted ++// functions, including source line information. 
++// ++// Documentation for perf jit integration is available at: ++// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt ++// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jit-interface.txt ++// ++//===----------------------------------------------------------------------===// ++ ++#include "llvm/ADT/Twine.h" ++#include "llvm/Config/config.h" ++#include "llvm/DebugInfo/DWARF/DWARFContext.h" ++#include "llvm/ExecutionEngine/JITEventListener.h" ++#include "llvm/Object/ObjectFile.h" ++#include "llvm/Object/SymbolSize.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Support/Errno.h" ++#include "llvm/Support/FileSystem.h" ++#include "llvm/Support/MemoryBuffer.h" ++#include "llvm/Support/Mutex.h" ++#include "llvm/Support/MutexGuard.h" ++#include "llvm/Support/Path.h" ++#include "llvm/Support/Process.h" ++#include "llvm/Support/Threading.h" ++#include "llvm/Support/raw_ostream.h" ++ ++#include // mmap() ++#include // getpid() ++#include // clock_gettime(), time(), localtime_r() */ ++#include // for getpid(), read(), close() ++ ++using namespace llvm; ++using namespace llvm::object; ++typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind; ++ ++namespace { ++ ++// language identifier (XXX: should we generate something better from debug ++// info?) 
++#define JIT_LANG "llvm-IR" ++#define LLVM_PERF_JIT_MAGIC \ ++ ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \ ++ (uint32_t)'D') ++#define LLVM_PERF_JIT_VERSION 1 ++ ++// bit 0: set if the jitdump file is using an architecture-specific timestamp ++// clock source ++#define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << 0) ++ ++struct LLVMPerfJitHeader; ++ ++class PerfJITEventListener : public JITEventListener { ++public: ++ PerfJITEventListener(); ++ ~PerfJITEventListener() { ++ if (MarkerAddr) ++ CloseMarker(); ++ } ++ ++ void NotifyObjectEmitted(const ObjectFile &Obj, ++ const RuntimeDyld::LoadedObjectInfo &L) override; ++ void NotifyFreeingObject(const ObjectFile &Obj) override; ++ ++private: ++ bool InitDebuggingDir(); ++ bool OpenMarker(); ++ void CloseMarker(); ++ static bool FillMachine(LLVMPerfJitHeader &hdr); ++ ++ void NotifyCode(Expected &Symbol, uint64_t CodeAddr, ++ uint64_t CodeSize); ++ void NotifyDebug(uint64_t CodeAddr, DILineInfoTable Lines); ++ ++ // cache lookups ++ pid_t Pid; ++ ++ // base directory for output data ++ std::string JitPath; ++ ++ // output data stream, closed via Dumpstream ++ int DumpFd = -1; ++ ++ // output data stream ++ std::unique_ptr Dumpstream; ++ ++ // prevent concurrent dumps from messing up the output file ++ sys::Mutex Mutex; ++ ++ // perf mmap marker ++ void *MarkerAddr = NULL; ++ ++ // perf support ready ++ bool SuccessfullyInitialized = false; ++ ++ // identifier for functions, primarily to identify when moving them around ++ uint64_t CodeGeneration = 1; ++}; ++ ++// The following are POD struct definitions from the perf jit specification ++ ++enum LLVMPerfJitRecordType { ++ JIT_CODE_LOAD = 0, ++ JIT_CODE_MOVE = 1, // not emitted, code isn't moved ++ JIT_CODE_DEBUG_INFO = 2, ++ JIT_CODE_CLOSE = 3, // not emitted, unnecessary ++ JIT_CODE_UNWINDING_INFO = 4, // not emitted ++ ++ JIT_CODE_MAX ++}; ++ ++struct LLVMPerfJitHeader { ++ uint32_t Magic; // characters "JiTD" ++ uint32_t Version; // header version 
++ uint32_t TotalSize; // total size of header ++ uint32_t ElfMach; // elf mach target ++ uint32_t Pad1; // reserved ++ uint32_t Pid; ++ uint64_t Timestamp; // timestamp ++ uint64_t Flags; // flags ++}; ++ ++// record prefix (mandatory in each record) ++struct LLVMPerfJitRecordPrefix { ++ uint32_t Id; // record type identifier ++ uint32_t TotalSize; ++ uint64_t Timestamp; ++}; ++ ++struct LLVMPerfJitRecordCodeLoad { ++ LLVMPerfJitRecordPrefix Prefix; ++ ++ uint32_t Pid; ++ uint32_t Tid; ++ uint64_t Vma; ++ uint64_t CodeAddr; ++ uint64_t CodeSize; ++ uint64_t CodeIndex; ++}; ++ ++struct LLVMPerfJitDebugEntry { ++ uint64_t Addr; ++ int Lineno; // source line number starting at 1 ++ int Discrim; // column discriminator, 0 is default ++ // followed by null terminated filename, \xff\0 if same as previous entry ++}; ++ ++struct LLVMPerfJitRecordDebugInfo { ++ LLVMPerfJitRecordPrefix Prefix; ++ ++ uint64_t CodeAddr; ++ uint64_t NrEntry; ++ // followed by NrEntry LLVMPerfJitDebugEntry records ++}; ++ ++static inline uint64_t timespec_to_ns(const struct timespec *ts) { ++ const uint64_t NanoSecPerSec = 1000000000; ++ return ((uint64_t)ts->tv_sec * NanoSecPerSec) + ts->tv_nsec; ++} ++ ++static inline uint64_t perf_get_timestamp(void) { ++ struct timespec ts; ++ int ret; ++ ++ ret = clock_gettime(CLOCK_MONOTONIC, &ts); ++ if (ret) ++ return 0; ++ ++ return timespec_to_ns(&ts); ++} ++ ++PerfJITEventListener::PerfJITEventListener() : Pid(::getpid()) { ++ // check if clock-source is supported ++ if (!perf_get_timestamp()) { ++ errs() << "kernel does not support CLOCK_MONOTONIC\n"; ++ return; ++ } ++ ++ if (!InitDebuggingDir()) { ++ errs() << "could not initialize debugging directory\n"; ++ return; ++ } ++ ++ std::string Filename; ++ raw_string_ostream FilenameBuf(Filename); ++ FilenameBuf << JitPath << "/jit-" << Pid << ".dump"; ++ ++ // Need to open ourselves, because we need to hand the FD to OpenMarker() and ++ // raw_fd_ostream doesn't expose the FD. 
++ using sys::fs::openFileForWrite; ++ if (auto EC = ++ openFileForWrite(FilenameBuf.str(), DumpFd, sys::fs::F_RW, 0666)) { ++ errs() << "could not open JIT dump file " << FilenameBuf.str() << ": " ++ << EC.message() << "\n"; ++ return; ++ } ++ ++ Dumpstream = make_unique(DumpFd, true); ++ ++ LLVMPerfJitHeader Header = {0}; ++ if (!FillMachine(Header)) ++ return; ++ ++ // signal this process emits JIT information ++ if (!OpenMarker()) ++ return; ++ ++ // emit dumpstream header ++ Header.Magic = LLVM_PERF_JIT_MAGIC; ++ Header.Version = LLVM_PERF_JIT_VERSION; ++ Header.TotalSize = sizeof(Header); ++ Header.Pid = Pid; ++ Header.Timestamp = perf_get_timestamp(); ++ Dumpstream->write(reinterpret_cast(&Header), sizeof(Header)); ++ ++ // Everything initialized, can do profiling now. ++ if (!Dumpstream->has_error()) ++ SuccessfullyInitialized = true; ++} ++ ++void PerfJITEventListener::NotifyObjectEmitted( ++ const ObjectFile &Obj, const RuntimeDyld::LoadedObjectInfo &L) { ++ ++ if (!SuccessfullyInitialized) ++ return; ++ ++ OwningBinary DebugObjOwner = L.getObjectForDebug(Obj); ++ const ObjectFile &DebugObj = *DebugObjOwner.getBinary(); // NOTE(review): dereferenced unchecked -- confirm getObjectForDebug() cannot return an empty binary here ++ ++ // Create the DWARF context that is queried for each function's line table ++ std::unique_ptr Context = DWARFContext::create(DebugObj); ++ ++ // Use symbol info to iterate over functions in the object. 
++ for (const std::pair &P : computeSymbolSizes(DebugObj)) { ++ SymbolRef Sym = P.first; ++ std::string SourceFileName; ++ ++ Expected SymTypeOrErr = Sym.getType(); ++ if (!SymTypeOrErr) { ++ // There's not much we can do with errors here ++ consumeError(SymTypeOrErr.takeError()); ++ continue; ++ } ++ SymbolRef::Type SymType = *SymTypeOrErr; ++ if (SymType != SymbolRef::ST_Function) ++ continue; ++ ++ Expected Name = Sym.getName(); ++ if (!Name) { ++ consumeError(Name.takeError()); ++ continue; ++ } ++ ++ Expected AddrOrErr = Sym.getAddress(); ++ if (!AddrOrErr) { ++ consumeError(AddrOrErr.takeError()); ++ continue; ++ } ++ uint64_t Addr = *AddrOrErr; ++ uint64_t Size = P.second; ++ ++ // According to the spec, the debug info record has to be written before ++ // the corresponding code load record. ++ DILineInfoTable Lines = Context->getLineInfoForAddressRange( ++ Addr, Size, FileLineInfoKind::AbsoluteFilePath); ++ ++ NotifyDebug(Addr, Lines); ++ NotifyCode(Name, Addr, Size); ++ } ++ ++ Dumpstream->flush(); ++} ++ ++void PerfJITEventListener::NotifyFreeingObject(const ObjectFile &Obj) { ++ // perf currently doesn't have an interface for unloading. But munmap()ing the ++ // code section does, so that's ok. 
++} ++ ++bool PerfJITEventListener::InitDebuggingDir() { ++ time_t Time; ++ struct tm LocalTime; ++ char TimeBuffer[sizeof("YYYYMMDD")]; ++ SmallString<64> Path; ++ ++ // search for location to dump data to ++ if (const char *BaseDir = getenv("JITDUMPDIR")) ++ Path.append(BaseDir); ++ else if (!sys::path::home_directory(Path)) ++ Path = "."; ++ ++ // create debug directory ++ Path += "/.debug/jit/"; ++ if (auto EC = sys::fs::create_directories(Path)) { ++ errs() << "could not create jit cache directory " << Path << ": " ++ << EC.message() << "\n"; ++ return false; ++ } ++ ++ // create unique directory for dump data related to this process ++ time(&Time); ++ localtime_r(&Time, &LocalTime); ++ strftime(TimeBuffer, sizeof(TimeBuffer), "%Y%m%d", &LocalTime); ++ Path += JIT_LANG "-jit-"; ++ Path += TimeBuffer; ++ ++ SmallString<128> UniqueDebugDir; ++ ++ using sys::fs::createUniqueDirectory; ++ if (auto EC = createUniqueDirectory(Path, UniqueDebugDir)) { ++ errs() << "could not create unique jit cache directory " << UniqueDebugDir ++ << ": " << EC.message() << "\n"; ++ return false; ++ } ++ ++ JitPath = UniqueDebugDir.str(); ++ ++ return true; ++} ++ ++bool PerfJITEventListener::OpenMarker() { ++ // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap ++ // is captured either live (perf record running when we mmap) or in deferred ++ // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump ++ // file for more meta data info about the jitted code. Perf report/annotate ++ // detect this special filename and process the jitdump file. ++ // ++ // Mapping must be PROT_EXEC to ensure it is captured by perf record ++ // even when not using -d option. 
++ MarkerAddr = ::mmap(NULL, sys::Process::getPageSize(), PROT_READ | PROT_EXEC, ++ MAP_PRIVATE, DumpFd, 0); ++ ++ if (MarkerAddr != MAP_FAILED) ++ return true; ++ errs() << "could not mmap JIT marker\n"; ++ MarkerAddr = nullptr; // MAP_FAILED is non-null: reset it so the destructor / CloseMarker() never munmap() a failed mapping ++ return false; ++} ++ ++void PerfJITEventListener::CloseMarker() { ++ if (!MarkerAddr) ++ return; ++ ++ munmap(MarkerAddr, sys::Process::getPageSize()); ++ MarkerAddr = nullptr; ++} ++ ++bool PerfJITEventListener::FillMachine(LLVMPerfJitHeader &hdr) { ++ char id[16]; ++ struct { ++ uint16_t e_type; ++ uint16_t e_machine; ++ } info; ++ ++ size_t RequiredMemory = sizeof(id) + sizeof(info); ++ ++ ErrorOr> MB = ++ MemoryBuffer::getFileSlice("/proc/self/exe", ++ RequiredMemory, ++ 0); ++ ++ // This'll not guarantee that enough data was actually read from the ++ // underlying file. Instead the trailing part of the buffer would be ++ // zeroed. Given the ELF signature check below that seems ok though, ++ // it's unlikely that the file ends just after that, and the ++ // consequence would just be that perf wouldn't recognize the ++ // signature. ++ if (auto EC = MB.getError()) { ++ errs() << "could not open /proc/self/exe: " << EC.message() << "\n"; ++ return false; ++ } ++ ++ memcpy(&id, (*MB)->getBufferStart(), sizeof(id)); ++ memcpy(&info, (*MB)->getBufferStart() + sizeof(id), sizeof(info)); ++ ++ // check ELF signature ++ if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') { ++ errs() << "invalid elf signature\n"; ++ return false; ++ } ++ ++ hdr.ElfMach = info.e_machine; ++ ++ return true; ++} ++ ++void PerfJITEventListener::NotifyCode(Expected &Symbol, ++ uint64_t CodeAddr, uint64_t CodeSize) { ++ assert(SuccessfullyInitialized); ++ ++ // 0 length functions can't have samples. 
++ if (CodeSize == 0) ++ return; ++ ++ LLVMPerfJitRecordCodeLoad rec; ++ rec.Prefix.Id = JIT_CODE_LOAD; ++ rec.Prefix.TotalSize = sizeof(rec) + // code load record itself ++ Symbol->size() + 1 + // symbol name incl. terminating NUL ++ CodeSize; // and code ++ rec.Prefix.Timestamp = perf_get_timestamp(); ++ ++ rec.CodeSize = CodeSize; ++ rec.Vma = 0; ++ rec.CodeAddr = CodeAddr; ++ rec.Pid = Pid; ++ rec.Tid = get_threadid(); ++ ++ // avoid interspersing output ++ MutexGuard Guard(Mutex); ++ ++ rec.CodeIndex = CodeGeneration++; // under lock! ++ ++ Dumpstream->write(reinterpret_cast(&rec), sizeof(rec)); ++ Dumpstream->write(Symbol->data(), Symbol->size() + 1); ++ Dumpstream->write(reinterpret_cast(CodeAddr), CodeSize); ++} ++ ++void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr, ++ DILineInfoTable Lines) { ++ assert(SuccessfullyInitialized); ++ ++ // Didn't get useful debug info. ++ if (Lines.empty()) ++ return; ++ ++ LLVMPerfJitRecordDebugInfo rec; ++ rec.Prefix.Id = JIT_CODE_DEBUG_INFO; ++ rec.Prefix.TotalSize = sizeof(rec); // will be increased further ++ rec.Prefix.Timestamp = perf_get_timestamp(); ++ rec.CodeAddr = CodeAddr; ++ rec.NrEntry = Lines.size(); ++ ++ // compute total size of the record (variable due to filenames) ++ DILineInfoTable::iterator Begin = Lines.begin(); ++ DILineInfoTable::iterator End = Lines.end(); ++ for (DILineInfoTable::iterator It = Begin; It != End; ++It) { ++ DILineInfo &line = It->second; ++ rec.Prefix.TotalSize += sizeof(LLVMPerfJitDebugEntry); ++ rec.Prefix.TotalSize += line.FileName.size() + 1; ++ } ++ ++ // The debug_entry describes the source line information. 
It is defined as ++ // follows in order: ++ // * uint64_t code_addr: address of function for which the debug information ++ // is generated ++ // * uint32_t line : source file line number (starting at 1) ++ // * uint32_t discrim : column discriminator, 0 is default ++ // * char name[n] : source file name in ASCII, including null termination ++ ++ // avoid interspersing output ++ MutexGuard Guard(Mutex); ++ ++ Dumpstream->write(reinterpret_cast(&rec), sizeof(rec)); ++ ++ for (DILineInfoTable::iterator It = Begin; It != End; ++It) { ++ LLVMPerfJitDebugEntry LineInfo; ++ DILineInfo &Line = It->second; ++ ++ LineInfo.Addr = It->first; ++ // The function re-created by perf is preceded by an ELF ++ // header. Need to adjust for that, otherwise the results are ++ // wrong. ++ LineInfo.Addr += 0x40; // presumably sizeof(Elf64_Ehdr) -- TODO confirm against perf's jitdump handling ++ LineInfo.Lineno = Line.Line; ++ LineInfo.Discrim = Line.Discriminator; ++ ++ Dumpstream->write(reinterpret_cast(&LineInfo), ++ sizeof(LineInfo)); ++ Dumpstream->write(Line.FileName.c_str(), Line.FileName.size() + 1); ++ } ++} ++ ++// There should be only a single event listener per process, otherwise perf gets ++// confused. 
++llvm::ManagedStatic PerfListener; ++ ++} // end anonymous namespace ++ ++namespace llvm { ++JITEventListener *JITEventListener::createPerfJITEventListener() { ++ return &*PerfListener; ++} ++ ++} // namespace llvm ++ +-- +2.17.1 + diff --git a/deps/patches/llvm-OProfile-line-num.patch b/deps/patches/llvm-OProfile-line-num.patch new file mode 100644 index 0000000000000..03b2ca810d7f5 --- /dev/null +++ b/deps/patches/llvm-OProfile-line-num.patch @@ -0,0 +1,48 @@ +commit 4840cf7299bb312125d41fc84733c15c2370f18e +Author: DokFaust +Date: Fri Jun 8 19:23:01 2018 +0200 + + Add debug line-level code information to OProfile module + +diff --git a/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt b/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt +index 7d5550046a5..ea100286318 100644 +--- a/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt ++++ b/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt +@@ -24 +24 @@ parent = ExecutionEngine +-required_libraries = Support Object ExecutionEngine ++required_libraries = DebugInfoDWARF Support Object ExecutionEngine +diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp +index 3581d645839..045ecb82853 100644 +--- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp ++++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp +@@ -26,0 +27,2 @@ ++#include "llvm/DebugInfo/DIContext.h" ++#include "llvm/DebugInfo/DWARF/DWARFContext.h" +@@ -86,0 +89,2 @@ void OProfileJITEventListener::NotifyObjectEmitted( ++ std::unique_ptr Context = DWARFContext::create(DebugObj); ++ std::string SourceFileName; +@@ -111 +115,23 @@ void OProfileJITEventListener::NotifyObjectEmitted( +- // TODO: support line number info (similar to IntelJITEventListener.cpp) ++ DILineInfoTable Lines = Context->getLineInfoForAddressRange(Addr, Size); ++ DILineInfoTable::iterator Begin = Lines.begin(); ++ DILineInfoTable::iterator End = Lines.end(); ++ size_t i = 0; ++ ++ size_t num_entries 
= std::distance(Begin, End); ++ std::vector<debug_line_info> debug_line_buf(num_entries); // zero-initialised like calloc(), released on every exit path (the old static calloc() buffer leaked per symbol) ++ struct debug_line_info *debug_line = debug_line_buf.data(); ++ ++ for(DILineInfoTable::iterator It=Begin; It != End; ++It){ ++ i = std::distance(Begin,It); ++ debug_line[i].vma = (unsigned long) It->first; ++ debug_line[i].lineno = It->second.Line; ++ SourceFileName = Lines.front().second.FileName; ++ debug_line[i].filename = const_cast(SourceFileName.c_str()); ++ } ++ ++ if(Wrapper->op_write_debug_line_info((void*) Addr, num_entries, debug_line) == -1) { ++ DEBUG(dbgs() << "Failed to tell OProfiler about debug object at [" ++ << (void*) Addr << "-" << ((char *) Addr + Size) ++ << "]\n"); ++ continue; ++ } diff --git a/src/codegen.cpp b/src/codegen.cpp index b63e5d911833f..8a02462db1045 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -7467,6 +7467,22 @@ extern "C" void *jl_init_llvm(void) jl_setup_module(engine_module); jl_setup_module(m); + // Register the enabled profiler listeners before returning; statements placed after the return never run. +#ifdef JL_USE_INTEL_JITEVENTS + if (jl_using_intel_jitevents) + jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createIntelJITEventListener()); +#endif + +#ifdef JL_USE_OPROFILE_JITEVENTS + if (jl_using_oprofile_jitevents) + jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createOProfileJITEventListener()); +#endif + +#ifdef JL_USE_PERF_JITEVENTS + if (jl_using_perf_jitevents) { + jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createPerfJITEventListener()); + } +#endif return (void*)m; } extern "C" void jl_init_codegen(void) diff --git a/src/init.c b/src/init.c index fe4ddece5218d..0989b2c2b6b1d 100644 --- a/src/init.c +++ b/src/init.c @@ -460,6 +460,10 @@ char jl_using_intel_jitevents; // Non-zero if running under Intel VTune Amplifie char jl_using_oprofile_jitevents = 0; // Non-zero if running under OProfile #endif +#ifdef JL_USE_PERF_JITEVENTS +char jl_using_perf_jitevents = 0; +#endif + int isabspath(const char *in) { #ifdef _OS_WINDOWS_ @@ -687,6 +691,12 @@ void 
_julia_init(JL_IMAGE_SEARCH rel) } #endif +#if defined(JL_USE_PERF_JITEVENTS) + const char *jit_profiling = getenv("ENABLE_JITPROFILING"); + if (jit_profiling && atoi(jit_profiling)) { + jl_using_perf_jitevents= 1; + } +#endif #if defined(__linux__) int ncores = jl_cpu_threads(); diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 8db23b00307aa..c43f0a2e017e7 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -410,6 +410,7 @@ void JuliaOJIT::DebugObjectRegistrar::registerObject(RTDyldObjHandleT H, const O NotifyGDB(SavedObject); } + JIT.NotifyFinalizer(*Object, *LO); SavedObjects.push_back(std::move(SavedObject)); ORCNotifyObjectEmitted(JuliaListener.get(), *Object, @@ -681,7 +682,16 @@ Function *JuliaOJIT::FindFunctionNamed(const std::string &Name) void JuliaOJIT::RegisterJITEventListener(JITEventListener *L) { - // TODO + if (!L) + return; + EventListeners.push_back(L); +} + +void JuliaOJIT::NotifyFinalizer(const object::ObjectFile &Obj, + const RuntimeDyld::LoadedObjectInfo &LoadedObjectInfo) +{ + for (auto &Listener : EventListeners) + Listener->NotifyObjectEmitted(Obj, LoadedObjectInfo); } const DataLayout& JuliaOJIT::getDataLayout() const diff --git a/src/jitlayers.h b/src/jitlayers.h index 2b9041dc26af5..aadda5d534cfb 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -142,6 +142,9 @@ class JuliaOJIT { JuliaOJIT(TargetMachine &TM); + void RegisterJITEventListener(JITEventListener *L); + std::vector EventListeners; + void NotifyFinalizer(const object::ObjectFile &Obj, const RuntimeDyld::LoadedObjectInfo &LoadedObjectInfo); void addGlobalMapping(StringRef Name, uint64_t Addr); void addGlobalMapping(const GlobalValue *GV, void *Addr); void *getPointerToGlobalIfAvailable(StringRef S); @@ -153,7 +156,6 @@ class JuliaOJIT { uint64_t getGlobalValueAddress(const std::string &Name); uint64_t getFunctionAddress(const std::string &Name); Function *FindFunctionNamed(const std::string &Name); - void RegisterJITEventListener(JITEventListener *L); const 
DataLayout& getDataLayout() const; const Triple& getTargetTriple() const; private: diff --git a/src/julia_internal.h b/src/julia_internal.h index 36cac3eaf9210..ede2b78d80758 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -473,6 +473,9 @@ extern char jl_using_intel_jitevents; #ifdef JL_USE_OPROFILE_JITEVENTS extern char jl_using_oprofile_jitevents; #endif +#ifdef JL_USE_PERF_JITEVENTS +extern char jl_using_perf_jitevents; +#endif extern size_t jl_arr_xtralloc_limit; void jl_init_types(void);