Skip to content

Commit

Permalink
Add pass to load definitions of common functions from bc files
Browse files Browse the repository at this point in the history
Starting with cblas_ddot
  • Loading branch information
reikdas committed Jul 10, 2021
1 parent a75e86a commit 7313c29
Show file tree
Hide file tree
Showing 11 changed files with 332 additions and 1 deletion.
47 changes: 47 additions & 0 deletions .github/workflows/bcload.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Workflow: on every push, build Enzyme against each LLVM/OS combination in
# the matrix and run the bitcode-loader lit suite (`make check-bcpass`).
name: Bitcode loading CI

on: [push]

jobs:
build:
name: Bitcode loading CI LLVM ${{ matrix.llvm }} ${{ matrix.build }} ${{ matrix.os }}
runs-on: ${{ matrix.os }}

strategy:
# Let the remaining matrix entries finish even when one of them fails.
fail-fast: false
matrix:
llvm: ["7", "8", "9", "10", "11", "12"]
build: ["Release"] # "RelWithDebInfo"
os: [ubuntu-20.04, ubuntu-18.04]

# LLVM 8 on ubuntu-18.04 is left out of the matrix (see the open question below).
exclude:
# How to install FileCheck on ubuntu-18.04?
- os: ubuntu-18.04
llvm: 8

timeout-minutes: 30
steps:
# Adds the apt.llvm.org repository for the selected LLVM version, then installs
# the compiler toolchain, LLVM/clang/OpenMP packages, Eigen, Boost and lit.
# The `touch` creates an empty yaml-bench file if none exists (presumably
# because the LLVM CMake package expects it to be present -- TODO confirm).
# llvm-<ver>-tools is installed only for LLVM 7, 8 and 9.
- name: add llvm
run: |
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add -
sudo apt-add-repository "deb http://apt.llvm.org/`lsb_release -c | cut -f2`/ llvm-toolchain-`lsb_release -c | cut -f2`-${{ matrix.llvm }} main" || true
sudo apt-get install -y autoconf cmake gcc g++ libtool gfortran llvm-${{ matrix.llvm }}-dev libomp-${{ matrix.llvm }}-dev clang-${{ matrix.llvm }} libeigen3-dev libboost-dev
sudo python3 -m pip install --upgrade pip setuptools
sudo python3 -m pip install lit
sudo touch /usr/lib/llvm-${{ matrix.llvm }}/bin/yaml-bench
if [[ '${{ matrix.llvm }}' == '7' || '${{ matrix.llvm }}' == '8' || '${{ matrix.llvm }}' == '9' ]]; then
sudo apt-get install -y llvm-${{ matrix.llvm }}-tools
fi
- uses: actions/checkout@v1
with:
fetch-depth: 1
# Start from an empty build directory inside the enzyme/ source tree.
- name: mkdir
run: cd enzyme && rm -rf build && mkdir build
# Configure against the matrix LLVM install, using the pip-installed lit.
- name: cmake
run: |
cd enzyme/build
cmake .. -DLLVM_EXTERNAL_LIT=`which lit` -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DLLVM_DIR=/usr/lib/llvm-${{ matrix.llvm }}/lib/cmake/llvm
- name: make
run: cd enzyme/build && make -j`nproc`
# Runs the lit suite registered as the check-bcpass target.
- name: make check-bcpass
run: cd enzyme/build && make check-bcpass -j`nproc`
57 changes: 57 additions & 0 deletions enzyme/BCLoad/BCLoader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker/Linker.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"

#include <set>
#include <string>

using namespace llvm;

// Hidden command-line option `-bcpath=<dir>`: directory the pass reads its
// bitcode definition files from. Defaults to the empty string.
cl::opt<std::string> BCPath("bcpath", cl::init(""), cl::Hidden,
cl::desc("Path to BC definitions"));

namespace {
class BCLoader : public ModulePass {
public:
static char ID;
BCLoader() : ModulePass(ID) {}

bool runOnModule(Module &M) override {
std::set<std::string> bcfuncs = {"cblas_ddot"};
for (std::string name : bcfuncs) {
if (name == "cblas_ddot") {
SMDiagnostic Err;
#if LLVM_VERSION_MAJOR <= 10
auto BC = llvm::parseIRFile(
BCPath + "/cblas_ddot_double.bc", Err, M.getContext(), true,
M.getDataLayout().getStringRepresentation());
#else
auto BC = llvm::parseIRFile(
BCPath + "/cblas_ddot_double.bc", Err, M.getContext(),
[&](StringRef) {
return Optional<std::string>(
M.getDataLayout().getStringRepresentation());
});
#endif
if (!BC)
Err.print("bcloader", llvm::errs());
assert(BC);
Linker L(M);
L.linkInModule(std::move(BC));
}
}
return true;
}
};
} // namespace

// Storage for the pass ID that the BCLoader constructor hands to ModulePass.
char BCLoader::ID = 0;

// Registers the pass under the command-line name "bcloader".
static RegisterPass<BCLoader> X("bcloader",
"Link bitcode files for known functions");

// Factory declared in BCLoader.h; returns a newly allocated BCLoader.
ModulePass *createBCLoaderPass() { return new BCLoader(); }
3 changes: 3 additions & 0 deletions enzyme/BCLoad/BCLoader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#ifndef ENZYME_BCLOAD_BCLOADER_H
#define ENZYME_BCLOAD_BCLOADER_H

#include "llvm/Pass.h"

/// Creates the legacy module pass that links bitcode definitions of known
/// functions into a module. The pass is allocated with `new`; the caller
/// takes ownership of the returned pointer.
llvm::ModulePass *createBCLoaderPass();

#endif // ENZYME_BCLOAD_BCLOADER_H
39 changes: 39 additions & 0 deletions enzyme/BCLoad/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Compile the sources in this directory as C++17 and fail configuration
# rather than fall back to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Define the loadable pass module BCPass-<LLVM major version>.
#
# The bare variable name is used in the condition: if() dereferences it
# itself, whereas an empty `${LLVM_VERSION_MAJOR}` expansion would leave
# `if( LESS 8)`, which is a syntax error.
if (LLVM_VERSION_MAJOR LESS 8)
  # LLVM releases before 8 are built with add_llvm_loadable_module().
  add_llvm_loadable_module(BCPass-${LLVM_VERSION_MAJOR}
    BCLoader.cpp
    ClangBCLoader.cpp
    DEPENDS
      intrinsics_gen
    PLUGIN_TOOL
      opt
  )
elseif ((WIN32 OR CYGWIN) AND LLVM_LINK_LLVM_DYLIB)
  # on windows `PLUGIN_TOOL` doesn't link against LLVM.dll
  add_llvm_library(BCPass-${LLVM_VERSION_MAJOR}
    BCLoader.cpp
    ClangBCLoader.cpp
    MODULE
    DEPENDS
      intrinsics_gen
    LINK_COMPONENTS
      LLVM
  )
else()
  add_llvm_library(BCPass-${LLVM_VERSION_MAJOR}
    BCLoader.cpp
    ClangBCLoader.cpp
    MODULE
    DEPENDS
      intrinsics_gen
    PLUGIN_TOOL
      opt
  )
endif()

if (APPLE)
  # Darwin only: link the module with a flat namespace and suppress
  # undefined-symbol errors at link time.
  set_property(TARGET BCPass-${LLVM_VERSION_MAJOR}
    PROPERTY LINK_FLAGS "-Wl,-flat_namespace -Wl,-undefined -Wl,suppress")
endif()
20 changes: 20 additions & 0 deletions enzyme/BCLoad/ClangBCLoader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"

#include "BCLoader.h"

#include "llvm/LinkAllPasses.h"

using namespace llvm;

// This function is of type PassManagerBuilder::ExtensionFn
static void loadPass(const PassManagerBuilder &Builder,
legacy::PassManagerBase &PM) {
PM.add(createBCLoaderPass());
}

// These constructors add our pass to a list of global extensions.
// loadPass is registered at two extension points: EP_ModuleOptimizerEarly
// (the _Ox object) and EP_EnabledOnOptLevel0 (the _O0 object).
static RegisterStandardPasses
clangtoolLoader_Ox(PassManagerBuilder::EP_ModuleOptimizerEarly, loadPass);
static RegisterStandardPasses
clangtoolLoader_O0(PassManagerBuilder::EP_EnabledOnOptLevel0, loadPass);
1 change: 1 addition & 0 deletions enzyme/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/include/SCEV/ScalarEvolutionExpander.h"
# Make the headers generated under the binary directory visible to every
# target defined below.
include_directories("${CMAKE_CURRENT_BINARY_DIR}/include")

# BCLoad (the bitcode-loading pass) is added before test, whose
# ENZYME_TEST_DEPS list names the BCPass target.
add_subdirectory(Enzyme)
add_subdirectory(BCLoad)
add_subdirectory(test)

# The benchmarks data are not in git-exported source archives to minimize size.
Expand Down
Binary file added enzyme/bclib/cblas_ddot_double.bc
Binary file not shown.
9 changes: 9 additions & 0 deletions enzyme/test/BCLoader/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Register the BCPass lit suite as the `check-bcpass` target. The pass
# modules listed in ENZYME_TEST_DEPS are built before the tests run.
add_lit_testsuite(check-bcpass
  "Running BCPass regression tests"
  ${CMAKE_CURRENT_BINARY_DIR}
  DEPENDS ${ENZYME_TEST_DEPS}
  ARGS -v
)

# Show the target under the "Tests" folder in IDE generators.
set_property(TARGET check-bcpass PROPERTY FOLDER "Tests")

150 changes: 150 additions & 0 deletions enzyme/test/BCLoader/bcloader-ddot.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
;RUN: if [ %llvmver -ge 10 ]; then %clang %s -Xclang -load -Xclang %loadBC -mllvm -bcpath=%BClibdir -S -emit-llvm -o - | %FileCheck %s; fi

;#include <cblas.h>
;#include <stdio.h>
;
;extern double __enzyme_autodiff(void *, double *, double *, double *,
; double *);
;
;double g(double *m, double *n) {
; double x = cblas_ddot(3, m, 1, n, 1);
; m[0] = 11.0;
; m[1] = 12.0;
; m[2] = 13.0;
; double y = x * x;
; return y;
;}
;
;int main() {
; double m[3] = {1, 2, 3};
; double m1[3] = {0, 0, 0};
; double n[3] = {4, 5, 6};
; double n1[3] = {0, 0, 0};
; double val = __enzyme_autodiff((void*)g, m, m1, n, n1);
; return 1;
;}

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@__const.main.m = private unnamed_addr constant [3 x double] [double 1.000000e+00, double 2.000000e+00, double 3.000000e+00], align 16
@__const.main.n = private unnamed_addr constant [3 x double] [double 4.000000e+00, double 5.000000e+00, double 6.000000e+00], align 16

; IR for the C function g shown above: calls cblas_ddot(3, m, 1, n, 1),
; then stores 11.0, 12.0 and 13.0 into m[0..2] and returns the square of
; the call result.
; Function Attrs: noinline nounwind optnone uwtable
define dso_local double @g(double* %m, double* %n) {
entry:
%m.addr = alloca double*, align 8
%n.addr = alloca double*, align 8
%x = alloca double, align 8
%y = alloca double, align 8
store double* %m, double** %m.addr, align 8
store double* %n, double** %n.addr, align 8
%0 = load double*, double** %m.addr, align 8
%1 = load double*, double** %n.addr, align 8
%call = call double @cblas_ddot(i32 3, double* %0, i32 1, double* %1, i32 1)
store double %call, double* %x, align 8
%2 = load double*, double** %m.addr, align 8
%arrayidx = getelementptr inbounds double, double* %2, i64 0
store double 1.100000e+01, double* %arrayidx, align 8
%3 = load double*, double** %m.addr, align 8
%arrayidx1 = getelementptr inbounds double, double* %3, i64 1
store double 1.200000e+01, double* %arrayidx1, align 8
%4 = load double*, double** %m.addr, align 8
%arrayidx2 = getelementptr inbounds double, double* %4, i64 2
store double 1.300000e+01, double* %arrayidx2, align 8
%5 = load double, double* %x, align 8
%6 = load double, double* %x, align 8
%mul = fmul double %5, %6
store double %mul, double* %y, align 8
%7 = load double, double* %y, align 8
ret double %7
}

declare dso_local double @cblas_ddot(i32, double*, i32, double*, i32)

; IR for main shown above: copies the constant initialisers into m and n,
; zero-fills m1 and n1, then calls __enzyme_autodiff with @g and the four
; array pointers. Always returns 1.
; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 @main() {
entry:
%retval = alloca i32, align 4
%m = alloca [3 x double], align 16
%m1 = alloca [3 x double], align 16
%n = alloca [3 x double], align 16
%n1 = alloca [3 x double], align 16
%val = alloca double, align 8
store i32 0, i32* %retval, align 4
%0 = bitcast [3 x double]* %m to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast ([3 x double]* @__const.main.m to i8*), i64 24, i1 false)
%1 = bitcast [3 x double]* %m1 to i8*
call void @llvm.memset.p0i8.i64(i8* align 16 %1, i8 0, i64 24, i1 false)
%2 = bitcast [3 x double]* %n to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %2, i8* align 16 bitcast ([3 x double]* @__const.main.n to i8*), i64 24, i1 false)
%3 = bitcast [3 x double]* %n1 to i8*
call void @llvm.memset.p0i8.i64(i8* align 16 %3, i8 0, i64 24, i1 false)
%arraydecay = getelementptr inbounds [3 x double], [3 x double]* %m, i32 0, i32 0
%arraydecay1 = getelementptr inbounds [3 x double], [3 x double]* %m1, i32 0, i32 0
%arraydecay2 = getelementptr inbounds [3 x double], [3 x double]* %n, i32 0, i32 0
%arraydecay3 = getelementptr inbounds [3 x double], [3 x double]* %n1, i32 0, i32 0
%call = call double @__enzyme_autodiff(i8* bitcast (double (double*, double*)* @g to i8*), double* %arraydecay, double* %arraydecay1, double* %arraydecay2, double* %arraydecay3)
store double %call, double* %val, align 8
ret i32 1
}

; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)

; Function Attrs: argmemonly nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1)

declare dso_local double @__enzyme_autodiff(i8*, double*, double*, double*, double*)

;CHECK: define dso_local double @cblas_ddot(i32 %__N, double* %__X, i32 %__incX, double* %__Y, i32 %__incY)
;CHECK-NEXT: entry:
;CHECK-NEXT: %__N.addr = alloca i32, align 4
;CHECK-NEXT: %__X.addr = alloca double*, align 8
;CHECK-NEXT: %__incX.addr = alloca i32, align 4
;CHECK-NEXT: %__Y.addr = alloca double*, align 8
;CHECK-NEXT: %__incY.addr = alloca i32, align 4
;CHECK-NEXT: %sum = alloca double, align 8
;CHECK-NEXT: %i = alloca i32, align 4
;CHECK-NEXT: store i32 %__N, i32* %__N.addr, align 4
;CHECK-NEXT: store double* %__X, double** %__X.addr, align 8
;CHECK-NEXT: store i32 %__incX, i32* %__incX.addr, align 4
;CHECK-NEXT: store double* %__Y, double** %__Y.addr, align 8
;CHECK-NEXT: store i32 %__incY, i32* %__incY.addr, align 4
;CHECK-NEXT: store double 0.000000e+00, double* %sum, align 8
;CHECK-NEXT: store i32 0, i32* %i, align 4
;CHECK-NEXT: br label %for.cond

;CHECK: for.cond: ; preds = %for.inc, %entry
;CHECK-NEXT: %0 = load i32, i32* %i, align 4
;CHECK-NEXT: %1 = load i32, i32* %__N.addr, align 4
;CHECK-NEXT: %cmp = icmp slt i32 %0, %1
;CHECK-NEXT: br i1 %cmp, label %for.body, label %for.end

;CHECK: for.body: ; preds = %for.cond
;CHECK-NEXT: %2 = load double, double* %sum, align 8
;CHECK-NEXT: %3 = load double*, double** %__X.addr, align 8
;CHECK-NEXT: %4 = load i32, i32* %i, align 4
;CHECK-NEXT: %idxprom = sext i32 %4 to i64
;CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %3, i64 %idxprom
;CHECK-NEXT: %5 = load double, double* %arrayidx, align 8
;CHECK-NEXT: %6 = load double*, double** %__Y.addr, align 8
;CHECK-NEXT: %7 = load i32, i32* %i, align 4
;CHECK-NEXT: %idxprom1 = sext i32 %7 to i64
;CHECK-NEXT: %arrayidx2 = getelementptr inbounds double, double* %6, i64 %idxprom1
;CHECK-NEXT: %8 = load double, double* %arrayidx2, align 8
;CHECK-NEXT: %mul = fmul double %5, %8
;CHECK-NEXT: %add = fadd double %2, %mul
;CHECK-NEXT: store double %add, double* %sum, align 8
;CHECK-NEXT: br label %for.inc

;CHECK: for.inc: ; preds = %for.body
;CHECK-NEXT: %9 = load i32, i32* %i, align 4
;CHECK-NEXT: %inc = add nsw i32 %9, 1
;CHECK-NEXT: store i32 %inc, i32* %i, align 4
;CHECK-NEXT: br label %for.cond

;CHECK: for.end: ; preds = %for.cond
;CHECK-NEXT: %10 = load double, double* %sum, align 8
;CHECK-NEXT: ret double %10
;CHECK-NEXT: }
3 changes: 2 additions & 1 deletion enzyme/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@ configure_lit_site_cfg(
${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py
)

# Targets that must be built before any lit suite runs: the Enzyme plugin
# and the bitcode-loading pass module.
set(ENZYME_TEST_DEPS
  LLVMEnzyme-${LLVM_VERSION_MAJOR}
  BCPass-${LLVM_VERSION_MAJOR}
)

# One subdirectory per lit suite; BCLoader holds the bitcode-loader tests.
add_subdirectory(ActivityAnalysis)
add_subdirectory(TypeAnalysis)
add_subdirectory(Enzyme/ReverseMode)
add_subdirectory(Enzyme/ForwardMode)
add_subdirectory(Integration/ReverseMode)
add_subdirectory(Integration/ForwardMode)
add_subdirectory(BCLoader)

# Aggregate targets combining the reverse- and forward-mode suites.
# NOTE(review): check-bcpass is not a dependency of either aggregate, so it
# only runs when requested by name -- confirm that this is intended.
add_custom_target(check-enzyme DEPENDS check-enzyme-reverse check-enzyme-forward)
add_custom_target(check-enzyme-integration DEPENDS check-enzyme-integration-reverse check-enzyme-integration-forward)
4 changes: 4 additions & 0 deletions enzyme/test/lit.site.cfg.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ config.substitutions.append(('%clang', config.llvm_tools_dir + "/clang"))
# %loadEnzyme expands to the -load=<path> flag for the built Enzyme plugin.
enzyme_plugin_flag = (' -load=@ENZYME_BINARY_DIR@/Enzyme/LLVMEnzyme-'
                      + config.llvm_ver + config.llvm_shlib_ext)
config.substitutions.append(('%loadEnzyme', enzyme_plugin_flag))
# %loadBC expands to the path of the built BCPass module (the leading space
# is part of the substitution); %BClibdir is the source-tree bclib directory.
bcpass_module_path = (' @ENZYME_BINARY_DIR@/BCLoad/BCPass-'
                      + config.llvm_ver + config.llvm_shlib_ext)
config.substitutions.append(('%loadBC', bcpass_module_path))
config.substitutions.append(('%BClibdir', '@ENZYME_SOURCE_DIR@/bclib/'))

# Let the main config do the real work.
lit_config.load_config(config, "@ENZYME_SOURCE_DIR@/test/lit.cfg.py")

0 comments on commit 7313c29

Please sign in to comment.