Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pass to load definitions of common functions from bc files - starting with cblas_ddot #220

Merged
merged 1 commit into from
Jul 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions .github/workflows/bcload.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# CI workflow that builds Enzyme and runs the `check-bcpass` test target.
name: Bitcode loading CI

# Triggered on every push.
on: [push]

jobs:
build:
name: Bitcode loading CI LLVM ${{ matrix.llvm }} ${{ matrix.build }} ${{ matrix.os }}
runs-on: ${{ matrix.os }}

# One job per (llvm, build, os) combination; fail-fast is off, so a failing
# combination does not cancel the others.
strategy:
fail-fast: false
matrix:
llvm: ["7", "8", "9", "10", "11", "12"]
build: ["Release"] # "RelWithDebInfo"
os: [ubuntu-20.04, ubuntu-18.04]

# Combinations removed from the matrix.
exclude:
# How to install FileCheck on ubuntu-18.04?
- os: ubuntu-18.04
llvm: 8

# Each job has a 30-minute timeout.
timeout-minutes: 30
# Steps: install the LLVM toolchain and lit, check out the repository,
# configure with CMake, build, then run `make check-bcpass`.
steps:
- name: add llvm
run: |
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add -
sudo apt-add-repository "deb http://apt.llvm.org/`lsb_release -c | cut -f2`/ llvm-toolchain-`lsb_release -c | cut -f2`-${{ matrix.llvm }} main" || true
sudo apt-get install -y autoconf cmake gcc g++ libtool gfortran llvm-${{ matrix.llvm }}-dev libomp-${{ matrix.llvm }}-dev clang-${{ matrix.llvm }} libeigen3-dev libboost-dev
sudo python3 -m pip install --upgrade pip setuptools
sudo python3 -m pip install lit
sudo touch /usr/lib/llvm-${{ matrix.llvm }}/bin/yaml-bench
if [[ '${{ matrix.llvm }}' == '7' || '${{ matrix.llvm }}' == '8' || '${{ matrix.llvm }}' == '9' ]]; then
sudo apt-get install -y llvm-${{ matrix.llvm }}-tools
fi
- uses: actions/checkout@v1
with:
fetch-depth: 1
- name: mkdir
run: cd enzyme && rm -rf build && mkdir build
- name: cmake
run: |
cd enzyme/build
cmake .. -DLLVM_EXTERNAL_LIT=`which lit` -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DLLVM_DIR=/usr/lib/llvm-${{ matrix.llvm }}/lib/cmake/llvm
- name: make
run: cd enzyme/build && make -j`nproc`
- name: make check-bcpass
run: cd enzyme/build && make check-bcpass -j`nproc`
57 changes: 57 additions & 0 deletions enzyme/BCLoad/BCLoader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker/Linker.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"

#include <set>
#include <string>

using namespace llvm;

// Hidden `-bcpath` command-line option; defaults to the empty string.
// BCLoader::runOnModule appends "/cblas_ddot_double.bc" to this value to form
// the path of the bitcode file it parses.
cl::opt<std::string> BCPath("bcpath", cl::init(""), cl::Hidden,
cl::desc("Path to BC definitions"));

namespace {
class BCLoader : public ModulePass {
public:
static char ID;
BCLoader() : ModulePass(ID) {}

bool runOnModule(Module &M) override {
std::set<std::string> bcfuncs = {"cblas_ddot"};
for (std::string name : bcfuncs) {
if (name == "cblas_ddot") {
SMDiagnostic Err;
#if LLVM_VERSION_MAJOR <= 10
auto BC = llvm::parseIRFile(
BCPath + "/cblas_ddot_double.bc", Err, M.getContext(), true,
M.getDataLayout().getStringRepresentation());
#else
auto BC = llvm::parseIRFile(
BCPath + "/cblas_ddot_double.bc", Err, M.getContext(),
[&](StringRef) {
return Optional<std::string>(
M.getDataLayout().getStringRepresentation());
});
#endif
if (!BC)
Err.print("bcloader", llvm::errs());
assert(BC);
Linker L(M);
L.linkInModule(std::move(BC));
}
}
return true;
}
};
} // namespace

// Definition of the static pass identifier declared in BCLoader; its address
// is what the BCLoader constructor passes on to ModulePass.
char BCLoader::ID = 0;

// Registers the pass under the command-line name "bcloader" with the
// description string given below.
static RegisterPass<BCLoader> X("bcloader",
"Link bitcode files for known functions");

// Factory declared in BCLoader.h: returns a newly heap-allocated BCLoader as a
// raw ModulePass pointer.
ModulePass *createBCLoaderPass() { return new BCLoader(); }
3 changes: 3 additions & 0 deletions enzyme/BCLoad/BCLoader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#ifndef ENZYME_BCLOAD_BCLOADER_H
#define ENZYME_BCLOAD_BCLOADER_H

#include "llvm/Pass.h"

/// Creates the bitcode-loading module pass (registered as "bcloader").
///
/// \returns a newly heap-allocated pass instance as a raw pointer.
llvm::ModulePass *createBCLoaderPass();

#endif // ENZYME_BCLOAD_BCLOADER_H
39 changes: 39 additions & 0 deletions enzyme/BCLoad/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Builds the BCPass-<LLVM major version> plugin module from BCLoader.cpp and
# ClangBCLoader.cpp.

# Require C++17 for the sources in this directory.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# LLVM versions below 8 use add_llvm_loadable_module; LLVM 8 and later use
# add_llvm_library(... MODULE) instead.
if (${LLVM_VERSION_MAJOR} LESS 8)
add_llvm_loadable_module( BCPass-${LLVM_VERSION_MAJOR}
BCLoader.cpp ClangBCLoader.cpp
DEPENDS
intrinsics_gen
PLUGIN_TOOL
opt
)
else()
# on windows `PLUGIN_TOOL` doesn't link against LLVM.dll
# so in that configuration the LLVM component is linked explicitly via
# LINK_COMPONENTS instead of using PLUGIN_TOOL.
if ((WIN32 OR CYGWIN) AND LLVM_LINK_LLVM_DYLIB)
add_llvm_library( BCPass-${LLVM_VERSION_MAJOR}
BCLoader.cpp ClangBCLoader.cpp
MODULE
DEPENDS
intrinsics_gen
LINK_COMPONENTS
LLVM
)
else()
add_llvm_library( BCPass-${LLVM_VERSION_MAJOR}
BCLoader.cpp ClangBCLoader.cpp
MODULE
DEPENDS
intrinsics_gen
PLUGIN_TOOL
opt
)
endif()
endif()

if (APPLE)
# Darwin-specific linker flags for loadable modules.
set_target_properties(BCPass-${LLVM_VERSION_MAJOR} PROPERTIES
LINK_FLAGS "-Wl,-flat_namespace -Wl,-undefined -Wl,suppress")
endif()
20 changes: 20 additions & 0 deletions enzyme/BCLoad/ClangBCLoader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"

#include "BCLoader.h"

#include "llvm/LinkAllPasses.h"

using namespace llvm;

// This function is of type PassManagerBuilder::ExtensionFn
static void loadPass(const PassManagerBuilder &Builder,
legacy::PassManagerBase &PM) {
PM.add(createBCLoaderPass());
}

// Global registration objects: each one hooks loadPass into a
// PassManagerBuilder extension point, so the pass is added to the list of
// global extensions. Two extension points are covered:
// EP_ModuleOptimizerEarly and EP_EnabledOnOptLevel0.
static RegisterStandardPasses
clangtoolLoader_Ox(PassManagerBuilder::EP_ModuleOptimizerEarly, loadPass);
static RegisterStandardPasses
clangtoolLoader_O0(PassManagerBuilder::EP_EnabledOnOptLevel0, loadPass);
1 change: 1 addition & 0 deletions enzyme/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/include/SCEV/ScalarEvolutionExpander.h"
include_directories("${CMAKE_CURRENT_BINARY_DIR}/include")

add_subdirectory(Enzyme)
add_subdirectory(BCLoad)
add_subdirectory(test)

# The benchmarks data are not in git-exported source archives to minimize size.
Expand Down
Binary file added enzyme/bclib/cblas_ddot_double.bc
Binary file not shown.
9 changes: 9 additions & 0 deletions enzyme/test/BCLoader/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Run regression and unit tests
# Defines the `check-bcpass` lit test suite over this directory's build tree.
# It depends on the targets listed in ENZYME_TEST_DEPS and passes `-v` to lit.
add_lit_testsuite(check-bcpass "Running BCPass regression tests"
${CMAKE_CURRENT_BINARY_DIR}
DEPENDS ${ENZYME_TEST_DEPS}
ARGS -v
)

# Place the target in the "Tests" folder.
set_target_properties(check-bcpass PROPERTIES FOLDER "Tests")

150 changes: 150 additions & 0 deletions enzyme/test/BCLoader/bcloader-ddot.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
;RUN: if [ %llvmver -ge 10 ]; then %clang %s -Xclang -load -Xclang %loadBC -mllvm -bcpath=%BClibdir -S -emit-llvm -o - | %FileCheck %s; fi

;#include <cblas.h>
;#include <stdio.h>
;
;extern double __enzyme_autodiff(void *, double *, double *, double *,
; double *);
;
;double g(double *m, double *n) {
; double x = cblas_ddot(3, m, 1, n, 1);
; m[0] = 11.0;
; m[1] = 12.0;
; m[2] = 13.0;
; double y = x * x;
; return y;
;}
;
;int main() {
; double m[3] = {1, 2, 3};
; double m1[3] = {0, 0, 0};
; double n[3] = {4, 5, 6};
; double n1[3] = {0, 0, 0};
; double val = __enzyme_autodiff((void*)g, m, m1, n, n1);
; return 1;
;}

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@__const.main.m = private unnamed_addr constant [3 x double] [double 1.000000e+00, double 2.000000e+00, double 3.000000e+00], align 16
@__const.main.n = private unnamed_addr constant [3 x double] [double 4.000000e+00, double 5.000000e+00, double 6.000000e+00], align 16

; g(m, n): calls cblas_ddot(3, m, 1, n, 1) and keeps the result in %x, then
; overwrites m[0], m[1], m[2] with 11.0, 12.0, 13.0, and returns %x * %x.
; Function Attrs: noinline nounwind optnone uwtable
define dso_local double @g(double* %m, double* %n) {
entry:
%m.addr = alloca double*, align 8
%n.addr = alloca double*, align 8
%x = alloca double, align 8
%y = alloca double, align 8
store double* %m, double** %m.addr, align 8
store double* %n, double** %n.addr, align 8
%0 = load double*, double** %m.addr, align 8
%1 = load double*, double** %n.addr, align 8
%call = call double @cblas_ddot(i32 3, double* %0, i32 1, double* %1, i32 1)
store double %call, double* %x, align 8
%2 = load double*, double** %m.addr, align 8
%arrayidx = getelementptr inbounds double, double* %2, i64 0
store double 1.100000e+01, double* %arrayidx, align 8
%3 = load double*, double** %m.addr, align 8
%arrayidx1 = getelementptr inbounds double, double* %3, i64 1
store double 1.200000e+01, double* %arrayidx1, align 8
%4 = load double*, double** %m.addr, align 8
%arrayidx2 = getelementptr inbounds double, double* %4, i64 2
store double 1.300000e+01, double* %arrayidx2, align 8
%5 = load double, double* %x, align 8
%6 = load double, double* %x, align 8
%mul = fmul double %5, %6
store double %mul, double* %y, align 8
%7 = load double, double* %y, align 8
ret double %7
}

declare dso_local double @cblas_ddot(i32, double*, i32, double*, i32)

; main: initializes %m and %n by memcpy from the constants @__const.main.m
; ({1, 2, 3}) and @__const.main.n ({4, 5, 6}), zero-fills %m1 and %n1 with
; memset, calls __enzyme_autodiff on @g with (m, m1, n, n1), stores the result
; in %val, and returns 1.
; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 @main() {
entry:
%retval = alloca i32, align 4
%m = alloca [3 x double], align 16
%m1 = alloca [3 x double], align 16
%n = alloca [3 x double], align 16
%n1 = alloca [3 x double], align 16
%val = alloca double, align 8
store i32 0, i32* %retval, align 4
%0 = bitcast [3 x double]* %m to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast ([3 x double]* @__const.main.m to i8*), i64 24, i1 false)
%1 = bitcast [3 x double]* %m1 to i8*
call void @llvm.memset.p0i8.i64(i8* align 16 %1, i8 0, i64 24, i1 false)
%2 = bitcast [3 x double]* %n to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %2, i8* align 16 bitcast ([3 x double]* @__const.main.n to i8*), i64 24, i1 false)
%3 = bitcast [3 x double]* %n1 to i8*
call void @llvm.memset.p0i8.i64(i8* align 16 %3, i8 0, i64 24, i1 false)
%arraydecay = getelementptr inbounds [3 x double], [3 x double]* %m, i32 0, i32 0
%arraydecay1 = getelementptr inbounds [3 x double], [3 x double]* %m1, i32 0, i32 0
%arraydecay2 = getelementptr inbounds [3 x double], [3 x double]* %n, i32 0, i32 0
%arraydecay3 = getelementptr inbounds [3 x double], [3 x double]* %n1, i32 0, i32 0
%call = call double @__enzyme_autodiff(i8* bitcast (double (double*, double*)* @g to i8*), double* %arraydecay, double* %arraydecay1, double* %arraydecay2, double* %arraydecay3)
store double %call, double* %val, align 8
ret i32 1
}

; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)

; Function Attrs: argmemonly nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1)

declare dso_local double @__enzyme_autodiff(i8*, double*, double*, double*, double*)

;CHECK: define dso_local double @cblas_ddot(i32 %__N, double* %__X, i32 %__incX, double* %__Y, i32 %__incY)
;CHECK-NEXT: entry:
;CHECK-NEXT: %__N.addr = alloca i32, align 4
;CHECK-NEXT: %__X.addr = alloca double*, align 8
;CHECK-NEXT: %__incX.addr = alloca i32, align 4
;CHECK-NEXT: %__Y.addr = alloca double*, align 8
;CHECK-NEXT: %__incY.addr = alloca i32, align 4
;CHECK-NEXT: %sum = alloca double, align 8
;CHECK-NEXT: %i = alloca i32, align 4
;CHECK-NEXT: store i32 %__N, i32* %__N.addr, align 4
;CHECK-NEXT: store double* %__X, double** %__X.addr, align 8
;CHECK-NEXT: store i32 %__incX, i32* %__incX.addr, align 4
;CHECK-NEXT: store double* %__Y, double** %__Y.addr, align 8
;CHECK-NEXT: store i32 %__incY, i32* %__incY.addr, align 4
;CHECK-NEXT: store double 0.000000e+00, double* %sum, align 8
;CHECK-NEXT: store i32 0, i32* %i, align 4
;CHECK-NEXT: br label %for.cond

;CHECK: for.cond: ; preds = %for.inc, %entry
;CHECK-NEXT: %0 = load i32, i32* %i, align 4
;CHECK-NEXT: %1 = load i32, i32* %__N.addr, align 4
;CHECK-NEXT: %cmp = icmp slt i32 %0, %1
;CHECK-NEXT: br i1 %cmp, label %for.body, label %for.end

;CHECK: for.body: ; preds = %for.cond
;CHECK-NEXT: %2 = load double, double* %sum, align 8
;CHECK-NEXT: %3 = load double*, double** %__X.addr, align 8
;CHECK-NEXT: %4 = load i32, i32* %i, align 4
;CHECK-NEXT: %idxprom = sext i32 %4 to i64
;CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %3, i64 %idxprom
;CHECK-NEXT: %5 = load double, double* %arrayidx, align 8
;CHECK-NEXT: %6 = load double*, double** %__Y.addr, align 8
;CHECK-NEXT: %7 = load i32, i32* %i, align 4
;CHECK-NEXT: %idxprom1 = sext i32 %7 to i64
;CHECK-NEXT: %arrayidx2 = getelementptr inbounds double, double* %6, i64 %idxprom1
;CHECK-NEXT: %8 = load double, double* %arrayidx2, align 8
;CHECK-NEXT: %mul = fmul double %5, %8
;CHECK-NEXT: %add = fadd double %2, %mul
;CHECK-NEXT: store double %add, double* %sum, align 8
;CHECK-NEXT: br label %for.inc

;CHECK: for.inc: ; preds = %for.body
;CHECK-NEXT: %9 = load i32, i32* %i, align 4
;CHECK-NEXT: %inc = add nsw i32 %9, 1
;CHECK-NEXT: store i32 %inc, i32* %i, align 4
;CHECK-NEXT: br label %for.cond

;CHECK: for.end: ; preds = %for.cond
;CHECK-NEXT: %10 = load double, double* %sum, align 8
;CHECK-NEXT: ret double %10
;CHECK-NEXT: }
3 changes: 2 additions & 1 deletion enzyme/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@ configure_lit_site_cfg(
${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py
)

set(ENZYME_TEST_DEPS LLVMEnzyme-${LLVM_VERSION_MAJOR})
set(ENZYME_TEST_DEPS LLVMEnzyme-${LLVM_VERSION_MAJOR} BCPass-${LLVM_VERSION_MAJOR})

add_subdirectory(ActivityAnalysis)
add_subdirectory(TypeAnalysis)
add_subdirectory(Enzyme/ReverseMode)
add_subdirectory(Enzyme/ForwardMode)
add_subdirectory(Integration/ReverseMode)
add_subdirectory(Integration/ForwardMode)
add_subdirectory(BCLoader)

add_custom_target(check-enzyme DEPENDS check-enzyme-reverse check-enzyme-forward)
add_custom_target(check-enzyme-integration DEPENDS check-enzyme-integration-reverse check-enzyme-integration-forward)
4 changes: 4 additions & 0 deletions enzyme/test/lit.site.cfg.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ config.substitutions.append(('%clang', config.llvm_tools_dir + "/clang"))
config.substitutions.append(('%loadEnzyme', ''
+ ' -load=@ENZYME_BINARY_DIR@/Enzyme/LLVMEnzyme-' + config.llvm_ver + config.llvm_shlib_ext
))
config.substitutions.append(('%loadBC', ''
+ ' @ENZYME_BINARY_DIR@/BCLoad/BCPass-' + config.llvm_ver + config.llvm_shlib_ext
reikdas marked this conversation as resolved.
Show resolved Hide resolved
))
config.substitutions.append(('%BClibdir', '@ENZYME_SOURCE_DIR@/bclib/'))

# Let the main config do the real work.
lit_config.load_config(config, "@ENZYME_SOURCE_DIR@/test/lit.cfg.py")