Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,21 +1,16 @@
From 3eaa2789f0099dbdd2f36efce4d1eeb6edfda033 Mon Sep 17 00:00:00 2001
From: Gian Marco Iodice <gianmarco.iodice@arm.com>
Date: Fri, 14 Jun 2024 14:55:47 +0100
Subject: [PATCH] Use KleidiAI Int4 Matmul micro-kernels in llama.cpp
From 453e52a763043e95b23c88176792e065377189ad Mon Sep 17 00:00:00 2001
From: Charles Xu <chaxu01@e125126.arm.com>
Date: Tue, 9 Jul 2024 08:49:27 +0200
Subject: [PATCH] Updated to be able to build on Linux

* Update CMake file to fetch the Int4 micro-kernels from the KleidiAI
repository
* Implement a KleidiAI backend for llama.cpp

Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
---
CMakeLists.txt | 48 ++++
ggml-alloc.c | 13 ++
ggml-kleidiai.cpp | 561 ++++++++++++++++++++++++++++++++++++++++++++++
ggml-kleidiai.cpp | 560 ++++++++++++++++++++++++++++++++++++++++++++++
ggml-kleidiai.h | 45 ++++
ggml.c | 27 +++
llama.cpp | 19 +-
6 files changed, 712 insertions(+), 1 deletion(-)
6 files changed, 711 insertions(+), 1 deletion(-)
create mode 100644 ggml-kleidiai.cpp
create mode 100644 ggml-kleidiai.h

Expand Down Expand Up @@ -118,10 +113,10 @@ index bd367c42..ed4ce0ae 100644
if (this_size > max_size) {
diff --git a/ggml-kleidiai.cpp b/ggml-kleidiai.cpp
new file mode 100644
index 00000000..9e343c86
index 00000000..aa53086d
--- /dev/null
+++ b/ggml-kleidiai.cpp
@@ -0,0 +1,561 @@
@@ -0,0 +1,560 @@
+/*
+ * Copyright (c) 2024 Arm Limited.
+ *
Expand All @@ -146,7 +141,7 @@ index 00000000..9e343c86
+ * SOFTWARE.
+ */
+
+#if defined(__aarch64__) && defined(__ANDROID__)
+#if defined(__aarch64__) && (defined(__ANDROID__) || defined(__linux__))
+#include "ggml-kleidiai.h"
+
+#include "ggml.h"
Expand Down Expand Up @@ -681,8 +676,7 @@ index 00000000..9e343c86
+ }
+ extra_mem_idx = 0;
+}
+
+#endif // defined(__aarch64__) && defined(__ANDROID__)
+#endif // defined(__aarch64__) && (defined(__ANDROID__) || defined(__linux__))
diff --git a/ggml-kleidiai.h b/ggml-kleidiai.h
new file mode 100644
index 00000000..a4cdf1fb
Expand Down Expand Up @@ -846,5 +840,5 @@ index 05591aa4..735dde04 100644
}

--
2.25.1
2.34.1

11 changes: 10 additions & 1 deletion kleidiai-examples/llama_cpp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,16 @@ mkdir build && cd build

export NDK_PATH="your-android-ndk-path"

cmake -DLLAMA_KLEIDIAI=ON -DLLAMA_OPENMP=OFF -DCMAKE_TOOLCHAIN_FILE=${NDK_PATH}/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=android-23 -DCMAKE_C_FLAGS=-march=armv8.2a+i8mm+dotprod -DCMAKE_CXX_FLAGS=-march=armv8.2a+i8mm+dotprod ..
cmake -DLLAMA_KLEIDIAI=ON -DCMAKE_TOOLCHAIN_FILE=${NDK_PATH}/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=android-23 -DCMAKE_C_FLAGS=-march=armv8.2a+i8mm+dotprod -DCMAKE_CXX_FLAGS=-march=armv8.2a+i8mm+dotprod ..

make -j4
```
Build the llama.cpp project for Linux:

```bash
mkdir build && cd build

cmake -DCMAKE_C_FLAGS=-march=armv8.2-a+dotprod+i8mm -DCMAKE_CXX_FLAGS=-march=armv8.2-a+dotprod+i8mm -DLLAMA_KLEIDIAI=ON ..

make -j4
```
Expand Down