Release llamafile v0.8.1
jart committed Apr 26, 2024
1 parent 7e6a488 commit 2095d50
Showing 5 changed files with 55 additions and 25 deletions.
1 change: 1 addition & 0 deletions llamafile/copy.sh
@@ -15,6 +15,7 @@ scp llama.cpp/ggml-cuda.cu \
  llamafile/tinyblas.cu \
  llamafile/llamafile.h \
  llamafile/rocm.bat \
+ llamafile/rocm.sh \
  llamafile/cuda.bat \
  llamafile/cuda.sh \
  $HOST:lfbuild/
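This hunk adds the new llamafile/rocm.sh build script to the set of files that copy.sh stages on a remote build machine over scp. A rough usage sketch follows (an assumption for illustration; the diff does not show how $HOST is defined or how the script is normally run):

# Hypothetical invocation; "gpu-buildbox" stands in for whatever $HOST
# resolves to in the author's environment.
HOST=gpu-buildbox sh llamafile/copy.sh   # copies the sources into gpu-buildbox:lfbuild/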
9 changes: 7 additions & 2 deletions llamafile/cuda.bat
@@ -4,8 +4,13 @@
  :: NVCUDA DLLs are provided by the installation of the windows GPU
  :: driver on a Windows system that has a CUDA-capable GPU installed.

- nvcc -arch=all ^
- --shared ^
+ nvcc --shared ^
+ -gencode=arch=compute_50,code=sm_50 ^
+ -gencode=arch=compute_60,code=sm_60 ^
+ -gencode=arch=compute_70,code=sm_70 ^
+ -gencode=arch=compute_75,code=sm_75 ^
+ -gencode=arch=compute_80,code=sm_80 ^
+ -gencode=arch=compute_90,code=sm_90 ^
  --forward-unknown-to-host-compiler ^
  -Xcompiler="/nologo /EHsc /O2 /GR /MT" ^
  -DNDEBUG ^
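The change above drops -arch=all in favor of one explicit -gencode pair per target, so the Windows build embeds device code for compute capability 5.0 (Maxwell) through 9.0 (Hopper) regardless of what "all" happens to mean for the installed CUDA toolkit. As a hedged sanity check, not part of this commit, the architectures baked into the resulting library can be listed with cuobjdump (the output filename is an assumption, since the -o line falls in the collapsed part of the diff):

# List the per-architecture device images embedded in the built module.
# "ggml-cuda.dll" is an assumed output name for the Windows build.
cuobjdump --list-elf ggml-cuda.dll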
59 changes: 43 additions & 16 deletions llamafile/cuda.sh
@@ -1,19 +1,46 @@
  #!/bin/sh

+ TMP=$(mktemp -d) || exit
+
+ cp llama.cpp/ggml-cuda.cu \
+ llama.cpp/ggml-cuda.h \
+ llama.cpp/ggml-impl.h \
+ llama.cpp/ggml-alloc.h \
+ llama.cpp/ggml-common.h \
+ llama.cpp/ggml-backend.h \
+ llama.cpp/ggml-backend-impl.h \
+ llama.cpp/ggml.h \
+ llamafile/tinyblas.h \
+ llamafile/tinyblas.cu \
+ llamafile/llamafile.h \
+ llamafile/rocm.bat \
+ llamafile/rocm.sh \
+ llamafile/cuda.bat \
+ llamafile/cuda.sh \
+ "$TMP" || exit
+
+ cd "$TMP"
+
  /usr/local/cuda/bin/nvcc \
- -arch=all \
- --shared \
- --forward-unknown-to-host-compiler \
- -use_fast_math \
- --compiler-options "-fPIC -O3 -march=native -mtune=native" \
- -DNDEBUG \
- -DGGML_BUILD=1 \
- -DGGML_SHARED=1 \
- -DGGML_CUDA_MMV_Y=1 \
- -DGGML_MULTIPLATFORM \
- -DGGML_CUDA_DMMV_X=32 \
- -DK_QUANTS_PER_ITERATION=2 \
- -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 \
- -DGGML_USE_TINYBLAS \
- -o ggml-cuda.so \
- ggml-cuda.cu
+ --shared \
+ -gencode=arch=compute_50,code=sm_50 \
+ -gencode=arch=compute_60,code=sm_60 \
+ -gencode=arch=compute_70,code=sm_70 \
+ -gencode=arch=compute_75,code=sm_75 \
+ -gencode=arch=compute_80,code=sm_80 \
+ -gencode=arch=compute_90,code=sm_90 \
+ --forward-unknown-to-host-compiler \
+ --compiler-options \
+ "-fPIC -O3 -march=native -mtune=native" \
+ -DNDEBUG \
+ -DGGML_BUILD=1 \
+ -DGGML_SHARED=1 \
+ -DGGML_CUDA_MMV_Y=1 \
+ -DGGML_MULTIPLATFORM \
+ -DGGML_CUDA_DMMV_X=32 \
+ -DK_QUANTS_PER_ITERATION=2 \
+ -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 \
+ -DGGML_USE_TINYBLAS \
+ -o ~/ggml-cuda.so \
+ ggml-cuda.cu \
+ -lcuda
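The rewritten cuda.sh now stages the sources in a throwaway mktemp directory, builds there, pins the same -gencode list as cuda.bat instead of -arch=all, drops -use_fast_math, writes the module to ~/ggml-cuda.so, and links -lcuda explicitly. A minimal post-build check, assuming the script completed on a CUDA build host (a sketch, not part of this commit):

# Confirm the module was produced and see which CUDA libraries it pulls in;
# with -DGGML_USE_TINYBLAS there should be no cuBLAS dependency.
ls -l ~/ggml-cuda.so
ldd ~/ggml-cuda.so | grep -iE 'libcuda|cublas'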
9 changes: 3 additions & 6 deletions llamafile/rocm.sh
@@ -10,20 +10,17 @@ hipcc \
  -fPIC \
  -shared \
  -DNDEBUG \
- -march=native \
- -mtune=native \
- -use_fast_math \
  -DGGML_BUILD=1 \
  -DGGML_SHARED=1 \
  -Wno-return-type \
  -Wno-unused-result \
  -DGGML_USE_HIPBLAS \
+ -DGGML_USE_TINYBLAS \
  -DGGML_CUDA_MMV_Y=1 \
  -DGGML_MULTIPLATFORM \
  -DGGML_CUDA_DMMV_X=32 \
  -DK_QUANTS_PER_ITERATION=2 \
  -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 \
  --amdgpu-target=gfx1100,gfx1031,gfx1030,gfx1032,gfx906,gfx1101,gfx1102,gfx1103 \
  -o ggml-rocm.so \
- ggml-cuda.cu \
- -lhipblas \
- -lrocblas
+ ggml-cuda.cu
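With -DGGML_USE_TINYBLAS defined, the matrix kernels come from tinyBLAS compiled into the module, so the -lhipblas and -lrocblas link flags are dropped along with the host-specific -march/-mtune and -use_fast_math options. A quick way to confirm the shipped library no longer depends on the BLAS runtimes (a sketch, assuming the build ran in the current directory):

# Expect no output if ggml-rocm.so really dropped the rocBLAS/hipBLAS deps.
ldd ggml-rocm.so | grep -iE 'hipblas|rocblas'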
2 changes: 1 addition & 1 deletion llamafile/version.h
@@ -2,7 +2,7 @@

  #define LLAMAFILE_MAJOR 0
  #define LLAMAFILE_MINOR 8
- #define LLAMAFILE_PATCH 0
+ #define LLAMAFILE_PATCH 1
  #define LLAMAFILE_VERSION \
  (100000000 * LLAMAFILE_MAJOR + 1000000 * LLAMAFILE_MINOR + LLAMAFILE_PATCH)

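The patch bump means LLAMAFILE_VERSION for this release evaluates to 8000001 under the macro above. A quick check of the arithmetic (a sketch; just re-computing the macro by hand):

# 100000000 * MAJOR(0) + 1000000 * MINOR(8) + PATCH(1)
echo $((100000000 * 0 + 1000000 * 8 + 1))   # prints 8000001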
