Merge branch 'master' into concedo_experimental
# Conflicts:
#	.github/workflows/build.yml
#	CMakeLists.txt
#	Makefile
#	README-sycl.md
#	README.md
#	flake.lock
#	scripts/sync-ggml-am.sh
#	scripts/sync-ggml.last
#	scripts/sync-ggml.sh
#	tests/.gitignore
#	tests/test-backend-ops.cpp
LostRuins committed Mar 11, 2024
2 parents 9229ea6 + 828defe commit 6a32c14
Showing 41 changed files with 4,250 additions and 4,280 deletions.
46 changes: 45 additions & 1 deletion .github/workflows/server.yml
@@ -47,6 +47,8 @@ jobs:
       - name: Clone
         id: checkout
         uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
 
       - name: Dependencies
         id: depends
@@ -58,7 +60,6 @@ jobs:
             cmake \
             python3-pip \
             wget \
-            psmisc \
             language-pack-en
       - name: Build
@@ -90,3 +91,46 @@ jobs:
       run: |
         cd examples/server/tests
         PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow
+  server-windows:
+    runs-on: windows-latest
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Build
+        id: cmake_build
+        run: |
+          mkdir build
+          cd build
+          cmake .. -DLLAMA_BUILD_SERVER=ON -DCMAKE_BUILD_TYPE=Release ;
+          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server
+      - name: Python setup
+        id: setup_python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Tests dependencies
+        id: test_dependencies
+        run: |
+          pip install -r examples/server/tests/requirements.txt
+      - name: Tests
+        id: server_integration_tests
+        run: |
+          cd examples/server/tests
+          behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
+      - name: Slow tests
+        id: server_integration_tests_slow
+        if: ${{ github.event.schedule != '' || github.event.inputs.slow_tests == 'true' }}
+        run: |
+          cd examples/server/tests
+          behave.exe --stop --no-skipped --no-capture --tags slow
1 change: 1 addition & 0 deletions .gitignore
@@ -32,6 +32,7 @@ models-mnt
 /embedding
 /gguf
 /gguf-llama-simple
+/gritlm
 /imatrix
 /infill
 /libllama.so
15 changes: 15 additions & 0 deletions common/common.cpp
@@ -1853,3 +1853,18 @@ void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size) {
 
     printf("\n=== Done dumping\n");
 }
+
+void llama_embd_normalize(const float * inp, float * out, int n) {
+    double sum = 0.0;
+    for (int i = 0; i < n; i++) {
+        sum += inp[i] * inp[i];
+    }
+    sum = sqrt(sum);
+
+    const float norm = sum > 0.0 ? 1.0f / sum : 0.0f;
+
+    for (int i = 0; i < n; i++) {
+        out[i] = inp[i] * norm;
+    }
+}

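The new common helper centralizes L2 normalization, out = inp / ||inp||_2, with an all-zero input mapped to an all-zero output instead of a division by zero; the embedding example further down drops its local copy in favor of it. A minimal usage sketch, assuming only that the caller links against common; the vector values are illustrative, not from a model:

    #include "common.h"
    #include <cstdio>
    #include <vector>

    int main() {
        // Raw (unnormalized) embedding; in real use this would come from
        // llama_get_embeddings_ith() or a similar accessor.
        std::vector<float> embd = {3.0f, 4.0f};
        std::vector<float> out(embd.size());

        // Scales each component by 1/||x||_2; the norm > 0 guard in
        // llama_embd_normalize() turns an all-zero input into all-zero output.
        llama_embd_normalize(embd.data(), out.data(), (int) embd.size());

        printf("%.2f %.2f\n", out[0], out[1]); // prints: 0.60 0.80
    }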
7 changes: 7 additions & 0 deletions common/common.h
@@ -274,3 +274,10 @@ void dump_kv_cache_view(const llama_kv_cache_view & view, int row_size = 80);
 
 // Dump the KV cache view showing individual sequences in each cell (long output).
 void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size = 40);
+
+//
+// Embedding utils
+//
+
+void llama_embd_normalize(const float * inp, float * out, int n);

16 changes: 16 additions & 0 deletions common/grammar-parser.cpp
@@ -278,6 +278,22 @@ namespace grammar_parser {
         while (*pos) {
             pos = parse_rule(state, pos);
         }
+        // Validate the state to ensure that all rules are defined
+        for (const auto & rule : state.rules) {
+            for (const auto & elem : rule) {
+                if (elem.type == LLAMA_GRETYPE_RULE_REF) {
+                    // Ensure that the rule at that location exists
+                    if (elem.value >= state.rules.size() || state.rules[elem.value].empty()) {
+                        // Get the name of the rule that is missing
+                        for (const auto & kv : state.symbol_ids) {
+                            if (kv.second == elem.value) {
+                                throw std::runtime_error("Undefined rule identifier '" + kv.first + "'");
+                            }
+                        }
+                    }
+                }
+            }
+        }
         return state;
     } catch (const std::exception & err) {
         fprintf(stderr, "%s: error parsing grammar: %s\n", __func__, err.what());
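With this validation pass, a grammar that references an undefined rule is rejected at parse time with a named error rather than producing a silently broken state. A minimal sketch of the new behavior, assuming a test program compiled against common/grammar-parser.h; the error text and the empty-state fallback come from the throw and catch paths shown above:

    #include "grammar-parser.h"
    #include <cstdio>

    int main() {
        // "foo" is referenced but never defined, so parse() now prints
        // "error parsing grammar: Undefined rule identifier 'foo'"
        // and returns an empty parse_state from its catch block.
        grammar_parser::parse_state state = grammar_parser::parse("root ::= foo");
        if (state.rules.empty()) {
            fprintf(stderr, "grammar rejected\n");
        }
        return 0;
    }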
1 change: 1 addition & 0 deletions examples/CMakeLists.txt
@@ -20,6 +20,7 @@ else()
     add_subdirectory(convert-llama2c-to-ggml)
     add_subdirectory(embedding)
     add_subdirectory(finetune)
+    add_subdirectory(gritlm)
     add_subdirectory(infill)
     add_subdirectory(llama-bench)
     add_subdirectory(llava)
6 changes: 2 additions & 4 deletions examples/benchmark/benchmark-matmult.cpp
@@ -190,12 +190,10 @@ int main(int argc, char ** argv) {
 
     int32_t nelements = sizex*sizey;
 
-    std::vector<int64_t> hist_cur(1 << 4, 0);
-
     // Set up a the benchmark matrices
     // printf("Creating new tensor q11 & Running quantize\n");
     struct ggml_tensor * q11 = ggml_new_tensor_2d(ctx, qtype, sizex, sizey);
-    ggml_quantize_chunk(qtype, (const float *) m11->data, q11->data, 0, nelements/m11->ne[0], m11->ne[0], hist_cur.data(), nullptr);
+    ggml_quantize_chunk(qtype, (const float *) m11->data, q11->data, 0, nelements/m11->ne[0], m11->ne[0], nullptr);
 
     // Set up a the compute graph
     // printf("Creating new tensor q31\n");
@@ -208,7 +206,7 @@ int main(int argc, char ** argv) {
     // Set up a second graph computation to make sure we override the CPU cache lines
     // printf("Creating new tensor q12 & Running quantize\n");
     struct ggml_tensor * q12 = ggml_new_tensor_2d(ctx, qtype, sizex, sizey);
-    ggml_quantize_chunk(qtype, (const float *) m12->data, q12->data, 0, nelements/m12->ne[0], m12->ne[0], hist_cur.data(), nullptr);
+    ggml_quantize_chunk(qtype, (const float *) m12->data, q12->data, 0, nelements/m12->ne[0], m12->ne[0], nullptr);
 
     // printf("Creating new tensor q32\n");
     struct ggml_tensor * q32 = ggml_mul_mat(ctx, q12, m2);
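Both call sites track the upstream removal of the histogram out-parameter from ggml_quantize_chunk: the trailing argument is now only the optional importance matrix, so callers that collect no statistics pass nullptr and drop their hist buffers. A hypothetical stand-alone call under that assumption; the shape is illustrative, and the exact integer types of the start/row parameters may vary between ggml versions:

    #include "ggml.h"
    #include <cstdio>
    #include <vector>

    int main() {
        const int nrows = 32, n_per_row = 64;              // illustrative shape
        std::vector<float>   src(nrows * n_per_row, 1.0f); // data to quantize
        std::vector<uint8_t> dst(ggml_row_size(GGML_TYPE_Q4_0, n_per_row) * nrows);

        // start = 0, no importance matrix; the old hist_cur vector is gone.
        size_t bytes = ggml_quantize_chunk(GGML_TYPE_Q4_0, src.data(), dst.data(),
                                           0, nrows, n_per_row, nullptr);
        printf("wrote %zu bytes\n", bytes);
    }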
14 changes: 1 addition & 13 deletions examples/embedding/embedding.cpp
@@ -24,17 +24,6 @@ static void batch_add_seq(llama_batch & batch, const std::vector<int32_t> & tokens
     }
 }
 
-static void normalize(const float * vec, float * out, int n) {
-    float norm = 0;
-    for (int i = 0; i < n; i++) {
-        norm += vec[i] * vec[i];
-    }
-    norm = sqrt(norm);
-    for (int i = 0; i < n; i++) {
-        out[i] = vec[i] / norm;
-    }
-}
-
 static void batch_decode(llama_context * ctx, llama_batch & batch, float * output, int n_seq, int n_embd) {
     // clear previous kv_cache values (irrelevant for embeddings)
     llama_kv_cache_clear(ctx);
@@ -45,7 +34,6 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * output, int n_seq, int n_embd) {
         fprintf(stderr, "%s : failed to decode\n", __func__);
     }
 
-    // normalize on copy
     for (int i = 0; i < batch.n_tokens; i++) {
         if (!batch.logits[i]) {
             continue;
@@ -62,7 +50,7 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * output, int n_seq, int n_embd) {
         }
 
         float * out = output + batch.seq_id[i][0] * n_embd;
-        normalize(embd, out, n_embd);
+        llama_embd_normalize(embd, out, n_embd);
     }
 }

5 changes: 5 additions & 0 deletions examples/gritlm/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(TARGET gritlm)
+add_executable(${TARGET} gritlm.cpp)
+install(TARGETS ${TARGET} RUNTIME)
+target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_11)

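The new target builds examples/gritlm/gritlm.cpp, an example for GritLM (Generative Representational Instruction Tuning) models, which serve both text embedding and text generation from the same weights; presumably the demo exercises both modes, mirroring the embedding utility added to common above.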