
Commit

WIP
GordonSmith committed Nov 23, 2023
1 parent 8d842b3 commit c27ccfc
Showing 12 changed files with 386 additions and 4 deletions.
5 changes: 3 additions & 2 deletions package.json
@@ -8,13 +8,14 @@
"webassembly",
"wasm",
"dot",
"llama",
"zstd",
"graphviz-dot",
"zstandard",
"expat",
"base91",
"expat-xml-parser",
"webassemby"
"webassembly"
],
"type": "module",
"exports": {
@@ -167,4 +168,4 @@
"url": "https://github.com/hpcc-systems/hpcc-js-wasm/issues"
},
"homepage": "https://hpcc-systems.github.io/hpcc-js-wasm/"
}
}
3 changes: 2 additions & 1 deletion rollup.config.js
@@ -107,8 +107,9 @@ export default args => {
browserTplIndex("lib-esm/index", "dist/index.umd", "dist/index"),

browserTpl("lib-esm/base91", "dist/base91.umd", "dist/base91"),
browserTpl("lib-esm/graphviz", "dist/graphviz.umd", "dist/graphviz"),
browserTpl("lib-esm/expat", "dist/expat.umd", "dist/expat"),
browserTpl("lib-esm/graphviz", "dist/graphviz.umd", "dist/graphviz"),
browserTpl("lib-esm/llama", "dist/llama.umd", "dist/llama"),
browserTpl("lib-esm/zstd", "dist/zstd.umd", "dist/zstd"),

browserTpl("lib-esm/__tests__/index-browser", "dist-test/index.umd", "dist-test/index"),
1 change: 1 addition & 0 deletions src-cpp/CMakeLists.txt
@@ -1,4 +1,5 @@
add_subdirectory(base91)
add_subdirectory(expat)
add_subdirectory(graphviz)
add_subdirectory(llama)
add_subdirectory(zstd)
44 changes: 44 additions & 0 deletions src-cpp/llama/CMakeLists.txt
@@ -0,0 +1,44 @@
project(llamalib)

find_package(Llama CONFIG REQUIRED)

# See: https://github.com/emscripten-core/emscripten/blob/main/src/settings.js
string(REPLACE ";" " " CPP_FLAGS "${EM_CPP_FLAGS}")

set(EM_LINK_FLAGS
${EM_LINK_FLAGS}
"-sEXPORT_NAME='${CMAKE_PROJECT_NAME}'"
"-sEXPORTED_FUNCTIONS=\"[]\""
"-sEXPORTED_RUNTIME_METHODS=\"[UTF8ToString]\""
"--post-js ${CMAKE_CURRENT_BINARY_DIR}/main_glue.js"
)
string(REPLACE ";" " " LINK_FLAGS "${EM_LINK_FLAGS}")

# Generate Glue from IDL file ---
add_custom_command(
MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/main.idl
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/main_glue.js ${CMAKE_CURRENT_BINARY_DIR}/main_glue.cpp
COMMAND python3 ${CMAKE_BINARY_DIR}/../emsdk/upstream/emscripten/tools/webidl_binder.py ${CMAKE_CURRENT_SOURCE_DIR}/main.idl ${CMAKE_CURRENT_BINARY_DIR}/main_glue
)
set_property(SOURCE main.cpp APPEND PROPERTY OBJECT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/main_glue.cpp)
# --- --- ---

include_directories(
${VCPKG_INCLUDE_DIR}
${CMAKE_CURRENT_BINARY_DIR}
${Llama_DIR}/common
)

add_executable(llamalib
main.cpp
${Llama_DIR}/common/common.cpp
)

set_target_properties(llamalib PROPERTIES COMPILE_FLAGS "${CPP_FLAGS}")
set_target_properties(llamalib PROPERTIES LINK_FLAGS "${LINK_FLAGS}")

target_link_libraries(llamalib
PRIVATE llama
)

packWasm(llamalib)
221 changes: 221 additions & 0 deletions src-cpp/llama/main.cpp
@@ -0,0 +1,221 @@
#include <stdlib.h>

#include "common.h"
#include "llama.h"

#include <cmath>
#include <cstdio>
#include <string>
#include <vector>

int main(int argc, char **argv)
{
gpt_params params;

if (argc == 1 || argv[1][0] == '-')
{
printf("usage: %s MODEL_PATH [PROMPT]\n", argv[0]);
return 1;
}

if (argc >= 2)
{
params.model = argv[1];
}

if (argc >= 3)
{
params.prompt = argv[2];
}

if (params.prompt.empty())
{
params.prompt = "Hello my name is";
}

// total length of the sequence including the prompt
const int n_len = 32;

// init LLM

llama_backend_init(params.numa);

// initialize the model

llama_model_params model_params = llama_model_default_params();

// model_params.n_gpu_layers = 99; // offload all layers to the GPU

llama_model *model = llama_load_model_from_file(params.model.c_str(), model_params);

if (model == NULL)
{
fprintf(stderr, "%s: error: unable to load model\n", __func__);
return 1;
}

// initialize the context

llama_context_params ctx_params = llama_context_default_params();

ctx_params.seed = 1234;
ctx_params.n_ctx = 2048;
ctx_params.n_threads = params.n_threads;
ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;

llama_context *ctx = llama_new_context_with_model(model, ctx_params);

if (ctx == NULL)
{
fprintf(stderr, "%s: error: failed to create the llama_context\n", __func__);
return 1;
}

// tokenize the prompt

std::vector<llama_token> tokens_list;
tokens_list = ::llama_tokenize(ctx, params.prompt, true);

const int n_ctx = llama_n_ctx(ctx);
const int n_kv_req = tokens_list.size() + (n_len - tokens_list.size());

LOG_TEE("\n%s: n_len = %d, n_ctx = %d, n_kv_req = %d\n", __func__, n_len, n_ctx, n_kv_req);

// make sure the KV cache is big enough to hold all the prompt and generated tokens
if (n_kv_req > n_ctx)
{
LOG_TEE("%s: error: n_kv_req > n_ctx, the required KV cache size is not big enough\n", __func__);
LOG_TEE("%s: either reduce n_parallel or increase n_ctx\n", __func__);
return 1;
}

// print the prompt token-by-token

fprintf(stderr, "\n");

for (auto id : tokens_list)
{
fprintf(stderr, "%s", llama_token_to_piece(ctx, id).c_str());
}

fflush(stderr);

// create a llama_batch with size 512
// we use this object to submit token data for decoding

llama_batch batch = llama_batch_init(512, 0, 1);

// evaluate the initial prompt
for (size_t i = 0; i < tokens_list.size(); i++)
{
llama_batch_add(batch, tokens_list[i], i, {0}, false);
}

// llama_decode will output logits only for the last token of the prompt
batch.logits[batch.n_tokens - 1] = true;

if (llama_decode(ctx, batch) != 0)
{
LOG_TEE("%s: llama_decode() failed\n", __func__);
return 1;
}

// main loop

int n_cur = batch.n_tokens;
int n_decode = 0;

const auto t_main_start = ggml_time_us();

while (n_cur <= n_len)
{
// sample the next token
{
auto n_vocab = llama_n_vocab(model);
auto *logits = llama_get_logits_ith(ctx, batch.n_tokens - 1);

std::vector<llama_token_data> candidates;
candidates.reserve(n_vocab);

for (llama_token token_id = 0; token_id < n_vocab; token_id++)
{
candidates.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f});
}

llama_token_data_array candidates_p = {candidates.data(), candidates.size(), false};

// sample the most likely token
const llama_token new_token_id = llama_sample_token_greedy(ctx, &candidates_p);

// is it an end of stream?
if (new_token_id == llama_token_eos(model) || n_cur == n_len)
{
LOG_TEE("\n");

break;
}

LOG_TEE("%s", llama_token_to_piece(ctx, new_token_id).c_str());
fflush(stdout);

// prepare the next batch
llama_batch_clear(batch);

// push this new token for next evaluation
llama_batch_add(batch, new_token_id, n_cur, {0}, true);

n_decode += 1;
}

n_cur += 1;

// evaluate the current batch with the transformer model
if (llama_decode(ctx, batch))
{
fprintf(stderr, "%s : failed to eval, return code %d\n", __func__, 1);
return 1;
}
}

LOG_TEE("\n");

const auto t_main_end = ggml_time_us();

LOG_TEE("%s: decoded %d tokens in %.2f s, speed: %.2f t/s\n",
__func__, n_decode, (t_main_end - t_main_start) / 1000000.0f, n_decode / ((t_main_end - t_main_start) / 1000000.0f));

llama_print_timings(ctx);

fprintf(stderr, "\n");

llama_batch_free(batch);

llama_free(ctx);
llama_free_model(model);

llama_backend_free();

return 0;
}

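// Minimal surface exposed to JavaScript via the webidl_binder glue (see main.idl);
// this WIP commit only publishes version() plus raw malloc/free helpers.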
struct llama
{
public:
static const char *version(void)
{
return "0.0.1";
}

static void *malloc(size_t __size)
{
return ::malloc(__size);
}

static void free(void *__ptr)
{
::free(__ptr);
}
};

// Include JS Glue ---
#include "main_glue.cpp"
7 changes: 7 additions & 0 deletions src-cpp/llama/main.idl
@@ -0,0 +1,7 @@
interface llama
{
[Const] static DOMString version();

static any malloc(unsigned long size);
static void free(any ptr);
};
2 changes: 1 addition & 1 deletion src-cpp/zstd/CMakeLists.txt
@@ -32,7 +32,7 @@ add_executable(zstdlib
main.cpp
)

set_target_properties(graphvizlib PROPERTIES COMPILE_FLAGS "${CPP_FLAGS}")
set_target_properties(zstdlib PROPERTIES COMPILE_FLAGS "${CPP_FLAGS}")
set_target_properties(zstdlib PROPERTIES LINK_FLAGS "${LINK_FLAGS}")

target_link_libraries(zstdlib
5 changes: 5 additions & 0 deletions src-ts/index.ts
@@ -13,6 +13,11 @@ export namespace Graphviz {
return import("./graphviz.js").then(mod => mod.Graphviz.load());
}
}
export namespace Llama {
export function load() {
return import("./llama.js").then(mod => mod.Llama.load());
}
}
export namespace Zstd {
export function load() {
return import("./zstd.js").then(mod => mod.Zstd.load());
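For reference, a minimal consumer sketch of the new Llama entry point, mirroring how the existing Graphviz and Zstd namespaces are loaded. The "@hpcc-js/wasm" import path and the version() call (lifted from main.idl) are assumptions until the src-ts/llama.ts wrapper is finalized.

import { Llama } from "@hpcc-js/wasm";

// Hypothetical usage sketch: lazily load the wasm module, then query it.
const llama = await Llama.load();
console.log(llama.version()); // expected "0.0.1" per main.cpp (assumed surface)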

