Remap <|eot_id|> → <|end_of_text|>
This fixes Meta Llama 3 70B Instruct; however, it doesn't seem to help
with Llama 3 8B Instruct. Llama 3 Instruct models end each turn with
<|eot_id|> rather than the end-of-text token, so without this remap
generation may not stop at the end of a response.
jart committed Apr 19, 2024
1 parent ff9decc commit da4d780
Showing 3 changed files with 20 additions and 1 deletion.
llama.cpp/llama.cpp (9 additions, 0 deletions)
@@ -14593,3 +14593,12 @@ static void llama_log_callback_default(ggml_log_level level, const char * text,
     fputs(text, stderr);
     fflush(stderr);
 }
+
+llama_token llama_string_to_token(const struct llama_model *model, const char *str) {
+    auto i = model->vocab.token_to_id.find(str);
+    if (i != model->vocab.token_to_id.end()) {
+        return i->second;
+    } else {
+        return -1;
+    }
+}
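
As a usage sketch (not part of this commit), a caller holding a loaded
llama_model could use the new API to test whether the vocabulary defines
llama3's end-of-turn token and remap it by hand; the sampled variable
below is hypothetical, standing for a freshly sampled token id:

    // assumes: const struct llama_model *model and llama_token sampled
    // are already in scope
    llama_token eot = llama_string_to_token(model, "<|eot_id|>");
    if (eot != -1 && sampled == eot) {
        // vocab defines <|eot_id|> and we just sampled it; treat it as
        // end of sequence, mirroring the remap in sampling.cpp below
        sampled = llama_token_eos(model);
    }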
llama.cpp/llama.h (2 additions, 0 deletions)
@@ -979,6 +979,8 @@ extern "C" {
 
     LLAMA_API void llama_dump_timing_info_yaml(FILE * stream, const struct llama_context * ctx);
 
+    LLAMA_API llama_token llama_string_to_token(const struct llama_model *, const char *);
+
 #ifdef __cplusplus
 }
 #endif
llama.cpp/sampling.cpp (9 additions, 1 deletion)
@@ -382,7 +382,15 @@ llama_token llama_sampling_sample(
         struct llama_context * ctx_cfg,
         const int idx) {
     // Call the implementation function with is_resampling set to false by default
-    return llama_sampling_sample_impl(ctx_sampling, ctx_main, ctx_cfg, idx, false);
+    llama_token tok = llama_sampling_sample_impl(ctx_sampling, ctx_main, ctx_cfg, idx, false);
+
+    // [jart] llama3 <|eot_id|> → <|end_of_text|>
+    // https://github.com/ggerganov/llama.cpp/pull/6745#issuecomment-2064950897
+    if (tok == llama_string_to_token(llama_get_model(ctx_main), "<|eot_id|>")) {
+        tok = llama_token_eos(llama_get_model(ctx_main));
+    }
+
+    return tok;
 }
 
 llama_token_data_array llama_sampling_probability_distribution(
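
Remapping at the sampling layer, rather than patching the GGUF
vocabulary, keeps model files untouched: when a vocabulary defines
<|eot_id|>, the sampled token is rewritten to the model's end-of-text
token so existing end-of-sequence stop logic fires, and when it
doesn't, llama_string_to_token returns -1, which can never equal a
sampled token id, so other models are unaffected.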
