-
Notifications
You must be signed in to change notification settings - Fork 62
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feature: optimiza llama.cpp loading, fix llama.cpp tokenizer, unify l…
…ogger (#75) * feat: add lora for llama.cpp * update: optimiza llama.cpp loading, fix llama.cpp tokenizer, unify logger
- Loading branch information
Showing
37 changed files
with
301 additions
and
224 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import { LLM } from "llama-node"; | ||
import { RwkvCpp } from "llama-node/dist/llm/rwkv-cpp.js"; | ||
import path from "path"; | ||
// Model weights and tokenizer are resolved relative to the directory the
// script is launched from (both are expected one level above it).
const workingDir = process.cwd();
const modelPath = path.resolve(workingDir, "../ggml-rwkv-4_raven-7b-v9-Eng99%-20230412-ctx8192-Q4_1_0.bin");
const tokenizerPath = path.resolve(workingDir, "../20B_tokenizer.json");

// LLM facade backed by the rwkv.cpp implementation.
const rwkv = new LLM(RwkvCpp);

// Options handed to `rwkv.load`.
const config = {
    modelPath: modelPath,
    tokenizerPath: tokenizerPath,
    nThreads: 4,
    enableLogging: true,
};

// Conversation context used to prime the session; turns are separated by blank lines.
const prompt = `The following is a coherent verbose detailed conversation between a girl named Alice and her friend Bob. Alice is very intelligent, creative and friendly. Alice is unlikely to disagree with Bob, and Alice doesn't like to ask Bob questions. Alice likes to tell Bob a lot about herself and her opinions. Alice usually gives Bob kind, helpful and informative advices.\n\nBob: Hello Alice, how are you doing?\n\nAlice: Hi! Thanks, I'm fine. What about you?\n\nBob: I am fine. It's nice to see you. Look, here is a store selling tea and juice.\n\nAlice: Sure. Let's go inside. I would like to have some Mocha latte, which is my favourite!\n\nBob: What is it?\n\nAlice: Mocha latte is usually made with espresso, milk, chocolate, and frothed milk. Its flavors are frequently sweet.\n\nBob: Sounds tasty. I'll try it next time. Would you like to chat with me for a while?\n\nAlice: Of course! I'm glad to answer your questions or give helpful advices. You know, I am confident with my expertise. So please go ahead!\n\n`;
/**
 * Loads the RWKV model, primes a session file with the long conversation
 * prompt, then asks a follow-up question that reuses that session file.
 * Every token reported by `createCompletion` is written to stdout.
 */
const run = async () => {
    await rwkv.load(config);

    // Both completions must point at the same session file, so resolve it once.
    const sessionFilePath = path.resolve(process.cwd(), "../../session1.bin");

    // Initialise the session data from the full prompt.
    const params = {
        maxPredictLength: 2048,
        topP: 0.1,
        temp: 0.1,
        prompt,
        sessionFilePath,
        isSkipGeneration: true,
        isOverwriteSessionFile: true
    };
    await rwkv.createCompletion(params, (response) => {
        process.stdout.write(response.token);
    });

    // Reuse the session data: once the session has been initialised, the
    // whole prompt does not need to be processed again.
    const params2 = {
        maxPredictLength: 2048,
        // For better Q&A accuracy and less diversity, reduce top_p (to 0.5, 0.2, 0.1 etc.)
        topP: 0.1,
        // Sampling temperature. It could be a good idea to increase temperature when top_p is low.
        temp: 0.1,
        // Real newlines (not a literal backslash followed by "n"), matching the
        // turn separator used in the initial prompt and in `endString`.
        prompt: 'Bob: Who are you?\n\nAlice: ',
        endString: '\n\n',
        sessionFilePath,
        // Setting this to false keeps the initial state of the session.
        isOverwriteSessionFile: true,
        // Penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
        presencePenalty: 0.2,
        // Penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
        frequencyPenalty: 0.2
    };
    await rwkv.createCompletion(params2, (response) => {
        process.stdout.write(response.token);
    });
};
run();
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
[package] | ||
name = "common-rs" | ||
version = "0.1.0" | ||
edition = "2021" | ||
|
||
[dependencies] | ||
env_logger = "0.10.0" | ||
log = "0.4.17" | ||
once_cell = "1.17.1" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
pub mod logger; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
use log::{LevelFilter, Log, Metadata, Record}; | ||
use once_cell::sync::Lazy; | ||
|
||
pub struct LLamaLogger { | ||
enabled: bool, | ||
} | ||
|
||
static mut LLAMA_LOGGER_INNER: LLamaLogger = LLamaLogger { enabled: true }; | ||
pub static mut LLAMA_LOGGER: Lazy<&mut LLamaLogger> = Lazy::new(|| { | ||
log::set_max_level(LevelFilter::Info); | ||
log::set_logger(unsafe { &LLAMA_LOGGER_INNER }).unwrap(); | ||
unsafe { &mut LLAMA_LOGGER_INNER } | ||
}); | ||
|
||
impl LLamaLogger { | ||
pub fn set_enabled(&mut self, enabled: bool) { | ||
self.enabled = enabled; | ||
} | ||
|
||
pub fn get_singleton() -> &'static mut LLamaLogger { | ||
unsafe { &mut LLAMA_LOGGER } | ||
} | ||
} | ||
|
||
impl Log for LLamaLogger { | ||
fn enabled(&self, metadata: &Metadata) -> bool { | ||
metadata.level() <= log::Level::Info | ||
// true | ||
} | ||
|
||
fn log(&self, record: &Record) { | ||
// Check if the record is matched by the logger before logging | ||
if self.enabled(record.metadata()) && self.enabled { | ||
println!("{} - {}", record.level(), record.args()); | ||
} | ||
} | ||
|
||
fn flush(&self) {} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.