import {Token} from "../../types.js";
import {SequenceEvaluateOptions} from "./types.js";
import {LlamaContextSequence} from "./LlamaContext.js";

/**
 * @see [Using Token Predictors](https://node-llama-cpp.withcat.ai/guide/token-prediction#custom)
 */
export abstract class TokenPredictor {
    /**
     * Resets the state of the predictor.
     *
     * Called before the generation starts.
     */
    public abstract reset(params: {
        /** The target sequence that this token predictor is generating tokens for */
        targetSequence: LlamaContextSequence,

        /**
         * The tokens that are or will be loaded into the state.
         *
         * The initial predictions should be based on these tokens.
         *
         * When additional tokens are pushed into the state, the `pushTokens` method will be called with those tokens.
         */
        stateTokens: Token[],

        /**
         * Options used for the evaluation on the target sequence.
         *
         * The `grammarEvaluationState` is cloned before being passed to the token predictor,
         * so it can be modified without affecting the original state.
         */
        evaluateOptions: Readonly<SequenceEvaluateOptions>
    }): Promise<void> | void;

    /** Called with the tokens that are pushed into the state of the target sequence. */
    public abstract pushTokens(tokens: Token[]): void;

    /**
     * Predicts the next tokens based on the current state.
     *
     * If the generation should wait until the minimum predictions are ready,
     * this method should return a promise that resolves when the minimum predictions are ready.
     *
     * A background prediction process can be started when this function is called,
     * so that the next predictions will be ready when this function is called again.
     */
    public abstract predictTokens(): Promise<Token[]> | Token[];

    /**
     * Stops the prediction process when it runs in the background.
     * @param untilPredictionsExhausted - If true, the prediction process should not resume until the current predictions are exhausted.
     */
    public stop(untilPredictionsExhausted?: boolean): Promise<void> | void {}

    /**
     * Called with the input tokens before the generation starts when using `LlamaChatSession`, `LlamaChat`, and `LlamaCompletion`.
     */
    public updateInputTokens(tokens: Token[]): void {}

    /** Disposes the predictor and releases any resources it holds. */
    public dispose(): Promise<void> | void {}

    /** @hidden */
    public [Symbol.dispose]() {
        return this.dispose();
    }
}
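
// A minimal sketch (not part of the library) of a custom predictor that always
// drafts the same fixed token sequence. `StaticTokenPredictor` and its `draft`
// parameter are hypothetical and exist only for illustration; a real predictor
// would derive its predictions from the state tokens it receives.
class StaticTokenPredictor extends TokenPredictor {
    private readonly _draft: Token[];

    public constructor(draft: Token[]) {
        super();
        this._draft = draft;
    }

    public reset(): void {
        // a static draft keeps no state, so there is nothing to reset
    }

    public pushTokens(tokens: Token[]): void {
        // a real predictor would fold the newly pushed tokens into its predictions here
    }

    public predictTokens(): Token[] {
        // returning an array (rather than a promise) means the generation never waits on this predictor
        return this._draft;
    }
}

// Hypothetical usage, assuming the sequence accepts a token predictor when it is created:
// const sequence = context.getSequence({tokenPredictor: new StaticTokenPredictor(draftTokens)});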