Skip to content

Commit

Permalink
accept length of characters not tokens
Browse files Browse the repository at this point in the history
  • Loading branch information
rocktimsaikia committed Apr 12, 2023
1 parent c166d02 commit 2ab697c
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 32 deletions.
8 changes: 3 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -195,14 +195,12 @@ aicommits config set timeout=20000 # 20s
```

#### length
The maximum number of tokens to be generated for the commit message.
The maximum length, in characters, of the generated commit message.

Default: `20` (80 characters)

> Important: One token in OpenAI generally corresponds to `~4` characters of text for common English text. So `20 tokens ~= 80 characters`. Learn more about it [here](https://platform.openai.com/tokenizer)
Default: `50`

```sh
aicommits config set length=30 # 120 characters
aicommits config set length=100
```

## How it works
Expand Down
18 changes: 4 additions & 14 deletions src/utils/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,7 @@ import { KnownError } from './error.js';
const { hasOwnProperty } = Object.prototype;

/**
 * Reports whether `key` is an own (non-inherited) property of `object`.
 *
 * The check goes through `Object.prototype.hasOwnProperty`, so a
 * `hasOwnProperty` member defined on the value itself is never consulted.
 *
 * @param object - Value to inspect.
 * @param key - Property key to look up.
 * @returns `true` when `object` has `key` as its own property.
 */
export const hasOwn = (object: unknown, key: PropertyKey) => {
  const isOwnProperty = hasOwnProperty.call(object, key);
  return isOwnProperty;
};

const parseAssert = (
name: string,
condition: any,
message: string,
) => {
const parseAssert = (name: string, condition: any, message: string) => {
if (!condition) {
throw new KnownError(`Invalid config property ${name}: ${message}`);
}
Expand All @@ -36,7 +32,6 @@ const configParsers = {

parseAssert('locale', locale, 'Cannot be empty');
parseAssert('locale', /^[a-z-]+$/i.test(locale), 'Must be a valid locale (letters and dashes/underscores). You can consult the list of codes in: https://wikipedia.org/wiki/List_of_ISO_639-1_codes');

return locale;
},
generate(count?: string) {
Expand Down Expand Up @@ -82,19 +77,16 @@ const configParsers = {
},
length(length?: string) {
if (!length) {
// One token in OpenAI generally corresponds to `~4` characters of common English text.
// So `20 tokens ~= 80 characters`. Learn more about it https://platform.openai.com/tokenizer
return 20;
return 50;
}

parseAssert('length', /^\d+$/.test(length), 'Must be an integer');

const parsed = Number(length);
parseAssert('length', parsed >= 5, 'Must be greater than 5 tokens(20 characters))');
parseAssert('length', parsed >= 20, 'Must be greater than 20 characters');

return parsed;
},

} as const;

type ConfigKeys = keyof typeof configParsers;
Expand Down Expand Up @@ -132,9 +124,7 @@ export const getConfig = async (cliConfig?: RawConfig): Promise<ValidConfig> =>
return parsedConfig as ValidConfig;
};

export const setConfigs = async (
keyValues: [key: string, value: string][],
) => {
export const setConfigs = async (keyValues: [key: string, value: string][]) => {
const config = await readConfigFile();

for (const [key, value] of keyValues) {
Expand Down
28 changes: 20 additions & 8 deletions src/utils/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,7 @@ const httpsPost = async (
'Content-Length': Buffer.byteLength(postContent),
},
timeout,
agent: (
proxy
? createHttpsProxyAgent(proxy)
: undefined
),
agent: proxy ? createHttpsProxyAgent(proxy) : undefined,
},
(response) => {
const body: Buffer[] = [];
Expand Down Expand Up @@ -101,7 +97,17 @@ const sanitizeMessage = (message: string) => message.trim().replace(/[\n\r]/g, '

/**
 * Drops repeated entries from `array`, keeping the first occurrence of each
 * string in its original position.
 *
 * @param array - Messages that may contain duplicates.
 * @returns A new array holding each distinct message once.
 */
const deduplicateMessages = (array: string[]) => {
  const uniqueMessages = new Set(array);
  return [...uniqueMessages];
};

const getPrompt = (locale: string, diff: string) => `Write a git commit message in present tense for the following diff without prefacing it with anything. Do not be needlessly verbose and make sure the answer is concise and to the point. The response must be in the language ${locale}:\n${diff}`;
/**
 * Builds the instruction text sent to the model for a given diff.
 *
 * @param locale - Language the response must be written in.
 * @param diff - Diff text appended after the instructions on a new line.
 * @param length - Maximum number of characters requested for the response.
 * @returns The instruction sentences joined by single spaces, a newline, then `diff`.
 */
const getPrompt = (locale: string, diff: string, length: number) => {
  const instructions = [
    'Write a git commit message in present tense for the following diff without prefacing it with anything.',
    'Do not be needlessly verbose and make sure the answer is concise and to the point.',
    `The response must be no longer than ${length} characters.`,
    `The response must be in the language ${locale}:`,
  ].join(' ');

  return `${instructions}\n${diff}`;
};

/**
 * Produces a string of randomly chosen ASCII letters (A-Z, a-z).
 *
 * One letter is appended per iteration until the number of letters is no
 * longer below `length`, so a zero, negative or NaN `length` yields an
 * empty string.
 *
 * @param length - Number of characters to generate.
 * @returns The random letter string.
 */
const generateStringFromLength = (length: number) => {
  const alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';
  const letters: string[] = [];

  while (letters.length < length) {
    const position = Math.floor(Math.random() * alphabet.length);
    letters.push(alphabet.charAt(position));
  }

  return letters.join('');
};

const getTokens = (prompt: string, model: TiktokenModel) => {
const encoder = encoding_for_model(model);
Expand All @@ -121,10 +127,16 @@ export const generateCommitMessage = async (
timeout: number,
proxy?: string,
) => {
const prompt = getPrompt(locale, diff);
const prompt = getPrompt(locale, diff, length);

// Padded by 5 for more room for the completion.
length += 5;
const stringFromLength = generateStringFromLength(length);
const tokenFromLength = getTokens(stringFromLength, model);
const tokenFromPrompt = getTokens(prompt, model);

// The token limit is shared between the prompt and the completion.
const maxTokens = length + getTokens(prompt, model);
const maxTokens = tokenFromLength + tokenFromPrompt;

try {
const completion = await createChatCompletion(
Expand Down
2 changes: 2 additions & 0 deletions tests/specs/cli/commits.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ export default testSuite(({ describe }) => {
expect(statusAfter.stdout).toBe('');

const { stdout: commitMessage } = await git('log', ['--oneline']);
expect(commitMessage.length <= 50 + 5).toBe(true);

console.log('Committed with:', commitMessage);

await fixture.rm();
Expand Down
10 changes: 5 additions & 5 deletions tests/specs/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,16 +77,16 @@ export default testSuite(({ describe }) => {
expect(stderr).toMatch('Must be an integer');
});

test('setting length config less than 5 tokens(20 characters)', async () => {
const { stderr } = await aicommits(['config', 'set', 'length=4'], {
test('setting length config less than 10 characters)', async () => {
const { stderr } = await aicommits(['config', 'set', 'length=10'], {
reject: false,
});

expect(stderr).toMatch('Must be greater than 5 tokens(20 characters))');
expect(stderr).toMatch(/must be greater than 20 characters/i);
});

test('setting valid length config', async () => {
const length = 'length=100';
test('setting length config less than 60 characters', async () => {
const length = 'length=60';
await aicommits(['config', 'set', length]);

const configFile = await fs.readFile(configPath, 'utf8');
Expand Down

0 comments on commit 2ab697c

Please sign in to comment.