From 2ab697c5bc1e1f30290cb5dcffe9a3e8faeb6cee Mon Sep 17 00:00:00 2001 From: Rocktim Saikia Date: Wed, 12 Apr 2023 10:35:34 +0530 Subject: [PATCH] accept length of characters not tokens --- README.md | 8 +++----- src/utils/config.ts | 18 ++++-------------- src/utils/openai.ts | 28 ++++++++++++++++++++-------- tests/specs/cli/commits.ts | 2 ++ tests/specs/config.ts | 10 +++++----- 5 files changed, 34 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index c58fd257..e2762792 100644 --- a/README.md +++ b/README.md @@ -195,14 +195,12 @@ aicommits config set timeout=20000 # 20s ``` #### length -The maximum number of tokens to be generated for the commit message. +The maximum character length to be generated for the commit message. -Default: `20` (80 characters) - -> Important: One token in OpenAI generally corresponds to `~4` characters of text for common English text. So `20 tokens ~= 80 characters`. Learn more about it [here](https://platform.openai.com/tokenizer) +Default: `50` ```sh -aicommits config set length=30 # 120 characters +aicommits config set length=100 ``` ## How it works diff --git a/src/utils/config.ts b/src/utils/config.ts index fc61eaab..d861e756 100644 --- a/src/utils/config.ts +++ b/src/utils/config.ts @@ -9,11 +9,7 @@ import { KnownError } from './error.js'; const { hasOwnProperty } = Object.prototype; export const hasOwn = (object: unknown, key: PropertyKey) => hasOwnProperty.call(object, key); -const parseAssert = ( - name: string, - condition: any, - message: string, -) => { +const parseAssert = (name: string, condition: any, message: string) => { if (!condition) { throw new KnownError(`Invalid config property ${name}: ${message}`); } @@ -36,7 +32,6 @@ const configParsers = { parseAssert('locale', locale, 'Cannot be empty'); parseAssert('locale', /^[a-z-]+$/i.test(locale), 'Must be a valid locale (letters and dashes/underscores). You can consult the list of codes in: https://wikipedia.org/wiki/List_of_ISO_639-1_codes'); - return locale; }, generate(count?: string) { @@ -82,19 +77,16 @@ const configParsers = { }, length(length?: string) { if (!length) { - // One token in OpenAI generally corresponds to `~4` characters of common English text. - // So `20 tokens ~= 80 characters`. Learn more about it https://platform.openai.com/tokenizer - return 20; + return 50; } parseAssert('length', /^\d+$/.test(length), 'Must be an integer'); const parsed = Number(length); - parseAssert('length', parsed >= 5, 'Must be greater than 5 tokens(20 characters))'); + parseAssert('length', parsed >= 20, 'Must be greater than 20 characters'); return parsed; }, - } as const; type ConfigKeys = keyof typeof configParsers; @@ -132,9 +124,7 @@ export const getConfig = async (cliConfig?: RawConfig): Promise => return parsedConfig as ValidConfig; }; -export const setConfigs = async ( - keyValues: [key: string, value: string][], -) => { +export const setConfigs = async (keyValues: [key: string, value: string][]) => { const config = await readConfigFile(); for (const [key, value] of keyValues) { diff --git a/src/utils/openai.ts b/src/utils/openai.ts index 11a200ca..d6d834f4 100644 --- a/src/utils/openai.ts +++ b/src/utils/openai.ts @@ -31,11 +31,7 @@ const httpsPost = async ( 'Content-Length': Buffer.byteLength(postContent), }, timeout, - agent: ( - proxy - ? createHttpsProxyAgent(proxy) - : undefined - ), + agent: proxy ? createHttpsProxyAgent(proxy) : undefined, }, (response) => { const body: Buffer[] = []; @@ -101,7 +97,17 @@ const sanitizeMessage = (message: string) => message.trim().replace(/[\n\r]/g, ' const deduplicateMessages = (array: string[]) => Array.from(new Set(array)); -const getPrompt = (locale: string, diff: string) => `Write a git commit message in present tense for the following diff without prefacing it with anything. Do not be needlessly verbose and make sure the answer is concise and to the point. The response must be in the language ${locale}:\n${diff}`; +const getPrompt = (locale: string, diff: string, length: number) => `Write a git commit message in present tense for the following diff without prefacing it with anything. Do not be needlessly verbose and make sure the answer is concise and to the point. The response must be no longer than ${length} characters. The response must be in the language ${locale}:\n${diff}`; + +const generateStringFromLength = (length: number) => { + let result = ''; + const characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'; + for (let i = 0; i < length; i += 1) { + const randomIndex = Math.floor(Math.random() * characters.length); + result += characters.charAt(randomIndex); + } + return result; +}; const getTokens = (prompt: string, model: TiktokenModel) => { const encoder = encoding_for_model(model); @@ -121,10 +127,16 @@ export const generateCommitMessage = async ( timeout: number, proxy?: string, ) => { - const prompt = getPrompt(locale, diff); + const prompt = getPrompt(locale, diff, length); + + // Padded by 5 for more room for the completion. + length += 5; + const stringFromLength = generateStringFromLength(length); + const tokenFromLength = getTokens(stringFromLength, model); + const tokenFromPrompt = getTokens(prompt, model); // The token limit is shared between the prompt and the completion. - const maxTokens = length + getTokens(prompt, model); + const maxTokens = tokenFromLength + tokenFromPrompt; try { const completion = await createChatCompletion( diff --git a/tests/specs/cli/commits.ts b/tests/specs/cli/commits.ts index 4662827a..10c48dc1 100644 --- a/tests/specs/cli/commits.ts +++ b/tests/specs/cli/commits.ts @@ -51,6 +51,8 @@ export default testSuite(({ describe }) => { expect(statusAfter.stdout).toBe(''); const { stdout: commitMessage } = await git('log', ['--oneline']); + expect(commitMessage.length <= 50 + 5).toBe(true); + console.log('Committed with:', commitMessage); await fixture.rm(); diff --git a/tests/specs/config.ts b/tests/specs/config.ts index 579b36b6..5b641645 100644 --- a/tests/specs/config.ts +++ b/tests/specs/config.ts @@ -77,16 +77,16 @@ export default testSuite(({ describe }) => { expect(stderr).toMatch('Must be an integer'); }); - test('setting length config less than 5 tokens(20 characters)', async () => { - const { stderr } = await aicommits(['config', 'set', 'length=4'], { + test('setting length config less than 10 characters)', async () => { + const { stderr } = await aicommits(['config', 'set', 'length=10'], { reject: false, }); - expect(stderr).toMatch('Must be greater than 5 tokens(20 characters))'); + expect(stderr).toMatch(/must be greater than 20 characters/i); }); - test('setting valid length config', async () => { - const length = 'length=100'; + test('setting length config less than 60 characters', async () => { + const length = 'length=60'; await aicommits(['config', 'set', length]); const configFile = await fs.readFile(configPath, 'utf8');