Cleanup/getting started/cli (#122)

* Consistently report Bedrock and other engine errors.
* Better align CLI logging and output at the right information level.
* Fix several bugs from streaming handling.
* Rework defaults for a better getting-started experience.
DavidSouther · May 20, 2024 · de04484 · de04484
1 parent a4925b6
commit de04484
Show file tree

Hide file tree

Showing 25 changed files with 346 additions and 194 deletions.
diff --git a/README.md b/README.md
@@ -6,25 +6,25 @@ Write your outline.
 Prompt Ailly to continue to keep writing.
 Edit its output, and get even more text like that.
 
-Rhymes with *daily*.
+Rhymes with _daily_.
 
 Ailly's best feature is rapid prompt engineering iteration. By keeping your prompts in snippets on the file system, you can make very fine-grained changes to your prompt and immediately see the difference in the output. You can also use all of your normal source control tooling to track changes over time: both your changes, and those from the LLM.
 
-## Quickstart
+## CLI Quickstart
 
-To get started, follow these steps:
+To get started on the command line, follow these steps:
 
-1. Create a folder named `jokes` and `cd` into it.
-2. Create a file named `10_chickens.md` with "Tell me a joke about chickens" as the content.
-3. Run Ailly using NodeJS: `npx @ailly/cli`
+1. Ask for a joke - `npx @ailly/cli --prompt 'Tell me a joke'`
+1. Create a folder named `jokes` and change directory into it.
+1. Create a file named `10_chickens.md` with "Tell me a joke about chickens" as the content.
+1. Run Ailly using NodeJS: `npx @ailly/cli`
    - See the joke in `10_chickens.md.ailly.md`
-2. Create a file named `jokes/.aillyrc` with "You are a farmer writing jokes for your other barnyard animals."
+1. Create a file named `.aillyrc` with "You are a farmer writing jokes for your other barnyard animals."
    - Include other system prompts, level-setting expectations, etc.
    - Run Ailly with the same command, and see how the joke changes.
-3. Create more numbered files, such as `jokes/20_knock_knock.md` with "Turn the chicken joke into a knock knock joke."
-4. Run Ailly using NodeJS: `npx @ailly/cli 20_knock_knock.md`
-  - Ailly will send each file to the configured LLM engine and write its result.
-  - `20_knock_knock.md.ailly.md` will have the new knock knock joke based on the chicken joke it first wrote!
+1. Create more numbered files, such as `20_knock_knock.md` with "Turn the chicken joke into a knock knock joke."
+1. Run Ailly using NodeJS: `npx @ailly/cli 20_knock_knock.md`
+   - `20_knock_knock.md.ailly.md` will have the new knock knock joke based on the chicken joke it first wrote!
 
 ### System Context
 

diff --git a/cli/src/args.ts b/cli/src/args.ts
@@ -42,7 +42,6 @@ export function makeArgs(argv = process.argv) {
         default: process.env["AILLY_SYSTEM"],
         short: "s",
       },
-      stream: { type: "boolean", default: false },
       "request-limit": {
         type: "string",
         default: process.env["AILLY_REQUEST_LIMIT"],
@@ -57,7 +56,7 @@ export function makeArgs(argv = process.argv) {
       help: { type: "boolean", short: "h", default: false },
       version: { type: "boolean", default: false },
       "log-level": { type: "string", default: undefined },
-      "log-format": { type: "string", default: undefined },
+      "log-format": { type: "string", default: "pretty" },
       verbose: { type: "boolean", default: false, short: "v" },
     },
   });
@@ -89,7 +88,6 @@ export function help() {
       'none' includes no additional content (including no system context) when generating.
       (note: context is separate from isolated. isolated: true with either 'content' or 'folder' will result in the same behavior with either. With 'none', Ailly will send _only_ the prompt when generating.)
 
-    --stream (--prompt only) print responses as they return.
     -e, --edit use Ailly in edit mode. Provide a single file in paths, an edit marker, and a prompt. The path will be updated with the edit marker at the prompt.
     -l, --lines the lines to edit as '[start]:[end]' with start inclusive, and end exclusive. With only '[start]', will insert after. With only ':[end]', will insert before.
 
@@ -106,7 +104,7 @@ export function help() {
     --summary will show a pricing expectation before running and prompt for OK.
     -y, --yes will skip any prompts.
     -v, --verbose, --log-level v and verbose will set log level to info; --log-level can be a string or number and use jefri/jiffies logging levels. Ailly uses warn for reporting details on errors, info for general runtime progress, and debug for details of requests and responses.
-    --log-format json or pretty; default is pretty when run in a pipe. JSON prints in JSONL format.
+    --log-format json or pretty; default is pretty. JSON prints in JSONL format.
 
     --version will print the cli and core versions
     -h, --help will print this message and exit.

diff --git a/cli/src/fs.ts b/cli/src/fs.ts
@@ -1,10 +1,10 @@
 import {
   AillyEdit,
   Content,
-  View,
   loadContent,
   makeCLIContent,
 } from "@ailly/core/lib/content/content.js";
+import { loadTemplateView } from "@ailly/core/lib/content/template.js";
 import {
   PipelineSettings,
   LOGGER as ROOT_LOGGER,
@@ -13,13 +13,13 @@ import {
 import { assertExists } from "@davidsouther/jiffies/lib/cjs/assert.js";
 import { FileSystem } from "@davidsouther/jiffies/lib/cjs/fs.js";
 import {
+  LEVEL,
   basicLogFormatter,
   getLogLevel,
   getLogger,
 } from "@davidsouther/jiffies/lib/cjs/log.js";
 import { Console } from "node:console";
 import { join, resolve } from "node:path";
-import { parse } from "yaml";
 import { Args } from "./args.js";
 
 export const LOGGER = getLogger("@ailly/cli");
@@ -34,39 +34,55 @@ export async function loadFs(
   const root = resolve(args.values.root ?? ".");
   fs.cd(root);
 
+  const positionals = args.positionals.map((a) => resolve(join(root, a)));
+  const hasPositionals = positionals.length > 0;
+  const hasPrompt = (args.values.prompt ?? "") !== "";
+  const isPipe = !hasPositionals && hasPrompt;
+  const logLevel =
+    args.values["log-level"] ??
+    (args.values.verbose ? "verbose" : isPipe ? "silent" : "info");
+  ROOT_LOGGER.console = LOGGER.console = isPipe
+    ? new Console(process.stderr, process.stderr)
+    : global.console;
+  ROOT_LOGGER.level = LOGGER.level = getLogLevel(
+    logLevel === "trace" ? "0.5" : logLevel
+  );
+  const logFormat = args.values["log-format"];
+  const formatter =
+    logFormat === "json" ||
+    ROOT_LOGGER.level < LEVEL.DEBUG ||
+    args.values.verbose
+      ? JSON.stringify
+      : basicLogFormatter;
+  ROOT_LOGGER.format = LOGGER.format = formatter;
+
+  const argContext =
+    args.values.context ?? (args.values.edit ? "folder" : undefined);
+
+  const out = resolve(args.values.out ?? root);
+
+  const templateView = await loadTemplateView(
+    fs,
+    ...(args.values["template-view"] ?? [])
+  );
+  const isExpensiveModel = args.values.model?.includes("opus") ?? false;
+  const requestLimit =
+    args.values["request-limit"] ?? isExpensiveModel ? 1 : undefined;
+
   const settings = await makePipelineSettings({
     root,
-    out: resolve(args.values.out ?? root),
-    context: args.values.context ?? (args.values.edit ? "folder" : undefined),
+    out,
+    context: argContext,
     isolated: args.values.isolated,
     combined: args.values.combined,
     engine: args.values.engine,
     model: args.values.model,
     plugin: args.values.plugin,
-    templateView: await loadTemplateView(fs, args.values["template-view"]),
+    templateView,
     overwrite: !args.values["no-overwrite"],
-    requestLimit:
-      args.values["request-limit"] ?? args.values.model?.includes("opus")
-        ? 1
-        : undefined,
+    requestLimit,
   });
 
-  const positionals = args.positionals.map((a) => resolve(join(root, a)));
-  const hasPositionals = positionals.length > 0;
-  const hasPrompt =
-    args.values.prompt !== undefined && args.values.prompt !== "";
-  const isPipe = !hasPositionals && hasPrompt;
-  const logLevel =
-    args.values["log-level"] ??
-    (args.values.verbose ? "verbose" : isPipe ? "silent" : undefined);
-  const logFormat = args.values["log-format"] ?? (isPipe ? "pretty" : "json");
-  ROOT_LOGGER.console = LOGGER.console = isPipe
-    ? new Console(process.stderr, process.stderr)
-    : global.console;
-  ROOT_LOGGER.level = LOGGER.level = getLogLevel(logLevel);
-  ROOT_LOGGER.format = LOGGER.format =
-    logFormat == "json" ? JSON.stringify : basicLogFormatter;
-
   const system = args.values.system ?? "";
   const depth = Number(args.values["max-depth"]);
 
@@ -159,30 +175,6 @@ export function makeEdit(
   }
 }
 
-/**
- * Read, parse, and validate a template view.
- */
-export async function loadTemplateView(
-  fs: FileSystem,
-  paths?: string[]
-): Promise<View> {
-  if (!paths) return {};
-  let view = /* @type View */ {};
-  for (const path of paths) {
-    try {
-      LOGGER.debug(`Reading template-view at ${path}`);
-      const file = await fs.readFile(path);
-      const parsed = parse(file);
-      if (parsed && typeof parsed == "object") {
-        view = { ...view, ...parsed };
-      }
-    } catch (err) {
-      LOGGER.warn(`Failed to load template-view at ${path}`, { err });
-    }
-  }
-  return view;
-}
-
 async function readAll(readable: typeof process.stdin): Promise<string> {
   return new Promise<string>((resolve, reject) => {
     const chunks: string[] = [];

diff --git a/cli/src/index.ts b/cli/src/index.ts
@@ -38,7 +38,6 @@ export async function main() {
     loaded.settings
   );
 
-  const last = loaded.content.at(-1);
   switch (true) {
     case args.values["update-db"]:
       await generator.updateDatabase();
@@ -58,38 +57,48 @@ export async function main() {
     default:
       LOGGER.info(`Starting ${loaded.content.length} requests`);
       generator.start();
-      if (!args.values.stream) {
-        await finish(generator);
-      }
-      if (last == "/dev/stdout") {
-        const prompt = loaded.context[last];
-        if (args.values.stream) {
-          // Lazy spin until the request starts
+
+      if (loaded.content.at(-1) == "/dev/stdout") {
+        loaded.content.splice(-1, 1);
+        const prompt = loaded.context["/dev/stdout"];
+        const edit = prompt.context.edit;
+        if (!edit) {
           const stream = await prompt.responseStream.promise;
           for await (const word of stream) {
             process.stdout.write(word);
           }
-          await finish(generator);
+          process.stdout.write("\n");
         }
-        console.debug(`Finished prompt, final meta`, { meta: prompt.meta });
+        await finish(generator);
+        LOGGER.debug(`Finished prompt, final meta`, { meta: prompt.meta });
         if (prompt.meta?.debug?.finish == "failed") {
-          console.error(prompt.meta.debug.error?.message ?? "Unknown failure", {
-            debug: prompt.meta.debug,
-          });
-          return;
-        }
-        const edit = prompt.context.edit;
-        if (edit) {
+          LOGGER.debug(`Prompt run error`, { debug: prompt.meta.debug });
+          const error = generator.formatError(prompt) ?? "Unknown failure";
+          console.error(error);
+        } else if (edit) {
           await doEdit(fs, loaded, edit, prompt, args.values.yes ?? false);
-        } else {
-          console.log(prompt.response);
         }
-      } else {
-        await writeContent(
-          fs,
-          loaded.content.map((c) => loaded.context[c])
+      }
+
+      await finish(generator);
+      const errors = generator
+        .errors()
+        .filter((c) => c.content.name != "/dev/stdout");
+      if (errors.length > 0) {
+        console.error(
+          [
+            "There were errors when generating responses:",
+            ...errors.map(
+              (err) => `  ${err.content.name}: ${err.errorMessage}`
+            ),
+          ].join("\n")
         );
       }
+
+      const toWrite = loaded.content
+        .map((c) => loaded.context[c])
+        .filter((c) => c.meta?.debug?.finish !== "failed");
+      await writeContent(fs, toWrite);
       break;
   }
 }

diff --git a/...nt/30_experiments/10_theory_of_mind/_s.md → ...30_experiments/10_theory_of_mind/.aillyrc b/...nt/30_experiments/10_theory_of_mind/_s.md → ...30_experiments/10_theory_of_mind/.aillyrc
@@ -1,6 +1,5 @@
 ---
-tuning: false
-skip: true
+parent: root
 ---
 
 You are being interviewed. Keep your answers short.
diff --git a/...10_theory_of_mind/10_three_year_old/_s.md → ...theory_of_mind/10_three_year_old/.aillyrc b/...10_theory_of_mind/10_three_year_old/_s.md → ...theory_of_mind/10_three_year_old/.aillyrc
@@ -1,4 +1,5 @@
 ---
+parent: always
 isolated: true
 ---
 

diff --git a/...s/10_theory_of_mind/20_ten_year_old/_s.md → ...0_theory_of_mind/20_ten_year_old/.aillyrc b/...s/10_theory_of_mind/20_ten_year_old/_s.md → ...0_theory_of_mind/20_ten_year_old/.aillyrc
@@ -1,4 +1,5 @@
 ---
+parent: always
 isolated: true
 ---
 

diff --git a/core/package.json b/core/package.json
@@ -28,7 +28,7 @@
   "dependencies": {
     "@aws-sdk/client-bedrock-runtime": "^3.427.0",
     "@aws-sdk/credential-providers": "^3.572.0",
-    "@davidsouther/jiffies": "^2.2.4",
+    "@davidsouther/jiffies": "^2.2.5",
     "@dqbd/tiktoken": "^1.0.7",
     "gitignore-parser": "^0.0.2",
     "gray-matter": "^4.0.3",

diff --git a/core/src/actions/generate_manager.ts b/core/src/actions/generate_manager.ts
@@ -83,6 +83,15 @@ export class GenerateManager {
     this.threads.forEach((thread) => thread.forEach(drain));
   }
 
+  /**
+   * Build a human-readable error message for a piece of content.
+   *
+   * The engine is asked first, through its optional `formatError` hook. When
+   * the engine has no hook, or the hook returns undefined, the message of the
+   * error recorded on `content.meta.debug` is used instead.
+   *
+   * @param content the content to describe.
+   * @returns the message, or undefined when neither the engine nor the
+   *   content's debug metadata provides one.
+   */
+  formatError(content: Content): string | undefined {
+    const error = this.engine.formatError?.(content);
+    if (error !== undefined) {
+      return error;
+    }
+
+    // Keep the chain optional all the way down: `error` may be absent even
+    // when `debug` is present, and a non-null assertion here would turn that
+    // case into a TypeError instead of the documented undefined result.
+    return content.meta?.debug?.error?.message;
+  }
+
   async allSettled(): Promise<PromiseSettledResult<Content>[]> {
     const runners = this.threadRunners.map((r) => r.allSettled());
     const runnersPromises = Promise.all(runners);
@@ -96,4 +105,17 @@ export class GenerateManager {
   async updateDatabase(): Promise<void> {
     await this.rag.update(this.threads.flat());
   }
+
+  /**
+   * Collect, across all threads, every content whose debug metadata reports a
+   * "failed" finish, paired with a printable error message.
+   *
+   * @returns a flat list of `{ content, errorMessage }` records, in thread
+   *   order.
+   */
+  errors() {
+    return this.threads.flatMap((thread) => {
+      const failures = thread.filter(
+        (item) => item.meta?.debug?.finish == "failed"
+      );
+      return failures.map((item) => ({
+        content: item,
+        errorMessage: this.formatError(item) ?? "Unknown Failure",
+      }));
+    });
+  }
 }
diff --git a/core/src/actions/prompt_thread.test.ts b/core/src/actions/prompt_thread.test.ts
@@ -119,8 +119,17 @@ describe("generateOne", () => {
       state.engine
     );
     await drain(content);
-    expect(state.logger.info).toHaveBeenCalledWith("Preparing /c.txt");
-    expect(state.logger.info).toHaveBeenCalledWith("Calling noop");
+    expect(state.logger.info).toHaveBeenCalledWith("Running /c.txt");
+    expect(state.logger.debug).toHaveBeenCalledWith("Generating response", {
+      engine: "noop",
+      messages: [
+        { role: "system", content: "" },
+        { role: "user", content: "prompt a" },
+        { role: "assistant", content: "response a" },
+        { role: "user", content: "response b" },
+        { role: "user", content: "tell me a joke\n" },
+      ],
+    });
     expect(content.response).toMatch(/^noop response for c.txt:/);
   });
 });
@@ -189,14 +198,6 @@ describe("PromptThread", () => {
     expect(thread.isDone).toBe(true);
     expect(thread.finished).toBe(3);
     expect(thread.errors.length).toBe(0);
-
-    expect(content[0].response).toEqual(
-      `noop response for a.txt:\n\nsystem: \nuser: prompt a\nassistant: response a\nprompt a`
-    );
-    expect(content[1].response).toBeUndefined();
-    expect(content[2].response).toEqual(
-      `noop response for c.txt:\n\nsystem: \nuser: tell me a joke\n\ntell me a joke\n`
-    );
   });
 
   it("runs sequence", async () => {
@@ -224,13 +225,5 @@ describe("PromptThread", () => {
     expect(thread.isDone).toBe(true);
     expect(thread.finished).toBe(3);
     expect(thread.errors.length).toBe(0);
-
-    expect(content[0].response).toEqual(
-      `noop response for a.txt:\n\nsystem: \nuser: prompt a\nassistant: response a\nprompt a`
-    );
-    expect(content[1].response).toBeUndefined();
-    expect(content[2].response).toEqual(
-      `noop response for c.txt:\n\nsystem: \nuser: prompt a\nassistant: noop response for a.txt:\n\nsystem: \nuser: prompt a\nassistant: response a\nprompt a\nuser: response b\nuser: tell me a joke\n\ntell me a joke\n`
-    );
   });
 });