From b1781058a587ea8f74a047308cbe67b6cc5117de Mon Sep 17 00:00:00 2001
From: Karthik Kalyanaraman <karthik@scale3labs.com>
Date: Thu, 4 Apr 2024 00:01:40 -0700
Subject: [PATCH 01/16] Pagination bug

---
 components/project/eval/eval.tsx    | 4 ++--
 components/project/eval/prompts.tsx | 4 ++--
 lib/services/trace_service.ts       | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/components/project/eval/eval.tsx b/components/project/eval/eval.tsx
index 8441bfdc..7b139ddd 100644
--- a/components/project/eval/eval.tsx
+++ b/components/project/eval/eval.tsx
@@ -62,9 +62,9 @@ export default function Eval({ email }: { email: string }) {
       }
       if (result) {
         if (data) {
-          setData((prevData: any) => [...prevData, ...result.prompts.result]);
+          setData((prevData: any) => [...prevData, ...result?.prompts?.result || []]);
         } else {
-          setData(result.prompts.result);
+          setData(result?.prompts?.result || []);
         }
       }
       setPage((currentPage) => currentPage + 1);
diff --git a/components/project/eval/prompts.tsx b/components/project/eval/prompts.tsx
index bb385183..9ea37d51 100644
--- a/components/project/eval/prompts.tsx
+++ b/components/project/eval/prompts.tsx
@@ -52,9 +52,9 @@ export default function Prompts({ email }: { email: string }) {
       }
       if (result) {
         if (data) {
-          setData((prevData: any) => [...prevData, ...result.prompts.result]);
+          setData((prevData: any) => [...prevData, ...result?.prompts?.result || []]);
         } else {
-          setData(result.prompts.result);
+          setData(result?.prompts?.result || []);
         }
       }
       setPage((currentPage) => currentPage + 1);
diff --git a/lib/services/trace_service.ts b/lib/services/trace_service.ts
index 1761aa06..ad7c35c7 100644
--- a/lib/services/trace_service.ts
+++ b/lib/services/trace_service.ts
@@ -244,7 +244,7 @@ export class TraceService implements ITraceService {
 
       const md = { page, page_size: pageSize, total_pages: totalPages };
       if (page! > totalPages) {
-        throw Error("Page number is greater than total pages");
+        page = totalPages;
       }
       const query = sql.select(
         `* FROM ${project_id} WHERE attributes LIKE '%${attribute}%' ORDER BY 'start_time' DESC LIMIT ${pageSize} OFFSET ${
@@ -325,7 +325,7 @@ export class TraceService implements ITraceService {
 
       const md = { page, page_size: pageSize, total_pages: totalPages };
       if (page! > totalPages) {
-        throw Error("Page number is greater than total pages");
+        page = totalPages;
       }
       const query = sql.select(
         `* FROM ${project_id} ORDER BY 'createdAt' DESC LIMIT ${pageSize} OFFSET ${

From 0e963e226af17ffdba6282496d1979c6e30c20ad Mon Sep 17 00:00:00 2001
From: Karthik Kalyanaraman <karthik@scale3labs.com>
Date: Thu, 4 Apr 2024 17:13:49 -0700
Subject: [PATCH 02/16] Bug fix

---
 app/api/user/route.ts         | 1 -
 components/project/traces.tsx | 2 --
 lib/utils.ts                  | 1 +
 3 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/app/api/user/route.ts b/app/api/user/route.ts
index d7e95f34..a10d34bf 100644
--- a/app/api/user/route.ts
+++ b/app/api/user/route.ts
@@ -83,7 +83,6 @@ export async function PUT(req: NextRequest) {
   }
 
   if ("status" in data) {
-    console.log("updating status");
     const user = await prisma.user.update({
       where: {
         id,
diff --git a/components/project/traces.tsx b/components/project/traces.tsx
index 523aa352..1f56c03e 100644
--- a/components/project/traces.tsx
+++ b/components/project/traces.tsx
@@ -80,8 +80,6 @@ export default function Traces({ email }: { email: string }) {
       }
 
       // Merge the new data with the existing data
-      console.log("currentData", currentData);
-      console.log("newData", newData);
       if (currentData.length > 0) {
         const updatedData = [...currentData, ...newData];
 
diff --git a/lib/utils.ts b/lib/utils.ts
index 584d22f2..2c6fd519 100644
--- a/lib/utils.ts
+++ b/lib/utils.ts
@@ -326,6 +326,7 @@ export function calculatePriceFromUsage(
     output_tokens: number;
   }
 ): any {
+  if (!model) return { total: 0, input: 0, output: 0 };
   if (vendor === "openai") {
     const costTable = OPENAI_PRICING[model.includes("gpt-4") ? "gpt-4" : model];
     if (costTable) {

From af27884c839d3900fd9e778381a986d0631877b6 Mon Sep 17 00:00:00 2001
From: Karthik Kalyanaraman <karthik@scale3labs.com>
Date: Sun, 9 Jun 2024 22:23:27 -0700
Subject: [PATCH 03/16] Experiments view

---
 .../[project_id]/evaluate/page-client.tsx     |  59 +-
 .../experiments/[run_id]/page.tsx             | 319 +++++++++
 .../experiments/experiment_result.ts          | 629 ++++++++++++++++++
 .../[project_id]/experiments/page-client.tsx  | 109 ++-
 components/evaluate/create-test.tsx           |   4 +
 components/evaluate/evaluation-row.tsx        |   2 +-
 components/evaluate/evaluation-table.tsx      |   9 +-
 .../playground/prompt-registry-dialog.tsx     |   2 +-
 components/shared/add-to-dataset.tsx          |   2 +-
 components/shared/conversation-view.tsx       |  52 +-
 components/shared/llm-picker.tsx              |   2 +-
 components/shared/nav.tsx                     |   8 +-
 components/shared/vendor-metadata.tsx         |  30 +-
 components/traces/trace_graph.tsx             |   5 +-
 components/ui/sheet.tsx                       |   2 +-
 lib/utils.ts                                  |  56 ++
 16 files changed, 1199 insertions(+), 91 deletions(-)
 create mode 100644 app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx
 create mode 100644 app/(protected)/project/[project_id]/experiments/experiment_result.ts

diff --git a/app/(protected)/project/[project_id]/evaluate/page-client.tsx b/app/(protected)/project/[project_id]/evaluate/page-client.tsx
index adb86b11..add3f5b6 100644
--- a/app/(protected)/project/[project_id]/evaluate/page-client.tsx
+++ b/app/(protected)/project/[project_id]/evaluate/page-client.tsx
@@ -8,9 +8,7 @@ import EvaluationTable, {
   EvaluationTableSkeleton,
 } from "@/components/evaluate/evaluation-table";
 import { AddtoDataset } from "@/components/shared/add-to-dataset";
-import { Separator } from "@/components/ui/separator";
-import { Skeleton } from "@/components/ui/skeleton";
-import { cn } from "@/lib/utils";
+import { Button } from "@/components/ui/button";
 import { Test } from "@prisma/client";
 import { RabbitIcon } from "lucide-react";
 import { useParams } from "next/navigation";
@@ -123,57 +121,12 @@ export default function PageClient({ email }: { email: string }) {
 function PageSkeleton() {
   return (
     <div className="w-full flex flex-col">
-      <div className="flex flex-row gap-4 absolute top-[14rem] w-full md:px-24 px-12">
-        <div className="bg-primary-foreground flex flex-col gap-0 border rounded-md w-[12rem] h-fit">
-          {Array.from({ length: 5 }).map((_, i) => {
-            return (
-              <div className="flex flex-col" key={i}>
-                <div
-                  className={cn(
-                    "flex flex-col gap-4 p-4 items-start cursor-pointer",
-                    i === 0 ? "rounded-t-md" : "",
-                    i === 4 ? "rounded-b-md" : ""
-                  )}
-                >
-                  <div
-                    className={cn(
-                      "text-sm text-muted-foreground font-semibold capitalize",
-                      i === 0 ? "text-primary" : ""
-                    )}
-                  >
-                    <Skeleton className="w-20 h-6" />
-                  </div>
-                  <span className="text-[0.6rem] text-primary font-bold">
-                    <Skeleton className="w-20 h-6" />
-                  </span>
-                </div>
-                <Separator />
-              </div>
-            );
-          })}
-        </div>
-        <div className="bg-primary-foreground flex flex-col gap-12 border rounded-md w-full p-4 mb-24">
-          <div className="flex flex-row">
-            <div className="flex flex-col gap-3 items-start w-[25rem]">
-              <div className="flex flex-col gap-1">
-                <div className="text-xl font-semibold capitalize">
-                  <Skeleton className="w-20 h-6" />
-                </div>
-                <span className="text-xs font-semibold text-muted-foreground">
-                  <Skeleton className="w-20 h-6" />
-                </span>
-              </div>
-              <span className="text-sm text-primary font-bold">
-                <Skeleton className="w-20 h-6" />
-              </span>
-              <div className="text-sm text-muted-foreground">
-                <Skeleton className="w-20 h-6" />
-              </div>
-            </div>
-            <LargeChartSkeleton />
-          </div>
-          <EvaluationTableSkeleton />
+      <div className="flex flex-col gap-12 top-[16rem] w-full md:px-24 px-12 mb-24">
+        <LargeChartSkeleton />
+        <div className="flex flex-col gap-2">
+          <Button variant="outline" disabled={true} />
         </div>
+        <EvaluationTableSkeleton />
       </div>
     </div>
   );
diff --git a/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx b/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx
new file mode 100644
index 00000000..82c8973b
--- /dev/null
+++ b/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx
@@ -0,0 +1,319 @@
+"use client";
+
+import { Conversation } from "@/components/shared/conversation-view";
+import { Badge } from "@/components/ui/badge";
+import { Button } from "@/components/ui/button";
+import { Checkbox } from "@/components/ui/checkbox";
+import { Separator } from "@/components/ui/separator";
+import {
+  Sheet,
+  SheetContent,
+  SheetDescription,
+  SheetHeader,
+  SheetTitle,
+} from "@/components/ui/sheet";
+import { cn } from "@/lib/utils";
+import { ChevronLeft, ChevronRight, Copy, MoveDiagonal, X } from "lucide-react";
+import { useParams, useRouter } from "next/navigation";
+import { useState } from "react";
+import { toast } from "sonner";
+import data from "../experiment_result";
+
+/**
+ * Detail page for a single experiment run: looks the run up by the `run_id`
+ * route param in the bundled `experiment_result` data and renders its samples
+ * as a table whose rows can be expanded individually or all at once.
+ */
+export default function Experiments() {
+  const router = useRouter();
+  const runId = useParams()?.run_id as string;
+  const experiment = data.find((exp) => exp.eval.run_id === runId);
+  const samples = experiment?.samples ?? [];
+  // One flag per sample row; `true` means the row's cells are not truncated.
+  const [expand, setExpand] = useState(samples.map(() => false));
+  // Drives both the toggle-all icon and its action so the two always agree.
+  const anyCollapsed = expand.some((v) => !v);
+  return (
+    <div className="w-full flex flex-col gap-4">
+      <div className="md:px-24 px-12 py-12 flex justify-between bg-muted">
+        <h1 className="text-2xl font-semibold">Run ID: {runId}</h1>
+        <Button variant={data.length > 0 ? "default" : "outline"}>
+          New Experiment
+        </Button>
+      </div>
+      <div className="flex flex-col gap-12 w-full px-12">
+        <div className="flex gap-2">
+          <Button variant="outline" onClick={() => router.back()}>
+            <ChevronLeft className="text-muted-foreground" size={20} />
+            Back
+          </Button>
+          <Button
+            variant={"outline"}
+            size={"icon"}
+            onClick={() => {
+              // Expand every row if any is collapsed, otherwise collapse all.
+              setExpand(expand.map(() => anyCollapsed));
+            }}
+          >
+            {anyCollapsed ? (
+              <MoveDiagonal className="text-muted-foreground" size={20} />
+            ) : (
+              <X className="text-muted-foreground" size={20} />
+            )}
+          </Button>
+        </div>
+        {/* Shown for an unknown run id as well as for a run with no samples. */}
+        {samples.length === 0 && (
+          <div className="flex flex-col items-center gap-2 mt-6">
+            <p className="text-center text-md">
+              No samples found for this experiment.
+            </p>
+            <Button className="w-fit">New Experiment</Button>
+          </div>
+        )}
+        {experiment && samples.length > 0 && (
+          <div className="overflow-y-scroll">
+            <table className="table-auto overflow-x-scroll w-screen border-separate border border-muted rounded-md">
+              <thead className="bg-muted">
+                <tr>
+                  <th className="w-12 rounded-md p-2">
+                    <Checkbox />
+                  </th>
+                  <th className="p-2 rounded-md text-sm font-medium">Input</th>
+                  <th className="p-2 rounded-md text-sm font-medium">Target</th>
+                  <th className="p-2 rounded-md text-sm font-medium">{`Output - (${experiment.eval.model})`}</th>
+                  <th className="p-2 rounded-md text-sm font-medium">
+                    Explanation
+                  </th>
+                </tr>
+              </thead>
+              <tbody>
+                {samples.map((sample, i) => (
+                  <SampleRow
+                    key={i}
+                    index={i}
+                    plan={experiment.plan}
+                    sample={sample}
+                    model={experiment.eval.model}
+                    expand={expand[i]}
+                    setExpand={(value: boolean, index: number) => {
+                      // Functional update so a newer state is never overwritten.
+                      setExpand((prev) =>
+                        prev.map((old, j) => (j === index ? value : old))
+                      );
+                    }}
+                  />
+                ))}
+              </tbody>
+            </table>
+          </div>
+        )}
+      </div>
+    </div>
+  );
+}
+
+/**
+ * One table row for an evaluated sample. Clicking the row opens a side sheet
+ * with the experiment plan and the full conversation; the hover buttons in
+ * each cell expand/collapse the row (via `setExpand(value, index)`) or copy
+ * that cell's text. Cell contents are truncated while `expand` is false.
+ */
+function SampleRow({
+  index,
+  sample,
+  plan,
+  model,
+  expand,
+  setExpand,
+}: {
+  index: number;
+  sample: any;
+  plan: any;
+  model: string;
+  expand: boolean;
+  setExpand: (value: boolean, index: number) => void;
+}) {
+  const [open, setOpen] = useState(false);
+  // `input` is either a plain string or a list of chat messages; show (and
+  // copy) the last message's content rather than the raw array, which would
+  // otherwise reach the clipboard as "[object Object]".
+  const inputText: string =
+    typeof sample.input === "string"
+      ? sample.input
+      : Array.isArray(sample.input)
+      ? sample.input[sample.input.length - 1]?.content ?? ""
+      : "";
+  const choices = sample.output?.choices ?? [];
+  const outputText: string =
+    choices.length > 0
+      ? choices[choices.length - 1].message?.content ?? ""
+      : "";
+  const cellClass = cn(
+    "text-sm px-2 py-1 max-w-80 relative",
+    expand ? "" : "truncate"
+  );
+  return (
+    <tr
+      className="hover:cursor-pointer hover:bg-muted group"
+      onClick={() => setOpen(!open)}
+    >
+      <td className="px-2 py-1 text-center">
+        <Checkbox onClick={(e) => e.stopPropagation()} />
+      </td>
+      <td className={cellClass}>
+        <UtilityButton
+          index={index}
+          expand={expand}
+          setExpand={setExpand}
+          text={inputText}
+        />
+        {inputText}
+      </td>
+      <td
+        className={cn(
+          "relative text-sm px-2 py-1 max-w-80",
+          expand ? "" : "truncate"
+        )}
+      >
+        <UtilityButton
+          index={index}
+          expand={expand}
+          setExpand={setExpand}
+          text={sample.target || ""}
+        />
+        {sample.target || "none"}
+      </td>
+      <td
+        className={cn("px-2 py-1 max-w-80 relative", expand ? "" : "truncate")}
+      >
+        <UtilityButton
+          index={index}
+          expand={expand}
+          setExpand={setExpand}
+          text={outputText}
+        />
+        <div className="flex flex-col gap-2">
+          <Badge
+            className={cn(
+              sample.score?.value === "I"
+                ? "hover:bg-red-200 bg-red-200 text-destructive border-destructive"
+                : "hover:bg-green-200 bg-green-200 text-green-700 border-green-700",
+              "border w-fit"
+            )}
+          >
+            {sample.score?.value === "I" ? "INCORRECT" : "CORRECT"}
+          </Badge>
+          <Badge variant={"secondary"} className="w-fit">
+            {sample.output?.model || ""}
+          </Badge>
+          <p className="text-sm">{outputText}</p>
+        </div>
+      </td>
+      <td className={cellClass}>
+        <UtilityButton
+          index={index}
+          expand={expand}
+          setExpand={setExpand}
+          text={sample.score?.explanation || ""}
+        />
+        {sample.score?.explanation || "none"}
+      </td>
+      <Sheet open={open} onOpenChange={setOpen}>
+        <SheetContent
+          onCloseAutoFocus={(e) => e.preventDefault()}
+          onInteractOutside={(e) => {
+            e.preventDefault();
+          }}
+          className={"overflow-y-scroll w-1/4"}
+          onClick={(e) => e.stopPropagation()}
+        >
+          <SheetHeader>
+            <SheetTitle>Messages</SheetTitle>
+            <SheetDescription>
+              Messages exchanged between the model and the user.
+            </SheetDescription>
+          </SheetHeader>
+          <Separator className="my-3" />
+          <p className="text-medium my-3 font-semibold">Plan</p>
+          <div className="flex my-4 gap-2 flex-wrap items-center">
+            {plan.steps.map((step: any, i: number) => (
+              <div className="flex gap-2 items-center" key={i}>
+                <Badge variant={"outline"}>{step.solver}</Badge>
+                {i < plan.steps.length - 1 && <ChevronRight size={12} />}
+              </div>
+            ))}
+          </div>
+          <Separator className="my-3" />
+          <p className="text-medium my-3 font-semibold">Messages</p>
+          <Conversation messages={sample.messages} model={model} />
+        </SheetContent>
+      </Sheet>
+    </tr>
+  );
+}
+
+/**
+ * Small icon button that flips a single row between expanded and truncated.
+ * Stops propagation so the click does not also trigger the row's onClick.
+ */
+function ExpandContractButton({ index, expand, setExpand }: {
+  index: number;
+  expand: boolean;
+  setExpand: (expand: boolean, index: number) => void;
+}) {
+  const Icon = expand ? X : MoveDiagonal;
+  return (
+    <Button
+      variant="outline"
+      size="icon"
+      className="w-6 h-6 flex items-center justify-center"
+      onClick={(event) => {
+        event.stopPropagation();
+        setExpand(!expand, index);
+      }}
+    >
+      <Icon className="text-muted-foreground" size={20} />
+    </Button>
+  );
+}
+
+/** Icon button that copies `text` to the clipboard and toasts the outcome. */
+function CopyButton({ text }: { text: string }) {
+  return (
+    <Button
+      variant={"outline"}
+      size={"icon"}
+      className={"w-6 h-6 flex items-center justify-center"}
+      onClick={(e) => {
+        e.stopPropagation();
+        // writeText() is async and may reject, so toast only once it settles.
+        navigator.clipboard
+          .writeText(text)
+          .then(() => toast.success("Copied to clipboard"))
+          .catch(() => toast.error("Failed to copy to clipboard"));
+      }}
+    >
+      <Copy className="text-muted-foreground" size={15} />
+    </Button>
+  );
+}
+
+/** Hover-only overlay in a cell's top-right corner: expand toggle + copy. */
+function UtilityButton({ index, expand, setExpand, text }: {
+  index: number;
+  expand: boolean;
+  setExpand: (expand: boolean, index: number) => void;
+  text: string;
+}) {
+  return (
+    <div className="absolute group-hover:flex gap-0 items-center hidden top-0 right-0">
+      <ExpandContractButton
+        index={index}
+        expand={expand}
+        setExpand={setExpand}
+      />
+      <CopyButton text={text} />
+    </div>
+  );
+}
diff --git a/app/(protected)/project/[project_id]/experiments/experiment_result.ts b/app/(protected)/project/[project_id]/experiments/experiment_result.ts
new file mode 100644
index 00000000..1f44338f
--- /dev/null
+++ b/app/(protected)/project/[project_id]/experiments/experiment_result.ts
@@ -0,0 +1,629 @@
+let data = [
+  {
+    version: 1,
+    status: "success",
+    eval: {
+      task: "security_guide",
+      task_version: 0,
+      task_file: "src/main.py",
+      task_id: "WjZbe8iaXETqiGGECFzW8E",
+      run_id: "k9FsZc425gEjCFNrXszPqW",
+      created: "2024-06-08T15:48:55",
+      dataset: {
+        name: "security_guide",
+        location: "example://security_guide",
+      },
+      model: "openai/gpt-3.5-turbo",
+      task_attribs: {},
+      task_args: {},
+      model_args: {},
+      config: {},
+      packages: {
+        inspect_ai: "0.3.10",
+      },
+    },
+    plan: {
+      name: "plan",
+      steps: [
+        {
+          solver: "generate",
+          params: {},
+        },
+      ],
+      config: {},
+    },
+    results: {
+      scorer: {
+        name: "includes",
+        params: {},
+      },
+      metrics: {
+        accuracy: {
+          name: "accuracy",
+          value: 0.5,
+          options: {},
+        },
+        bootstrap_std: {
+          name: "bootstrap_std",
+          value: 0.35385166383669864,
+          options: {},
+        },
+      },
+    },
+    stats: {
+      started_at: "2024-06-08T15:48:55",
+      completed_at: "2024-06-08T15:48:58",
+      model_usage: {
+        "openai/gpt-3.5-turbo": {
+          input_tokens: 30,
+          output_tokens: 451,
+          total_tokens: 481,
+        },
+      },
+    },
+    samples: [
+      {
+        id: 1,
+        epoch: 1,
+        input: [
+          {
+            content: "How do I prevent SQL Injection attacks?",
+            source: "input",
+            role: "user",
+          },
+        ],
+        target: "use parameterized queries and prepared statements",
+        messages: [
+          {
+            content: "How do I prevent SQL Injection attacks?",
+            source: "input",
+            role: "user",
+          },
+          {
+            content:
+              "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.",
+            source: "generate",
+            role: "assistant",
+          },
+        ],
+        output: {
+          model: "gpt-3.5-turbo-0125",
+          choices: [
+            {
+              message: {
+                content:
+                  "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.",
+                source: "generate",
+                role: "assistant",
+              },
+              stop_reason: "stop",
+            },
+          ],
+          usage: {
+            input_tokens: 15,
+            output_tokens: 222,
+            total_tokens: 237,
+          },
+        },
+        score: {
+          value: "I",
+          answer:
+            "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.",
+          explanation:
+            "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.",
+        },
+        metadata: {},
+      },
+      {
+        id: 2,
+        epoch: 1,
+        input: [
+          {
+            content: "How do I prevent SQL Injection attacks?",
+            source: "input",
+            role: "user",
+          },
+        ],
+        target: "Use parameterized queries",
+        messages: [
+          {
+            content: "How do I prevent SQL Injection attacks?",
+            source: "input",
+            role: "user",
+          },
+          {
+            content:
+              "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.",
+            source: "generate",
+            role: "assistant",
+          },
+        ],
+        output: {
+          model: "gpt-3.5-turbo-0125",
+          choices: [
+            {
+              message: {
+                content:
+                  "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.",
+                source: "generate",
+                role: "assistant",
+              },
+              stop_reason: "stop",
+            },
+          ],
+          usage: {
+            input_tokens: 15,
+            output_tokens: 229,
+            total_tokens: 244,
+          },
+        },
+        score: {
+          value: "C",
+          answer:
+            "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.",
+          explanation:
+            "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.",
+        },
+        metadata: {},
+      },
+    ],
+    logging: [],
+  },
+  {
+    version: 1,
+    status: "success",
+    eval: {
+      task: "security_guide",
+      task_version: 0,
+      task_file: "src/main.py",
+      task_id: "FuR38tV2t6a3jKjED9Qq8R",
+      run_id: "aYmmWwRL8PBH8wPnBo3e3Z",
+      created: "2024-06-08T16:22:05",
+      dataset: {
+        name: "security_guide",
+        location: "example://security_guide",
+      },
+      model: "openai/gpt-3.5-turbo",
+      task_attribs: {},
+      task_args: {},
+      model_args: {},
+      config: {},
+      packages: {
+        inspect_ai: "0.3.10",
+      },
+    },
+    plan: {
+      name: "plan",
+      steps: [
+        {
+          solver: "generate",
+          params: {},
+        },
+      ],
+      config: {},
+    },
+    results: {
+      scorer: {
+        name: "check_length",
+        params: {},
+      },
+      metrics: {
+        accuracy: {
+          name: "accuracy",
+          value: 0.0,
+          options: {},
+        },
+      },
+    },
+    stats: {
+      started_at: "2024-06-08T16:22:05",
+      completed_at: "2024-06-08T16:22:06",
+      model_usage: {
+        "openai/gpt-3.5-turbo": {
+          input_tokens: 32,
+          output_tokens: 74,
+          total_tokens: 106,
+        },
+      },
+    },
+    samples: [
+      {
+        id: 1,
+        epoch: 1,
+        input: [
+          {
+            content: "Write a funny tweet about newyork city?",
+            source: "input",
+            role: "user",
+          },
+        ],
+        target: "",
+        messages: [
+          {
+            content: "Write a funny tweet about newyork city?",
+            source: "input",
+            role: "user",
+          },
+          {
+            content:
+              '"Just spent 3 hours trying to find a public restroom in NYC, ended up accidentally walking into a trendy hipster coffee shop bathroom where they charge $10 for a latte with a side of TP #cityproblems #NYC"',
+            source: "generate",
+            role: "assistant",
+          },
+        ],
+        output: {
+          model: "gpt-3.5-turbo-0125",
+          choices: [
+            {
+              message: {
+                content:
+                  '"Just spent 3 hours trying to find a public restroom in NYC, ended up accidentally walking into a trendy hipster coffee shop bathroom where they charge $10 for a latte with a side of TP #cityproblems #NYC"',
+                source: "generate",
+                role: "assistant",
+              },
+              stop_reason: "stop",
+            },
+          ],
+          usage: {
+            input_tokens: 17,
+            output_tokens: 47,
+            total_tokens: 64,
+          },
+        },
+        score: {
+          value: "I",
+          answer:
+            '"Just spent 3 hours trying to find a public restroom in NYC, ended up accidentally walking into a trendy hipster coffee shop bathroom where they charge $10 for a latte with a side of TP #cityproblems #NYC"',
+        },
+        metadata: {},
+      },
+      {
+        id: 2,
+        epoch: 1,
+        input: [
+          {
+            content: "Write a funny tweet about vancouver?",
+            source: "input",
+            role: "user",
+          },
+        ],
+        target: "",
+        messages: [
+          {
+            content: "Write a funny tweet about vancouver?",
+            source: "input",
+            role: "user",
+          },
+          {
+            content:
+              "Just saw someone try to use an umbrella in Vancouver... and that's how you know they're not a local. #raincitylife",
+            source: "generate",
+            role: "assistant",
+          },
+        ],
+        output: {
+          model: "gpt-3.5-turbo-0125",
+          choices: [
+            {
+              message: {
+                content:
+                  "Just saw someone try to use an umbrella in Vancouver... and that's how you know they're not a local. #raincitylife",
+                source: "generate",
+                role: "assistant",
+              },
+              stop_reason: "stop",
+            },
+          ],
+          usage: {
+            input_tokens: 15,
+            output_tokens: 27,
+            total_tokens: 42,
+          },
+        },
+        score: {
+          value: "I",
+          answer:
+            "Just saw someone try to use an umbrella in Vancouver... and that's how you know they're not a local. #raincitylife",
+        },
+        metadata: {},
+      },
+    ],
+    logging: [],
+  },
+  {
+    version: 1,
+    status: "success",
+    eval: {
+      task: "security_guide",
+      task_version: 0,
+      task_file: "src/main.py",
+      task_id: "WaGqtQ7sZYWkmmDYcubjiv",
+      run_id: "ar7LzC2PefbchfLMosfAWF",
+      created: "2024-05-30T11:06:32",
+      dataset: {
+        name: "security_guide",
+        location: "example://security_guide",
+      },
+      model: "openai/gpt-3.5-turbo",
+      task_attribs: {},
+      task_args: {},
+      model_args: {},
+      config: {},
+      packages: {
+        inspect_ai: "0.3.10",
+      },
+    },
+    plan: {
+      name: "plan",
+      steps: [
+        {
+          solver: "chain_of_thought",
+          params: {},
+        },
+        {
+          solver: "generate",
+          params: {},
+        },
+        {
+          solver: "self_critique",
+          params: {},
+        },
+      ],
+      config: {},
+    },
+    results: {
+      scorer: {
+        name: "model_graded_fact",
+        params: {},
+      },
+      metrics: {
+        accuracy: {
+          name: "accuracy",
+          value: 1,
+          options: {},
+        },
+        bootstrap_std: {
+          name: "bootstrap_std",
+          value: 0,
+          options: {},
+        },
+      },
+    },
+    stats: {
+      started_at: "2024-05-30T11:06:32",
+      completed_at: "2024-05-30T11:06:45",
+      model_usage: {
+        "openai/gpt-3.5-turbo": {
+          input_tokens: 1677,
+          output_tokens: 728,
+          total_tokens: 2405,
+        },
+      },
+    },
+    samples: [
+      {
+        id: 1,
+        epoch: 1,
+        input: "How do I prevent SQL Injection attacks?",
+        target: "use parameterized queries and prepared statements",
+        messages: [
+          {
+            content:
+              '\nHow do I prevent SQL Injection attacks?\n\nBefore answering, reason in a step-by-step manner as to get the right answer. Provide your answer at the end on its own line in the form "ANSWER: $ANSWER" (without quotes) where $ANSWER is the answer to the question.\n',
+            source: "input",
+            role: "user",
+          },
+          {
+            content:
+              "1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack that allows attackers to manipulate the SQL code of a web application to gain unauthorized access to the database or to perform harmful actions.\n\n2. Sanitize user inputs: One of the most common ways attackers exploit SQL Injection vulnerabilities is through user inputs. Make sure to validate and sanitize all user inputs to prevent malicious code from being executed.\n\n3. Use parameterized queries: Instead of dynamically creating SQL queries by concatenating strings, use parameterized queries which automatically sanitize inputs by treating them as data rather than executable code.\n\n4. Limit database permissions: Ensure that the database user account used by your application has the least amount of privileges necessary. Avoid using an admin account for regular application tasks.\n\n5. Update and patch regularly: Make sure your database management system, web server, and application software are up to date with the latest security patches to protect against known vulnerabilities.\n\n6. Implement input validation: Validate and sanitize user inputs on both client-side and server-side to reduce the risk of SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, sanitize user inputs, use parameterized queries, limit database permissions, update and patch regularly, and implement input validation.",
+            source: "generate",
+            role: "assistant",
+          },
+          {
+            content:
+              '\nGiven the following question, initial answer and critique please generate an improved answer to the question:\n\n[BEGIN DATA]\n***\n[Question]: How do I prevent SQL Injection attacks?\n***\n[Answer]: 1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack that allows attackers to manipulate the SQL code of a web application to gain unauthorized access to the database or to perform harmful actions.\n\n2. Sanitize user inputs: One of the most common ways attackers exploit SQL Injection vulnerabilities is through user inputs. Make sure to validate and sanitize all user inputs to prevent malicious code from being executed.\n\n3. Use parameterized queries: Instead of dynamically creating SQL queries by concatenating strings, use parameterized queries which automatically sanitize inputs by treating them as data rather than executable code.\n\n4. Limit database permissions: Ensure that the database user account used by your application has the least amount of privileges necessary. Avoid using an admin account for regular application tasks.\n\n5. Update and patch regularly: Make sure your database management system, web server, and application software are up to date with the latest security patches to protect against known vulnerabilities.\n\n6. Implement input validation: Validate and sanitize user inputs on both client-side and server-side to reduce the risk of SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, sanitize user inputs, use parameterized queries, limit database permissions, update and patch regularly, and implement input validation.\n***\n[Critique]: The answer provides a good overview of ways to prevent SQL Injection attacks, but it lacks depth and specificity in explaining each point. It would be beneficial to provide more details and examples for each prevention method to ensure a comprehensive understanding for the reader. 
Additionally, the answer could benefit from explaining why each prevention method is effective in stopping SQL Injection attacks.\n***\n[END DATA]\n\nIf the original answer is already correct, just repeat the original answer exactly. Provide your answer at the end on its own line in the form "ANSWER: $ANSWER" (without quotes) where $ANSWER is the answer to the question.\n',
+            role: "user",
+          },
+          {
+            content:
+              "Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.",
+            source: "generate",
+            role: "assistant",
+          },
+        ],
+        output: {
+          model: "gpt-3.5-turbo-0125",
+          choices: [
+            {
+              message: {
+                content:
+                  "Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.",
+                source: "generate",
+                role: "assistant",
+              },
+              stop_reason: "stop",
+            },
+          ],
+          usage: {
+            input_tokens: 738,
+            output_tokens: 294,
+            total_tokens: 1032,
+          },
+        },
+        score: {
+          value: "C",
+          answer:
+            "Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.",
+          explanation:
+            "1. The expert answer mentions using parameterized queries and prepared statements as a way to prevent SQL Injection attacks.\n2. The submission also includes using parameterized queries as one of the ways to prevent SQL Injection attacks.\n3. The submission expands on the concept, explaining what SQL Injection attacks are, sanitizing user inputs, limiting database permissions, updating software regularly, and implementing input validation.\n4. All additional points mentioned in the submission are relevant and contribute to preventing SQL Injection attacks.\n\nAfter a thorough comparison, it is evident that the submission contains the content mentioned in the expert answer. \n\nGRADE: C",
+          metadata: {
+            grading: [
+              {
+                content:
+                  "\nYou are comparing a submitted answer to an expert answer on a given question. Here is the data:\n\n[BEGIN DATA]\n************\n[Question]: How do I prevent SQL Injection attacks?\n************\n[Expert]: use parameterized queries and prepared statements\n************\n[Submission]: Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. 
This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.\n************\n[END DATA]\n\nCompare the factual content of the submitted answer with the expert answer. Ignore any differences in style, grammar, or punctuation.\n\nDoes the submission contain the content in the expert answer?\n\n\nAfter assessing the submitted answer, reply with 'GRADE: $LETTER' (without quotes) where LETTER is one of CI.  Please choose ONE option for the grade: either \"C\" for correct answers, or \"I\" for incorrect answers.\n\nFor example, after reviewing a correct answer you might write 'GRADE: C' or after reviewing an incorrect answer you might write 'GRADE: I'.\n\nFirst, write out in a step by step manner your reasoning about the criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. Then, end with your answer formatted as 'GRADE: $LETTER' (without quotes) where LETTER is one of CI.\n\n",
+                role: "user",
+              },
+              {
+                content:
+                  "1. The expert answer mentions using parameterized queries and prepared statements as a way to prevent SQL Injection attacks.\n2. The submission also includes using parameterized queries as one of the ways to prevent SQL Injection attacks.\n3. The submission expands on the concept, explaining what SQL Injection attacks are, sanitizing user inputs, limiting database permissions, updating software regularly, and implementing input validation.\n4. All additional points mentioned in the submission are relevant and contribute to preventing SQL Injection attacks.\n\nAfter a thorough comparison, it is evident that the submission contains the content mentioned in the expert answer. \n\nGRADE: C",
+                source: "generate",
+                role: "assistant",
+              },
+            ],
+          },
+        },
+        metadata: {},
+      },
+    ],
+    logging: [],
+  },
+  {
+    version: 1,
+    status: "success",
+    eval: {
+      task: "security_guide",
+      task_version: 0,
+      task_file: "src/main.py",
+      task_id: "WaGqtQ7sZYWkmmDYcubjiv",
+      run_id: "ar7LzC2PefbchfLMasaasaF",
+      created: "2024-05-30T11:06:32",
+      dataset: {
+        name: "security_guide",
+        location: "example://security_guide",
+      },
+      model: "openai/gpt-4",
+      task_attribs: {},
+      task_args: {},
+      model_args: {},
+      config: {},
+      packages: {
+        inspect_ai: "0.3.10",
+      },
+    },
+    plan: {
+      name: "plan",
+      steps: [
+        {
+          solver: "chain_of_thought",
+          params: {},
+        },
+        {
+          solver: "generate",
+          params: {},
+        },
+        {
+          solver: "self_critique",
+          params: {},
+        },
+      ],
+      config: {},
+    },
+    results: {
+      scorer: {
+        name: "model_graded_fact",
+        params: {},
+      },
+      metrics: {
+        accuracy: {
+          name: "accuracy",
+          value: 1,
+          options: {},
+        },
+        bootstrap_std: {
+          name: "bootstrap_std",
+          value: 0,
+          options: {},
+        },
+      },
+    },
+    stats: {
+      started_at: "2024-05-30T11:06:32",
+      completed_at: "2024-05-30T11:06:45",
+      model_usage: {
+        "openai/gpt-4": {
+          input_tokens: 1677,
+          output_tokens: 728,
+          total_tokens: 2405,
+        },
+      },
+    },
+    samples: [
+      {
+        id: 1,
+        epoch: 1,
+        input: "How do I prevent SQL Injection attacks?",
+        target: "use parameterized queries and prepared statements",
+        messages: [
+          {
+            content:
+              '\nHow do I prevent SQL Injection attacks?\n\nBefore answering, reason in a step-by-step manner as to get the right answer. Provide your answer at the end on its own line in the form "ANSWER: $ANSWER" (without quotes) where $ANSWER is the answer to the question.\n',
+            source: "input",
+            role: "user",
+          },
+          {
+            content:
+              "1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack that allows attackers to manipulate the SQL code of a web application to gain unauthorized access to the database or to perform harmful actions.\n\n2. Sanitize user inputs: One of the most common ways attackers exploit SQL Injection vulnerabilities is through user inputs. Make sure to validate and sanitize all user inputs to prevent malicious code from being executed.\n\n3. Use parameterized queries: Instead of dynamically creating SQL queries by concatenating strings, use parameterized queries which automatically sanitize inputs by treating them as data rather than executable code.\n\n4. Limit database permissions: Ensure that the database user account used by your application has the least amount of privileges necessary. Avoid using an admin account for regular application tasks.\n\n5. Update and patch regularly: Make sure your database management system, web server, and application software are up to date with the latest security patches to protect against known vulnerabilities.\n\n6. Implement input validation: Validate and sanitize user inputs on both client-side and server-side to reduce the risk of SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, sanitize user inputs, use parameterized queries, limit database permissions, update and patch regularly, and implement input validation.",
+            source: "generate",
+            role: "assistant",
+          },
+          {
+            content:
+              '\nGiven the following question, initial answer and critique please generate an improved answer to the question:\n\n[BEGIN DATA]\n***\n[Question]: How do I prevent SQL Injection attacks?\n***\n[Answer]: 1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack that allows attackers to manipulate the SQL code of a web application to gain unauthorized access to the database or to perform harmful actions.\n\n2. Sanitize user inputs: One of the most common ways attackers exploit SQL Injection vulnerabilities is through user inputs. Make sure to validate and sanitize all user inputs to prevent malicious code from being executed.\n\n3. Use parameterized queries: Instead of dynamically creating SQL queries by concatenating strings, use parameterized queries which automatically sanitize inputs by treating them as data rather than executable code.\n\n4. Limit database permissions: Ensure that the database user account used by your application has the least amount of privileges necessary. Avoid using an admin account for regular application tasks.\n\n5. Update and patch regularly: Make sure your database management system, web server, and application software are up to date with the latest security patches to protect against known vulnerabilities.\n\n6. Implement input validation: Validate and sanitize user inputs on both client-side and server-side to reduce the risk of SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, sanitize user inputs, use parameterized queries, limit database permissions, update and patch regularly, and implement input validation.\n***\n[Critique]: The answer provides a good overview of ways to prevent SQL Injection attacks, but it lacks depth and specificity in explaining each point. It would be beneficial to provide more details and examples for each prevention method to ensure a comprehensive understanding for the reader. 
Additionally, the answer could benefit from explaining why each prevention method is effective in stopping SQL Injection attacks.\n***\n[END DATA]\n\nIf the original answer is already correct, just repeat the original answer exactly. Provide your answer at the end on its own line in the form "ANSWER: $ANSWER" (without quotes) where $ANSWER is the answer to the question.\n',
+            role: "user",
+          },
+          {
+            content:
+              "Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.",
+            source: "generate",
+            role: "assistant",
+          },
+        ],
+        output: {
+          model: "gpt-4",
+          choices: [
+            {
+              message: {
+                content:
+                  "Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.",
+                source: "generate",
+                role: "assistant",
+              },
+              stop_reason: "stop",
+            },
+          ],
+          usage: {
+            input_tokens: 738,
+            output_tokens: 294,
+            total_tokens: 1032,
+          },
+        },
+        score: {
+          value: "C",
+          answer:
+            "Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.",
+          explanation:
+            "1. The expert answer mentions using parameterized queries and prepared statements as a way to prevent SQL Injection attacks.\n2. The submission also includes using parameterized queries as one of the ways to prevent SQL Injection attacks.\n3. The submission expands on the concept, explaining what SQL Injection attacks are, sanitizing user inputs, limiting database permissions, updating software regularly, and implementing input validation.\n4. All additional points mentioned in the submission are relevant and contribute to preventing SQL Injection attacks.\n\nAfter a thorough comparison, it is evident that the submission contains the content mentioned in the expert answer. \n\nGRADE: C",
+          metadata: {
+            grading: [
+              {
+                content:
+                  "\nYou are comparing a submitted answer to an expert answer on a given question. Here is the data:\n\n[BEGIN DATA]\n************\n[Question]: How do I prevent SQL Injection attacks?\n************\n[Expert]: use parameterized queries and prepared statements\n************\n[Submission]: Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. 
This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.\n************\n[END DATA]\n\nCompare the factual content of the submitted answer with the expert answer. Ignore any differences in style, grammar, or punctuation.\n\nDoes the submission contain the content in the expert answer?\n\n\nAfter assessing the submitted answer, reply with 'GRADE: $LETTER' (without quotes) where LETTER is one of CI.  Please choose ONE option for the grade: either \"C\" for correct answers, or \"I\" for incorrect answers.\n\nFor example, after reviewing a correct answer you might write 'GRADE: C' or after reviewing an incorrect answer you might write 'GRADE: I'.\n\nFirst, write out in a step by step manner your reasoning about the criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. Then, end with your answer formatted as 'GRADE: $LETTER' (without quotes) where LETTER is one of CI.\n\n",
+                role: "user",
+              },
+              {
+                content:
+                  "1. The expert answer mentions using parameterized queries and prepared statements as a way to prevent SQL Injection attacks.\n2. The submission also includes using parameterized queries as one of the ways to prevent SQL Injection attacks.\n3. The submission expands on the concept, explaining what SQL Injection attacks are, sanitizing user inputs, limiting database permissions, updating software regularly, and implementing input validation.\n4. All additional points mentioned in the submission are relevant and contribute to preventing SQL Injection attacks.\n\nAfter a thorough comparison, it is evident that the submission contains the content mentioned in the expert answer. \n\nGRADE: C",
+                source: "generate",
+                role: "assistant",
+              },
+            ],
+          },
+        },
+        metadata: {},
+      },
+    ],
+    logging: [],
+  },
+];
+
+export default data;
diff --git a/app/(protected)/project/[project_id]/experiments/page-client.tsx b/app/(protected)/project/[project_id]/experiments/page-client.tsx
index 28d289d5..f84d000f 100644
--- a/app/(protected)/project/[project_id]/experiments/page-client.tsx
+++ b/app/(protected)/project/[project_id]/experiments/page-client.tsx
@@ -1,3 +1,127 @@
+"use client";
+
+import { Badge } from "@/components/ui/badge";
+import { Button } from "@/components/ui/button";
+import { Checkbox } from "@/components/ui/checkbox";
+import { cn, formatDateTime } from "@/lib/utils";
+import { useParams, useRouter } from "next/navigation";
+import data from "./experiment_result";
+
+/**
+ * Lists every experiment run from the bundled `experiment_result` data as a
+ * table row. Clicking a row navigates to that run's page under the current
+ * project.
+ */
 export default function Experiments() {
-  return <></>;
+  const router = useRouter();
+  const projectId = useParams()?.project_id as string;
+  return (
+    <div className="w-full flex flex-col gap-4">
+      <div className="md:px-24 px-12 py-12 flex justify-between bg-muted">
+        <h1 className="text-3xl font-semibold">Experiments</h1>
+        <Button>New Experiment</Button>
+      </div>
+      <div className="flex flex-col gap-12 w-full px-12">
+        <div className="overflow-y-scroll">
+          <table className="table-auto overflow-x-scroll w-max border-separate border border-muted rounded-md mt-6">
+            <thead className="bg-muted">
+              <tr>
+                <th className="w-12 rounded-md p-2">
+                  <Checkbox />
+                </th>
+                <th className="p-2 rounded-md text-sm font-medium">Run ID</th>
+                <th className="p-2 rounded-md text-sm font-medium">
+                  Started at
+                </th>
+                <th className="p-2 rounded-md text-sm font-medium">
+                  Completed at
+                </th>
+                <th className="p-2 rounded-md text-sm font-medium">Task</th>
+                <th className="p-2 rounded-md text-sm font-medium">
+                  Total Samples
+                </th>
+                <th className="p-2 rounded-md text-sm font-medium">Model</th>
+                <th className="p-2 rounded-md text-sm font-medium">
+                  Plan Name
+                </th>
+                <th className="p-2 rounded-md text-sm font-medium">Scorer</th>
+                <th className="p-2 rounded-md text-sm font-medium">Metrics</th>
+                <th className="p-2 rounded-md text-sm font-medium">Status</th>
+              </tr>
+            </thead>
+            <tbody>
+              {/* Rows are keyed by run_id, the identifier each row displays
+                  and navigates by (task_id presumably repeats across runs of
+                  the same task; confirm against the data source). */}
+              {data.map((experiment) => (
+                <tr
+                  key={experiment.eval.run_id}
+                  className="hover:cursor-pointer hover:bg-muted"
+                  onClick={() =>
+                    router.push(
+                      `/project/${projectId}/experiments/${experiment.eval.run_id}`
+                    )
+                  }
+                >
+                  <td className="px-2 py-1 text-center">
+                    <Checkbox />
+                  </td>
+                  <td className="text-sm px-2 py-1">
+                    {experiment.eval.run_id}
+                  </td>
+                  <td className="text-sm px-2 py-1">
+                    {formatDateTime(experiment.stats.started_at, true)}
+                  </td>
+                  <td className="text-sm px-2 py-1">
+                    {formatDateTime(experiment.stats.completed_at, true)}
+                  </td>
+                  <td className="text-sm px-2 py-1">{experiment.eval.task}</td>
+                  <td className="text-sm px-2 py-1">
+                    {experiment.samples?.length || 0}
+                  </td>
+                  <td className="text-sm px-2 py-1">{experiment.eval.model}</td>
+                  <td className="text-sm px-2 py-1">{experiment.plan.name}</td>
+                  <td className="text-sm px-2 py-1">
+                    {experiment.results?.scorer?.name || "N/A"}
+                  </td>
+                  <td className="text-sm px-2 py-1 flex flex-wrap gap-2 w-72">
+                    {/* `results` is treated as optional by the scorer cell
+                        above, so guard here too: Object.keys(undefined)
+                        throws. Non-numeric values render as "N/A". */}
+                    {Object.entries(
+                      (experiment.results?.metrics ?? {}) as Record<
+                        string,
+                        { value?: unknown }
+                      >
+                    ).map(([metric, details]) => {
+                      const value = details?.value;
+                      return (
+                        <Badge variant={"outline"} className="" key={metric}>
+                          {metric +
+                            ": " +
+                            (typeof value === "number"
+                              ? value.toFixed(2)
+                              : "N/A")}
+                        </Badge>
+                      );
+                    })}
+                  </td>
+                  <td
+                    className={cn(
+                      "text-sm px-2 py-1 font-semibold",
+                      experiment.status === "success"
+                        ? "text-green-600"
+                        : "text-destructive"
+                    )}
+                  >
+                    {experiment.status}
+                  </td>
+                </tr>
+              ))}
+            </tbody>
+          </table>
+        </div>
+      </div>
+    </div>
+  );
 }
diff --git a/components/evaluate/create-test.tsx b/components/evaluate/create-test.tsx
index 64271c3c..38af12ae 100644
--- a/components/evaluate/create-test.tsx
+++ b/components/evaluate/create-test.tsx
@@ -106,6 +106,10 @@ export function CreateTest({
                 if (parseInt(data.min, 10) === parseInt(data.max, 10)) {
                   throw new Error("Min score cannot be equal to max score.");
                 }
+                // check for a range of at least 2
+                if (parseInt(data.max, 10) - parseInt(data.min, 10) < 2) {
+                  throw new Error("Range must be at least 2.");
+                }
                 setBusy(true);
                 await fetch("/api/test", {
                   method: "POST",
diff --git a/components/evaluate/evaluation-row.tsx b/components/evaluate/evaluation-row.tsx
index 28d70d98..2955064e 100644
--- a/components/evaluate/evaluation-row.tsx
+++ b/components/evaluate/evaluation-row.tsx
@@ -218,7 +218,7 @@ export default function EvaluationRow({
             onInteractOutside={(e) => {
               e.preventDefault();
             }}
-            className={cn("w-[1500px] overflow-y-scroll")}
+            className={cn("w-2/3 overflow-y-scroll")}
             onClick={(e) => e.stopPropagation()}
           >
             <SheetHeader>
diff --git a/components/evaluate/evaluation-table.tsx b/components/evaluate/evaluation-table.tsx
index 49442d10..25ccf220 100644
--- a/components/evaluate/evaluation-table.tsx
+++ b/components/evaluate/evaluation-table.tsx
@@ -227,17 +227,14 @@ export default function EvaluationTable({
 export function EvaluationTableSkeleton() {
   return (
     <div className="flex flex-col gap-3 rounded-md border border-muted max-h-screen overflow-y-scroll">
-      <div className="grid grid-cols-15 items-center gap-3 py-3 px-4 bg-muted rounded-t-md">
-        <p className="text-xs font-medium col-span-2 text-end">
+      <div className="grid grid-cols-10 items-center gap-2 p-1 bg-muted rounded-t-md">
+        <p className="p-2 rounded-md text-xs font-medium text-left">
           Timestamp (UTC)
         </p>
-        <p className="text-xs font-medium">Model</p>
+        <p className="p-2 rounded-md text-xs font-medium text-left">Model</p>
         <p className="text-xs font-medium col-span-2">Input</p>
         <p className="text-xs font-medium col-span-2">Output</p>
-        <p className="text-xs font-medium">Cost</p>
         <p className="text-xs font-medium">PII Detected</p>
-        <p className="text-xs font-medium">Duration</p>
-        <p className="text-xs font-medium">Evaluate</p>
         <p className="text-xs font-medium">User Score</p>
         <p className="text-xs font-medium">User Id</p>
         <p className="text-xs font-medium">Added to Dataset</p>
diff --git a/components/playground/prompt-registry-dialog.tsx b/components/playground/prompt-registry-dialog.tsx
index b6a99253..a0b1b99a 100644
--- a/components/playground/prompt-registry-dialog.tsx
+++ b/components/playground/prompt-registry-dialog.tsx
@@ -172,7 +172,7 @@ export function PromptRegistryCombobox({
       </PopoverTrigger>
       <PopoverContent className="w-[250px] p-0">
         <Command>
-          <CommandInput placeholder="Search framework..." />
+          <CommandInput placeholder="Search prompt sets..." />
           <CommandEmpty>No promptset found.</CommandEmpty>
           <CommandGroup>
             {promptsets?.promptsets?.map((promptset: Promptset) => (
diff --git a/components/shared/add-to-dataset.tsx b/components/shared/add-to-dataset.tsx
index b8f5fb71..e4bf4fd8 100644
--- a/components/shared/add-to-dataset.tsx
+++ b/components/shared/add-to-dataset.tsx
@@ -161,7 +161,7 @@ export default function DatasetCombobox({
         </PopoverTrigger>
         <PopoverContent className="w-[200px] p-0">
           <Command>
-            <CommandInput placeholder="Search framework..." />
+            <CommandInput placeholder="Search dataset..." />
             <CommandEmpty>No dataset found.</CommandEmpty>
             <CommandGroup>
               {fetchDatasets.data?.datasets?.map((dataset: Dataset) => (
diff --git a/components/shared/conversation-view.tsx b/components/shared/conversation-view.tsx
index c83f2924..8487eb1e 100644
--- a/components/shared/conversation-view.tsx
+++ b/components/shared/conversation-view.tsx
@@ -1,4 +1,4 @@
-import { safeStringify } from "@/lib/utils";
+import { getVendorFromSpan, safeStringify } from "@/lib/utils";
 import UserLogo from "./user-logo";
 import { VendorLogo } from "./vendor-metadata";
 
@@ -25,13 +25,14 @@ export default function ConversationView({ span }: { span: any }) {
             : prompt?.text
             ? safeStringify(prompt?.text)
             : "No input found";
+          const vendor = getVendorFromSpan(span);
           return (
             <div key={i} className="flex flex-col gap-2">
               <div className="flex gap-2 items-center">
                 {role === "user" ? (
                   <UserLogo />
                 ) : (
-                  <VendorLogo variant="circular" span={span} />
+                  <VendorLogo variant="circular" vendor={vendor} />
                 )}
                 <p className="font-semibold text-md capitalize">{role}</p>
                 {role === "system" && (
@@ -64,13 +65,14 @@ export default function ConversationView({ span }: { span: any }) {
             : response?.text
             ? safeStringify(response?.text)
             : "No output found";
+          const vendor = getVendorFromSpan(span);
           return (
             <div className="flex flex-col gap-2 whitespace-pre-wrap" key={i}>
               <div className="flex gap-2 items-center">
                 {role === "user" ? (
                   <UserLogo />
                 ) : (
-                  <VendorLogo variant="circular" span={span} />
+                  <VendorLogo variant="circular" vendor={vendor} />
                 )}
                 <p className="font-semibold text-md capitalize">{role}</p>
               </div>
@@ -86,3 +88,56 @@ export default function ConversationView({ span }: { span: any }) {
     </div>
   );
 }
+
+/** A single chat message as rendered by `Conversation`. */
+interface Message {
+  content: string;
+  role: string;
+  source: string;
+}
+
+/**
+ * Renders a list of chat messages, one block per message, with an avatar
+ * chosen by role: the user logo for "user" messages and the vendor logo for
+ * everything else. "system" messages additionally get a "Prompt" tag.
+ *
+ * @param model - Model identifier; the text before the first "/" is used as
+ *   the vendor, falling back to "openai" when that is empty or model is unset.
+ * @param messages - Messages to render, in display order.
+ */
+export function Conversation({
+  model,
+  messages,
+}: {
+  model: string;
+  messages: Message[];
+}) {
+  // Keep the index inside the optional chain: for a missing model the whole
+  // expression short-circuits to undefined instead of indexing undefined.
+  const vendor = model?.split("/")[0] || "openai";
+  return (
+    <div className="flex flex-col gap-8 overflow-y-scroll">
+      {messages.map((message, i) => {
+        const role = message.role.toLowerCase();
+        return (
+          <div key={i} className="flex flex-col gap-2">
+            <div className="flex gap-2 items-center">
+              {role === "user" ? <UserLogo /> : <VendorLogo vendor={vendor} />}
+              <p className="font-semibold text-md capitalize">{role}</p>
+              {role === "system" && (
+                <p className="font-semibold text-xs capitalize p-1 rounded-md bg-muted">
+                  Prompt
+                </p>
+              )}
+            </div>
+            {/* Render as a text node so React escapes it: message content is
+                prompt/model text and must not be injected as raw HTML. */}
+            <div className="text-sm bg-muted rounded-md px-2 py-4">
+              {message.content}
+            </div>
+          </div>
+        );
+      })}
+    </div>
+  );
+}
diff --git a/components/shared/llm-picker.tsx b/components/shared/llm-picker.tsx
index 35cf19e6..5c7a5ac7 100644
--- a/components/shared/llm-picker.tsx
+++ b/components/shared/llm-picker.tsx
@@ -42,7 +42,7 @@ export default function LLMPicker({
       </PopoverTrigger>
       <PopoverContent className="w-[200px] p-0">
         <Command>
-          <CommandInput placeholder="Search framework..." />
+          <CommandInput placeholder="Search llm provider..." />
           <CommandEmpty>No LLM provider found.</CommandEmpty>
           <CommandGroup>
             {LLM_VENDORS.map((llm) => (
diff --git a/components/shared/nav.tsx b/components/shared/nav.tsx
index f96803a8..1d6bd46b 100644
--- a/components/shared/nav.tsx
+++ b/components/shared/nav.tsx
@@ -27,10 +27,10 @@ const ProjectNavLinks = (id: string) => [
     name: "Evaluate",
     href: `/project/${id}/evaluate`,
   },
-  // {
-  //   name: "Experiments",
-  //   href: `/project/${id}/experiments`,
-  // },
+  {
+    name: "Experiments",
+    href: `/project/${id}/experiments`,
+  },
   {
     name: "Datasets",
     href: `/project/${id}/datasets`,
diff --git a/components/shared/vendor-metadata.tsx b/components/shared/vendor-metadata.tsx
index 233331da..cd5d040c 100644
--- a/components/shared/vendor-metadata.tsx
+++ b/components/shared/vendor-metadata.tsx
@@ -113,19 +113,13 @@ export function serviceTypeColor(serviceType: string) {
 }
 
 export function VendorLogo({
-  span,
+  vendor,
   variant = "default",
 }: {
-  span: Span;
+  vendor: string;
   variant?: string;
 }) {
-  const attributes = span.attributes ? JSON.parse(span.attributes) : {};
-  let serviceName = "";
-  if (attributes["langtrace.service.name"]) {
-    serviceName = attributes["langtrace.service.name"].toLowerCase();
-  }
-
-  if (span.name.includes("groq") || serviceName.includes("groq")) {
+  if (vendor.includes("groq")) {
     const color = vendorColor("groq");
     return (
       <Image
@@ -141,7 +135,7 @@ export function VendorLogo({
     );
   }
 
-  if (span.name.includes("perplexity") || serviceName.includes("perplexity")) {
+  if (vendor.includes("perplexity")) {
     const color = vendorColor("perplexity");
     return (
       <Image
@@ -157,7 +151,7 @@ export function VendorLogo({
     );
   }
 
-  if (span.name.includes("openai") || serviceName.includes("openai")) {
+  if (vendor.includes("openai")) {
     const color = vendorColor("openai");
     return (
       <Image
@@ -173,7 +167,7 @@ export function VendorLogo({
     );
   }
 
-  if (span.name.includes("anthropic") || serviceName.includes("anthropic")) {
+  if (vendor.includes("anthropic")) {
     const color = vendorColor("anthropic");
     return (
       <Image
@@ -189,7 +183,7 @@ export function VendorLogo({
     );
   }
 
-  if (span.name.includes("pinecone") || serviceName.includes("pinecone")) {
+  if (vendor.includes("pinecone")) {
     const color = vendorColor("pinecone");
     return (
       <Image
@@ -205,7 +199,7 @@ export function VendorLogo({
     );
   }
 
-  if (span.name.includes("chromadb") || serviceName.includes("chromadb")) {
+  if (vendor.includes("chromadb")) {
     const color = vendorColor("chromadb");
     return (
       <Image
@@ -221,7 +215,7 @@ export function VendorLogo({
     );
   }
 
-  if (span.name.includes("langchain") || serviceName.includes("langchain")) {
+  if (vendor.includes("langchain")) {
     const color = vendorColor("langchain");
     return (
       <Image
@@ -237,7 +231,7 @@ export function VendorLogo({
     );
   }
 
-  if (span.name.includes("llamaindex") || serviceName.includes("llamaindex")) {
+  if (vendor.includes("llamaindex")) {
     const color = vendorColor("llamaindex");
     return (
       <Image
@@ -253,7 +247,7 @@ export function VendorLogo({
     );
   }
 
-  if (span.name.includes("cohere") || serviceName.includes("cohere")) {
+  if (vendor.includes("cohere")) {
     const color = vendorColor("cohere");
     return (
       <Image
@@ -269,7 +263,7 @@ export function VendorLogo({
     );
   }
 
-  if (span.name.includes("qdrant") || serviceName.includes("qdrant")) {
+  if (vendor.includes("qdrant")) {
     const color = vendorColor("qdrant");
     return (
       <Image
diff --git a/components/traces/trace_graph.tsx b/components/traces/trace_graph.tsx
index 8d88259e..7b3ed12c 100644
--- a/components/traces/trace_graph.tsx
+++ b/components/traces/trace_graph.tsx
@@ -1,4 +1,5 @@
 import { correctTimestampFormat } from "@/lib/trace_utils";
+import { getVendorFromSpan } from "@/lib/utils";
 import { ChevronDown, ChevronRight } from "lucide-react";
 import React, { useState } from "react";
 import { VendorLogo } from "../shared/vendor-metadata";
@@ -74,6 +75,8 @@ const SpanItem: React.FC<SpanItemProps> = ({
   )
     color = "bg-indigo-500";
 
+  const vendor = getVendorFromSpan(span as any);
+
   return (
     <div className="flex flex-col gap-3 w-full mt-2">
       <div className="flex items-center">
@@ -94,7 +97,7 @@ const SpanItem: React.FC<SpanItemProps> = ({
             (span.children.length === 0 && (
               <div className="border-b-2 border-l-2 border-muted-foreground rounded-bl-md w-3 h-3 ml-2 mb-2" />
             ))}
-          <VendorLogo span={span} />
+          <VendorLogo vendor={vendor} />
           <span className="text-xs">{span.name}</span>
         </div>
         <div
diff --git a/components/ui/sheet.tsx b/components/ui/sheet.tsx
index e2d50d82..92e201ce 100644
--- a/components/ui/sheet.tsx
+++ b/components/ui/sheet.tsx
@@ -40,7 +40,7 @@ const sheetVariants = cva(
           "inset-x-0 bottom-0 border-t data-[state=closed]:slide-out-to-bottom data-[state=open]:slide-in-from-bottom",
         left: "inset-y-0 left-0 h-full w-3/4 border-r data-[state=closed]:slide-out-to-left data-[state=open]:slide-in-from-left",
         right:
-          "inset-y-0 right-0 h-full w-3/4 border-l data-[state=closed]:slide-out-to-right data-[state=open]:slide-in-from-right",
+          "inset-y-0 right-0 h-full border-l data-[state=closed]:slide-out-to-right data-[state=open]:slide-in-from-right",
       },
     },
     defaultVariants: {
diff --git a/lib/utils.ts b/lib/utils.ts
index e7a8a967..b1af9816 100644
--- a/lib/utils.ts
+++ b/lib/utils.ts
@@ -507,3 +507,52 @@ export function getFormattedTime(lastNHours: number): string {
   );
   return nHoursAgo;
 }
+
+/**
+ * Identifies the vendor behind a span by substring-matching known vendor
+ * names against the span's name and its `langtrace.service.name` attribute.
+ *
+ * Vendors are checked in a fixed order and the first match wins, so a span
+ * mentioning several vendors resolves to the earliest one in the list.
+ *
+ * @param span - Span whose `name` and JSON-encoded `attributes` are inspected.
+ * @returns The matched vendor key, or "" when no known vendor is found.
+ */
+export function getVendorFromSpan(span: Span): string {
+  // Match priority order. NOTE(review): "pg" is a very short needle and will
+  // also match unrelated names containing it (e.g. "page"); confirm intent.
+  const vendors = [
+    "groq",
+    "perplexity",
+    "openai",
+    "anthropic",
+    "pinecone",
+    "chromadb",
+    "langchain",
+    "llamaindex",
+    "cohere",
+    "qdrant",
+    "weaviate",
+    "pg",
+  ];
+
+  // The service name is optional metadata: malformed JSON or a non-string
+  // value must not break rendering, so fall back to the span name alone.
+  let serviceName = "";
+  try {
+    const attributes = span.attributes ? JSON.parse(span.attributes) : {};
+    const rawServiceName = attributes?.["langtrace.service.name"];
+    if (typeof rawServiceName === "string") {
+      serviceName = rawServiceName.toLowerCase();
+    }
+  } catch {
+    serviceName = "";
+  }
+
+  const spanName = span.name ?? "";
+  return (
+    vendors.find(
+      (vendor) => spanName.includes(vendor) || serviceName.includes(vendor)
+    ) ?? ""
+  );
+}

From f94b4722737d0c90bd5013a28792fba56cdbaf31 Mon Sep 17 00:00:00 2001
From: Karthik Kalyanaraman <karthik@scale3labs.com>
Date: Sun, 9 Jun 2024 22:24:46 -0700
Subject: [PATCH 04/16] re-order

---
 components/shared/nav.tsx | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/components/shared/nav.tsx b/components/shared/nav.tsx
index 1d6bd46b..931d8a16 100644
--- a/components/shared/nav.tsx
+++ b/components/shared/nav.tsx
@@ -27,14 +27,14 @@ const ProjectNavLinks = (id: string) => [
     name: "Evaluate",
     href: `/project/${id}/evaluate`,
   },
-  {
-    name: "Experiments",
-    href: `/project/${id}/experiments`,
-  },
   {
     name: "Datasets",
     href: `/project/${id}/datasets`,
   },
+  {
+    name: "Experiments",
+    href: `/project/${id}/experiments`,
+  },
   {
     name: "Playground",
     href: `/project/${id}/playground`,

From a04cb3a93c90ee6dcd9e9a1e09e944bd334510a2 Mon Sep 17 00:00:00 2001
From: Karthik Kalyanaraman <karthik@scale3labs.com>
Date: Mon, 10 Jun 2024 15:48:42 -0700
Subject: [PATCH 05/16] Comparison view

---
 .../experiments/[run_id]/page.tsx             |  69 +------
 .../[project_id]/experiments/compare/page.tsx | 166 +++++++++++++++++
 .../experiments/experiment_result.ts          | 170 ++++++++++++++++++
 .../[project_id]/experiments/page-client.tsx  |  43 ++++-
 components/experiments/report-utility.tsx     |  68 +++++++
 5 files changed, 445 insertions(+), 71 deletions(-)
 create mode 100644 app/(protected)/project/[project_id]/experiments/compare/page.tsx
 create mode 100644 components/experiments/report-utility.tsx

diff --git a/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx b/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx
index 82c8973b..d6253c47 100644
--- a/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx
+++ b/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx
@@ -1,5 +1,6 @@
 "use client";
 
+import { UtilityButton } from "@/components/experiments/report-utility";
 import { Conversation } from "@/components/shared/conversation-view";
 import { Badge } from "@/components/ui/badge";
 import { Button } from "@/components/ui/button";
@@ -13,10 +14,9 @@ import {
   SheetTitle,
 } from "@/components/ui/sheet";
 import { cn } from "@/lib/utils";
-import { ChevronLeft, ChevronRight, Copy, MoveDiagonal, X } from "lucide-react";
+import { ChevronLeft, ChevronRight, MoveDiagonal, X } from "lucide-react";
 import { useParams, useRouter } from "next/navigation";
 import { useState } from "react";
-import { toast } from "sonner";
 import data from "../experiment_result";
 
 export default function Experiments() {
@@ -252,68 +252,3 @@ function SampleRow({
     </tr>
   );
 }
-
-function ExpandContractButton({
-  index,
-  expand,
-  setExpand,
-}: {
-  index: number;
-  expand: boolean;
-  setExpand: (expand: boolean, index: number) => void;
-}) {
-  return (
-    <Button
-      variant={"outline"}
-      size={"icon"}
-      className="w-6 h-6 flex items-center justify-center"
-      onClick={(e) => {
-        e.stopPropagation();
-        setExpand(!expand, index);
-      }}
-    >
-      {!expand && <MoveDiagonal className="text-muted-foreground" size={20} />}
-      {expand && <X className="text-muted-foreground" size={20} />}
-    </Button>
-  );
-}
-
-function CopyButton({ text }: { text: string }) {
-  return (
-    <Button
-      variant={"outline"}
-      size={"icon"}
-      className={"w-6 h-6 flex items-center justify-center"}
-      onClick={(e) => {
-        e.stopPropagation();
-        navigator.clipboard.writeText(text);
-        toast.success("Copied to clipboard");
-      }}
-    >
-      <Copy className="text-muted-foreground" size={15} />
-    </Button>
-  );
-}
-
-function UtilityButton({
-  index,
-  expand,
-  setExpand,
-  text,
-}: {
-  index: number;
-  expand: boolean;
-  setExpand: (expand: boolean, index: number) => void;
-  text: string;
-}) {
-  return (
-    <div className="absolute group-hover:flex gap-0 items-center hidden top-0 right-0">
-      <ExpandContractButton
-        index={index}
-        expand={expand}
-        setExpand={setExpand}
-      />
-      <CopyButton text={text} />
-    </div>
-  );
-}
diff --git a/app/(protected)/project/[project_id]/experiments/compare/page.tsx b/app/(protected)/project/[project_id]/experiments/compare/page.tsx
new file mode 100644
index 00000000..59a8d515
--- /dev/null
+++ b/app/(protected)/project/[project_id]/experiments/compare/page.tsx
@@ -0,0 +1,166 @@
+"use client";
+
+import { Badge } from "@/components/ui/badge";
+import { Button } from "@/components/ui/button";
+import { Checkbox } from "@/components/ui/checkbox";
+import { cn } from "@/lib/utils";
+import { ChevronLeft } from "lucide-react";
+import { useRouter, useSearchParams } from "next/navigation";
+import { useState } from "react";
+import data from "../experiment_result";
+
+/** Side-by-side comparison page for the runs named by the `run_id` query parameters. */
+export default function Experiments() {
+  const router = useRouter();
+  const runIds = useSearchParams().getAll("run_id");
+  const experiments = data.filter((exp) => runIds.includes(exp.eval.run_id));
+  // Only call a selection "not comparable" when something was selected; an empty
+  // selection falls through to the "No experiments found" state rendered below.
+  if (experiments.length > 0 && !verifyIfSampleInputsMatch(runIds, data)) {
+    return (
+      <div className="flex flex-col items-center gap-2 mt-24">
+        <p className="text-center text-md">
+          The selected experiments are not comparable. Please select experiments
+          run against the same dataset.
+        </p>
+        <Button className="w-fit">New Experiment</Button>
+      </div>
+    );
+  }
+
+  return (
+    <div className="w-full flex flex-col gap-4">
+      <div className="md:px-24 px-12 py-12 flex justify-between bg-muted">
+        {/* <h1 className="text-2xl font-semibold">Run ID: {runId}</h1> */}
+        <Button variant={data.length > 0 ? "default" : "outline"}>
+          New Experiment
+        </Button>
+      </div>
+      <div className="flex flex-col gap-12 w-full px-12">
+        <div className="flex gap-2">
+          <Button variant="outline" onClick={() => router.back()}>
+            <ChevronLeft className="text-muted-foreground" size={20} />
+            Back
+          </Button>
+        </div>
+        {/* Empty state: no run in the data set matches the requested run ids. */}
+        {experiments.length === 0 && (
+          <div className="flex flex-col items-center gap-2 mt-6">
+            <p className="text-center text-md">
+              No experiments found for comparison.
+            </p>
+            <Button className="w-fit">New Experiment</Button>
+          </div>
+        )}
+        {experiments[0]?.samples && experiments[0]?.samples?.length > 0 && (
+          <div className="overflow-y-scroll">
+            <table className="table-auto overflow-x-scroll w-screen border-separate border border-muted rounded-md">
+              <thead className="bg-muted">
+                <tr>
+                  <th className="w-12 rounded-md p-2">
+                    <Checkbox />
+                  </th>
+                  <th className="p-2 rounded-md text-sm font-medium">Input</th>
+                  <th className="p-2 rounded-md text-sm font-medium">Target</th>
+                  {experiments.map((experiment, i) => (
+                    <th
+                      key={i}
+                      className="p-2 rounded-md text-sm font-medium"
+                    >{`Output - (${experiment.eval.model})`}</th>
+                  ))}
+                </tr>
+              </thead>
+              <tbody>
+                {experiments[0].samples.map((_, i) => (
+                  <SampleRow key={i} index={i} experiments={experiments} />
+                ))}
+              </tbody>
+            </table>
+          </div>
+        )}
+      </div>
+    </div>
+  );
+}
+
+/** Table row for the sample at `index`: shared input and target, then one scored output cell per run. */
+function SampleRow({
+  index,
+  experiments,
+}: {
+  index: number;
+  experiments: any;
+}) {
+  const [open, setOpen] = useState(false);
+  const reference = experiments[0]?.samples[index]; // input and target are taken from the first run
+  const rawInput = reference?.input;
+  // A string input is shown as is; for an array input the last entry's `content` is shown.
+  let inputText = "";
+  if (typeof rawInput === "string") {
+    inputText = rawInput;
+  } else if (Array.isArray(rawInput)) {
+    inputText = rawInput[rawInput?.length - 1]?.content;
+  }
+  return (
+    <tr
+      className="hover:cursor-pointer hover:bg-muted group"
+      onClick={() => setOpen(!open)}
+    >
+      <td className="px-2 py-1 text-center">
+        <Checkbox onClick={(e) => e.stopPropagation()} />
+      </td>
+      <td className={cn("text-sm px-2 py-1 max-w-80 relative")}>{inputText}</td>
+      <td className={cn("relative text-sm px-2 py-1 max-w-80")}>
+        {reference?.target || "none"}
+      </td>
+      {experiments.map((experiment: any, i: number) => {
+        const sample = experiment?.samples[index];
+        const incorrect = sample?.score?.value === "I"; // any other value is shown as CORRECT
+        const choices = sample?.output?.choices;
+        return (
+          <td key={i} className={cn("px-2 py-1 max-w-80 relative")}>
+            <div className="flex flex-col gap-2">
+              <Badge
+                className={cn(
+                  incorrect
+                    ? "hover:bg-red-200 bg-red-200 text-destructive border-destructive"
+                    : "hover:bg-green-200 bg-green-200 text-green-700 border-green-700",
+                  "border w-fit"
+                )}
+              >
+                {incorrect ? "INCORRECT" : "CORRECT"}
+              </Badge>
+              <Badge variant={"secondary"} className="w-fit">{sample?.output?.model || ""}</Badge>
+              <p className="text-sm">
+                {choices && choices?.length > 0
+                  ? choices[choices?.length - 1].message?.content
+                  : ""}
+              </p>
+            </div>
+          </td>
+        );
+      })}
+    </tr>
+  );
+}
+
+/** True when all runs selected by `runIds` have identical sample inputs, index by index; false if none is found. */
+function verifyIfSampleInputsMatch(runIds: string[], data: any): boolean {
+  const experiments = data.filter((exp: any) =>
+    runIds.includes(exp.eval.run_id)
+  );
+  if (experiments.length === 0) return false;
+
+  // JSON text gives a deep comparison for both string inputs and message-array inputs.
+  // A run without a `samples` array is treated as having zero samples instead of throwing.
+  const serialize = (exp: any): string[] =>
+    (exp?.samples ?? []).map((sample: any) => JSON.stringify(sample?.input));
+  const baseline = serialize(experiments[0]);
+  // Sample counts must match as well; otherwise a run with extra trailing samples would pass
+  // and the verdict would depend on which run happens to come first in `data`.
+  return experiments.slice(1).every((exp: any) => {
+    const inputs = serialize(exp);
+    if (inputs.length !== baseline.length) return false;
+    return inputs.every((input, i) => input === baseline[i]);
+  });
+}
diff --git a/app/(protected)/project/[project_id]/experiments/experiment_result.ts b/app/(protected)/project/[project_id]/experiments/experiment_result.ts
index 1f44338f..86f71608 100644
--- a/app/(protected)/project/[project_id]/experiments/experiment_result.ts
+++ b/app/(protected)/project/[project_id]/experiments/experiment_result.ts
@@ -169,6 +169,176 @@ let data = [
     ],
     logging: [],
   },
+  {
+    version: 1,
+    status: "success",
+    eval: {
+      task: "security_guide",
+      task_version: 0,
+      task_file: "src/main.py",
+      task_id: "AjZbe8iaXETqiGGECFzW8E",
+      run_id: "r9FsZc425gEjCFNrXszPqW",
+      created: "2024-06-08T15:48:55",
+      dataset: {
+        name: "security_guide",
+        location: "example://security_guide",
+      },
+      model: "openai/gpt-3.5-turbo",
+      task_attribs: {},
+      task_args: {},
+      model_args: {},
+      config: {},
+      packages: {
+        inspect_ai: "0.3.10",
+      },
+    },
+    plan: {
+      name: "plan",
+      steps: [
+        {
+          solver: "generate",
+          params: {},
+        },
+      ],
+      config: {},
+    },
+    results: {
+      scorer: {
+        name: "includes",
+        params: {},
+      },
+      metrics: {
+        accuracy: {
+          name: "accuracy",
+          value: 0.5,
+          options: {},
+        },
+        bootstrap_std: {
+          name: "bootstrap_std",
+          value: 0.35385166383669864,
+          options: {},
+        },
+      },
+    },
+    stats: {
+      started_at: "2024-06-08T15:48:55",
+      completed_at: "2024-06-08T15:48:58",
+      model_usage: {
+        "openai/gpt-3.5-turbo": {
+          input_tokens: 30,
+          output_tokens: 451,
+          total_tokens: 481,
+        },
+      },
+    },
+    samples: [
+      {
+        id: 1,
+        epoch: 1,
+        input: [
+          {
+            content: "How do I prevent SQL Injection attacks?",
+            source: "input",
+            role: "user",
+          },
+        ],
+        target: "use parameterized queries and prepared statements",
+        messages: [
+          {
+            content: "How do I prevent SQL Injection attacks?",
+            source: "input",
+            role: "user",
+          },
+          {
+            content:
+              "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.",
+            source: "generate",
+            role: "assistant",
+          },
+        ],
+        output: {
+          model: "gpt-3.5-turbo-0125",
+          choices: [
+            {
+              message: {
+                content:
+                  "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.",
+                source: "generate",
+                role: "assistant",
+              },
+              stop_reason: "stop",
+            },
+          ],
+          usage: {
+            input_tokens: 15,
+            output_tokens: 222,
+            total_tokens: 237,
+          },
+        },
+        score: {
+          value: "I",
+          answer:
+            "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.",
+          explanation:
+            "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.",
+        },
+        metadata: {},
+      },
+      {
+        id: 2,
+        epoch: 1,
+        input: [
+          {
+            content: "How do I prevent SQL Injection attacks?",
+            source: "input",
+            role: "user",
+          },
+        ],
+        target: "Use parameterized queries",
+        messages: [
+          {
+            content: "How do I prevent SQL Injection attacks?",
+            source: "input",
+            role: "user",
+          },
+          {
+            content:
+              "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.",
+            source: "generate",
+            role: "assistant",
+          },
+        ],
+        output: {
+          model: "gpt-3.5-turbo-0125",
+          choices: [
+            {
+              message: {
+                content:
+                  "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.",
+                source: "generate",
+                role: "assistant",
+              },
+              stop_reason: "stop",
+            },
+          ],
+          usage: {
+            input_tokens: 15,
+            output_tokens: 229,
+            total_tokens: 244,
+          },
+        },
+        score: {
+          value: "C",
+          answer:
+            "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.",
+          explanation:
+            "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.",
+        },
+        metadata: {},
+      },
+    ],
+    logging: [],
+  },
   {
     version: 1,
     status: "success",
diff --git a/app/(protected)/project/[project_id]/experiments/page-client.tsx b/app/(protected)/project/[project_id]/experiments/page-client.tsx
index f84d000f..e20ff97a 100644
--- a/app/(protected)/project/[project_id]/experiments/page-client.tsx
+++ b/app/(protected)/project/[project_id]/experiments/page-client.tsx
@@ -5,16 +5,33 @@ import { Button } from "@/components/ui/button";
 import { Checkbox } from "@/components/ui/checkbox";
 import { cn, formatDateTime } from "@/lib/utils";
 import { useParams, useRouter } from "next/navigation";
+import { useState } from "react";
 import data from "./experiment_result";
 
 export default function Experiments() {
   const router = useRouter();
   const projectId = useParams()?.project_id as string;
+  const [comparisonRunIds, setComparisonRunIds] = useState<string[]>([]);
   return (
     <div className="w-full flex flex-col gap-4">
       <div className="md:px-24 px-12 py-12 flex justify-between bg-muted">
         <h1 className="text-3xl font-semibold">Experiments</h1>
-        <Button>New Experiment</Button>
+        <div className="flex gap-2">
+          <Button>New Experiment</Button>
+          <Button
+            variant={"outline"}
+            disabled={comparisonRunIds.length < 2}
+            onClick={() => {
+              // append comparisonRunIds to query params. & only from the second run id
+              const query = comparisonRunIds
+                .map((runId, i) => (i === 0 ? "" : "&") + "run_id=" + runId)
+                .join("");
+              router.push(`/project/${projectId}/experiments/compare?${query}`);
+            }}
+          >
+            Compare
+          </Button>
+        </div>
       </div>
       <div className="flex flex-col gap-12 w-full px-12">
         <div className="overflow-y-scroll">
@@ -22,7 +39,7 @@ export default function Experiments() {
             <thead className="bg-muted">
               <tr>
                 <th className="w-12 rounded-md p-2">
-                  <Checkbox />
+                  <Checkbox disabled={true} />
                 </th>
                 <th className="p-2 rounded-md text-sm font-medium">Run ID</th>
                 <th className="p-2 rounded-md text-sm font-medium">
@@ -55,8 +72,26 @@ export default function Experiments() {
                     )
                   }
                 >
-                  <td className="px-2 py-1 text-center">
-                    <Checkbox />
+                  <td
+                    className="px-2 py-1 text-center"
+                    onClick={(e) => e.stopPropagation()}
+                  >
+                    <Checkbox
+                      onCheckedChange={(value) => {
+                        if (value) {
+                          setComparisonRunIds([
+                            ...comparisonRunIds,
+                            experiment.eval.run_id,
+                          ]);
+                        } else {
+                          setComparisonRunIds(
+                            comparisonRunIds.filter(
+                              (id) => id !== experiment.eval.run_id
+                            )
+                          );
+                        }
+                      }}
+                    />
                   </td>
                   <td className="text-sm px-2 py-1">
                     {experiment.eval.run_id}
diff --git a/components/experiments/report-utility.tsx b/components/experiments/report-utility.tsx
new file mode 100644
index 00000000..9df05201
--- /dev/null
+++ b/components/experiments/report-utility.tsx
@@ -0,0 +1,68 @@
+import { Button } from "@/components/ui/button";
+import { Copy, MoveDiagonal, X } from "lucide-react";
+import { toast } from "sonner";
+
+/** Toggle button: shows the expand icon while collapsed and the close icon while expanded. */
+export function ExpandContractButton({
+  index,
+  expand,
+  setExpand,
+}: {
+  index: number;
+  expand: boolean;
+  setExpand: (expand: boolean, index: number) => void;
+}) {
+  const Icon = expand ? X : MoveDiagonal; // exactly one of the two icons is rendered
+  return (
+    <Button
+      variant={"outline"} size={"icon"}
+      className="w-6 h-6 flex items-center justify-center"
+      onClick={(event) => {
+        event.stopPropagation(); // keep the click from bubbling to ancestor handlers
+        setExpand(!expand, index);
+      }}
+    >
+      <Icon className="text-muted-foreground" size={20} />
+    </Button>
+  );
+}
+
+/** Icon button that copies `text` to the clipboard and toasts the outcome of the async write. */
+export function CopyButton({ text }: { text: string }) {
+  return (
+    <Button
+      variant={"outline"} size={"icon"}
+      className={"w-6 h-6 flex items-center justify-center"}
+      onClick={(e) => {
+        e.stopPropagation();
+        const failed = () => toast.error("Failed to copy to clipboard"); // e.g. permission denied
+        navigator.clipboard.writeText(text).then(() => toast.success("Copied to clipboard"), failed);
+      }}
+    >
+      <Copy className="text-muted-foreground" size={15} />
+    </Button>
+  );
+}
+
+/**
+ * Overlay that pairs the expand/contract toggle with a copy button for `text`.
+ *
+ * The wrapper is `hidden` and only becomes `flex` through `group-hover`, so it
+ * appears when an ancestor carrying the `group` class is hovered.
+ */
+export function UtilityButton({
+  text,
+  ...toggleProps
+}: {
+  index: number;
+  expand: boolean;
+  setExpand: (expand: boolean, index: number) => void;
+  text: string;
+}) {
+  return (
+    <div className="absolute group-hover:flex gap-0 items-center hidden top-0 right-0">
+      <ExpandContractButton {...toggleProps} />
+      <CopyButton text={text} />
+    </div>
+  );
+}

From 80e7c7d39c128b92f7e6c8a53964b69280c40a18 Mon Sep 17 00:00:00 2001
From: Karthik Kalyanaraman <karthik@scale3labs.com>
Date: Mon, 10 Jun 2024 17:41:31 -0700
Subject: [PATCH 06/16] Experiment report

---
 .../experiments/[run_id]/page.tsx             |  60 +-
 .../[project_id]/experiments/compare/page.tsx |   2 +-
 .../experiments/experiment_result.ts          | 634 +-----------------
 .../[project_id]/experiments/page-client.tsx  | 234 ++++---
 4 files changed, 194 insertions(+), 736 deletions(-)

diff --git a/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx b/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx
index d6253c47..42dd2668 100644
--- a/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx
+++ b/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx
@@ -4,7 +4,6 @@ import { UtilityButton } from "@/components/experiments/report-utility";
 import { Conversation } from "@/components/shared/conversation-view";
 import { Badge } from "@/components/ui/badge";
 import { Button } from "@/components/ui/button";
-import { Checkbox } from "@/components/ui/checkbox";
 import { Separator } from "@/components/ui/separator";
 import {
   Sheet,
@@ -31,12 +30,30 @@ export default function Experiments() {
   return (
     <div className="w-full flex flex-col gap-4">
       <div className="md:px-24 px-12 py-12 flex justify-between bg-muted">
-        <h1 className="text-2xl font-semibold">Run ID: {runId}</h1>
-        <Button variant={data.length > 0 ? "default" : "outline"}>
+        <div className="flex gap-2 items-center">
+          <h1 className="text-2xl font-semibold">Run ID: {runId}</h1>
+          <Badge
+            className={cn(
+              "capitalize",
+              experiment.status === "success"
+                ? "text-green-600 bg-green-200 hover:bg-green-200"
+                : "text-destructive bg-red-200 hover:bg-red-200"
+            )}
+          >
+            {experiment.status}
+          </Badge>
+        </div>
+        <Button
+          variant={
+            data?.length > 0 && experiment && experiment?.samples?.length > 0
+              ? "default"
+              : "outline"
+          }
+        >
           New Experiment
         </Button>
       </div>
-      <div className="flex flex-col gap-12 w-full px-12">
+      <div className="flex flex-col gap-6 w-full px-12">
         <div className="flex gap-2">
           <Button variant="outline" onClick={() => router.back()}>
             <ChevronLeft className="text-muted-foreground" size={20} />
@@ -45,6 +62,7 @@ export default function Experiments() {
           <Button
             variant={"outline"}
             size={"icon"}
+            disabled={!experiment?.samples || experiment?.samples?.length === 0}
             onClick={() => {
               setExpand(
                 expand.map(() => {
@@ -53,14 +71,26 @@ export default function Experiments() {
               );
             }}
           >
-            {expand.some((v) => v === false) && (
-              <MoveDiagonal className="text-muted-foreground" size={20} />
-            )}
-            {!expand.some((v) => v === false) && (
-              <X className="text-muted-foreground" size={20} />
-            )}
+            {expand.some((v: any) => v === false) && <MoveDiagonal size={20} />}
+            {!expand.some((v: any) => v === false) && <X size={20} />}
           </Button>
         </div>
+        {experiment?.error && (
+          <div className="flex flex-col gap-4">
+            <p className="text-xl text-center font-semibold">
+              An error occurred while running this experiment. See below for
+              more details
+            </p>
+            <div className="flex flex-col gap-2 p-2 border border-muted-foreground bg-muted rounded-md">
+              <pre className="text-start text-md">
+                {experiment.error.message || "An error occurred."}
+              </pre>
+              <pre className="text-start text-sm">
+                {experiment.error.traceback || "No traceback available."}
+              </pre>
+            </div>
+          </div>
+        )}
         {!experiment ||
           (experiment?.samples?.length === 0 && (
             <div className="flex flex-col items-center gap-2 mt-6">
@@ -75,9 +105,6 @@ export default function Experiments() {
             <table className="table-auto overflow-x-scroll w-screen border-separate border border-muted rounded-md">
               <thead className="bg-muted">
                 <tr>
-                  <th className="w-12 rounded-md p-2">
-                    <Checkbox />
-                  </th>
                   <th className="p-2 rounded-md text-sm font-medium">Input</th>
                   <th className="p-2 rounded-md text-sm font-medium">Target</th>
                   <th className="p-2 rounded-md text-sm font-medium">{`Output - (${experiment.eval.model})`}</th>
@@ -87,7 +114,7 @@ export default function Experiments() {
                 </tr>
               </thead>
               <tbody>
-                {experiment.samples.map((sample, i) => (
+                {experiment.samples.map((sample: any, i: number) => (
                   <SampleRow
                     key={i}
                     index={i}
@@ -97,7 +124,7 @@ export default function Experiments() {
                     expand={expand[i]}
                     setExpand={(value: boolean, index: number) => {
                       setExpand(
-                        expand.map((_, j) => {
+                        expand.map((_: any, j: number) => {
                           return j === index ? value : expand[j];
                         })
                       );
@@ -134,9 +161,6 @@ function SampleRow({
       className="hover:cursor-pointer hover:bg-muted group"
       onClick={() => setOpen(!open)}
     >
-      <td className="px-2 py-1 text-center">
-        <Checkbox onClick={(e) => e.stopPropagation()} />
-      </td>
       <td
         className={cn(
           "text-sm px-2 py-1 max-w-80 relative",
diff --git a/app/(protected)/project/[project_id]/experiments/compare/page.tsx b/app/(protected)/project/[project_id]/experiments/compare/page.tsx
index 59a8d515..ab4a4686 100644
--- a/app/(protected)/project/[project_id]/experiments/compare/page.tsx
+++ b/app/(protected)/project/[project_id]/experiments/compare/page.tsx
@@ -71,7 +71,7 @@ export default function Experiments() {
                 </tr>
               </thead>
               <tbody>
-                {experiments[0].samples.map((_, i) => (
+                {experiments[0].samples.map((_: any, i: number) => (
                   <SampleRow key={i} index={i} experiments={experiments} />
                 ))}
               </tbody>
diff --git a/app/(protected)/project/[project_id]/experiments/experiment_result.ts b/app/(protected)/project/[project_id]/experiments/experiment_result.ts
index 86f71608..c37fe279 100644
--- a/app/(protected)/project/[project_id]/experiments/experiment_result.ts
+++ b/app/(protected)/project/[project_id]/experiments/experiment_result.ts
@@ -1,19 +1,19 @@
-let data = [
+const data: any[] = [
   {
     version: 1,
-    status: "success",
+    status: "error",
     eval: {
       task: "security_guide",
       task_version: 0,
       task_file: "src/main.py",
-      task_id: "WjZbe8iaXETqiGGECFzW8E",
-      run_id: "k9FsZc425gEjCFNrXszPqW",
-      created: "2024-06-08T15:48:55",
+      task_id: "agwg9gxU5RHpXc3aALKijk",
+      run_id: "Kkg2c5UHPamfMk9eYfJLzD",
+      created: "2024-05-30T11:07:59",
       dataset: {
         name: "security_guide",
         location: "example://security_guide",
       },
-      model: "openai/gpt-3.5-turbo",
+      model: "openai/gpt-4",
       task_attribs: {},
       task_args: {},
       model_args: {},
@@ -26,478 +26,33 @@ let data = [
       name: "plan",
       steps: [
         {
-          solver: "generate",
+          solver: "chain_of_thought",
           params: {},
         },
-      ],
-      config: {},
-    },
-    results: {
-      scorer: {
-        name: "includes",
-        params: {},
-      },
-      metrics: {
-        accuracy: {
-          name: "accuracy",
-          value: 0.5,
-          options: {},
-        },
-        bootstrap_std: {
-          name: "bootstrap_std",
-          value: 0.35385166383669864,
-          options: {},
-        },
-      },
-    },
-    stats: {
-      started_at: "2024-06-08T15:48:55",
-      completed_at: "2024-06-08T15:48:58",
-      model_usage: {
-        "openai/gpt-3.5-turbo": {
-          input_tokens: 30,
-          output_tokens: 451,
-          total_tokens: 481,
-        },
-      },
-    },
-    samples: [
-      {
-        id: 1,
-        epoch: 1,
-        input: [
-          {
-            content: "How do I prevent SQL Injection attacks?",
-            source: "input",
-            role: "user",
-          },
-        ],
-        target: "use parameterized queries and prepared statements",
-        messages: [
-          {
-            content: "How do I prevent SQL Injection attacks?",
-            source: "input",
-            role: "user",
-          },
-          {
-            content:
-              "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.",
-            source: "generate",
-            role: "assistant",
-          },
-        ],
-        output: {
-          model: "gpt-3.5-turbo-0125",
-          choices: [
-            {
-              message: {
-                content:
-                  "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.",
-                source: "generate",
-                role: "assistant",
-              },
-              stop_reason: "stop",
-            },
-          ],
-          usage: {
-            input_tokens: 15,
-            output_tokens: 222,
-            total_tokens: 237,
-          },
-        },
-        score: {
-          value: "I",
-          answer:
-            "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.",
-          explanation:
-            "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.",
-        },
-        metadata: {},
-      },
-      {
-        id: 2,
-        epoch: 1,
-        input: [
-          {
-            content: "How do I prevent SQL Injection attacks?",
-            source: "input",
-            role: "user",
-          },
-        ],
-        target: "Use parameterized queries",
-        messages: [
-          {
-            content: "How do I prevent SQL Injection attacks?",
-            source: "input",
-            role: "user",
-          },
-          {
-            content:
-              "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.",
-            source: "generate",
-            role: "assistant",
-          },
-        ],
-        output: {
-          model: "gpt-3.5-turbo-0125",
-          choices: [
-            {
-              message: {
-                content:
-                  "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.",
-                source: "generate",
-                role: "assistant",
-              },
-              stop_reason: "stop",
-            },
-          ],
-          usage: {
-            input_tokens: 15,
-            output_tokens: 229,
-            total_tokens: 244,
-          },
-        },
-        score: {
-          value: "C",
-          answer:
-            "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.",
-          explanation:
-            "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.",
-        },
-        metadata: {},
-      },
-    ],
-    logging: [],
-  },
-  {
-    version: 1,
-    status: "success",
-    eval: {
-      task: "security_guide",
-      task_version: 0,
-      task_file: "src/main.py",
-      task_id: "AjZbe8iaXETqiGGECFzW8E",
-      run_id: "r9FsZc425gEjCFNrXszPqW",
-      created: "2024-06-08T15:48:55",
-      dataset: {
-        name: "security_guide",
-        location: "example://security_guide",
-      },
-      model: "openai/gpt-3.5-turbo",
-      task_attribs: {},
-      task_args: {},
-      model_args: {},
-      config: {},
-      packages: {
-        inspect_ai: "0.3.10",
-      },
-    },
-    plan: {
-      name: "plan",
-      steps: [
         {
           solver: "generate",
           params: {},
         },
-      ],
-      config: {},
-    },
-    results: {
-      scorer: {
-        name: "includes",
-        params: {},
-      },
-      metrics: {
-        accuracy: {
-          name: "accuracy",
-          value: 0.5,
-          options: {},
-        },
-        bootstrap_std: {
-          name: "bootstrap_std",
-          value: 0.35385166383669864,
-          options: {},
-        },
-      },
-    },
-    stats: {
-      started_at: "2024-06-08T15:48:55",
-      completed_at: "2024-06-08T15:48:58",
-      model_usage: {
-        "openai/gpt-3.5-turbo": {
-          input_tokens: 30,
-          output_tokens: 451,
-          total_tokens: 481,
-        },
-      },
-    },
-    samples: [
-      {
-        id: 1,
-        epoch: 1,
-        input: [
-          {
-            content: "How do I prevent SQL Injection attacks?",
-            source: "input",
-            role: "user",
-          },
-        ],
-        target: "use parameterized queries and prepared statements",
-        messages: [
-          {
-            content: "How do I prevent SQL Injection attacks?",
-            source: "input",
-            role: "user",
-          },
-          {
-            content:
-              "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.",
-            source: "generate",
-            role: "assistant",
-          },
-        ],
-        output: {
-          model: "gpt-3.5-turbo-0125",
-          choices: [
-            {
-              message: {
-                content:
-                  "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.",
-                source: "generate",
-                role: "assistant",
-              },
-              stop_reason: "stop",
-            },
-          ],
-          usage: {
-            input_tokens: 15,
-            output_tokens: 222,
-            total_tokens: 237,
-          },
-        },
-        score: {
-          value: "I",
-          answer:
-            "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.",
-          explanation:
-            "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.",
-        },
-        metadata: {},
-      },
-      {
-        id: 2,
-        epoch: 1,
-        input: [
-          {
-            content: "How do I prevent SQL Injection attacks?",
-            source: "input",
-            role: "user",
-          },
-        ],
-        target: "Use parameterized queries",
-        messages: [
-          {
-            content: "How do I prevent SQL Injection attacks?",
-            source: "input",
-            role: "user",
-          },
-          {
-            content:
-              "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.",
-            source: "generate",
-            role: "assistant",
-          },
-        ],
-        output: {
-          model: "gpt-3.5-turbo-0125",
-          choices: [
-            {
-              message: {
-                content:
-                  "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.",
-                source: "generate",
-                role: "assistant",
-              },
-              stop_reason: "stop",
-            },
-          ],
-          usage: {
-            input_tokens: 15,
-            output_tokens: 229,
-            total_tokens: 244,
-          },
-        },
-        score: {
-          value: "C",
-          answer:
-            "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.",
-          explanation:
-            "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.",
-        },
-        metadata: {},
-      },
-    ],
-    logging: [],
-  },
-  {
-    version: 1,
-    status: "success",
-    eval: {
-      task: "security_guide",
-      task_version: 0,
-      task_file: "src/main.py",
-      task_id: "FuR38tV2t6a3jKjED9Qq8R",
-      run_id: "aYmmWwRL8PBH8wPnBo3e3Z",
-      created: "2024-06-08T16:22:05",
-      dataset: {
-        name: "security_guide",
-        location: "example://security_guide",
-      },
-      model: "openai/gpt-3.5-turbo",
-      task_attribs: {},
-      task_args: {},
-      model_args: {},
-      config: {},
-      packages: {
-        inspect_ai: "0.3.10",
-      },
-    },
-    plan: {
-      name: "plan",
-      steps: [
         {
-          solver: "generate",
+          solver: "self_critique",
           params: {},
         },
       ],
       config: {},
     },
-    results: {
-      scorer: {
-        name: "check_length",
-        params: {},
-      },
-      metrics: {
-        accuracy: {
-          name: "accuracy",
-          value: 0.0,
-          options: {},
-        },
-      },
-    },
     stats: {
-      started_at: "2024-06-08T16:22:05",
-      completed_at: "2024-06-08T16:22:06",
-      model_usage: {
-        "openai/gpt-3.5-turbo": {
-          input_tokens: 32,
-          output_tokens: 74,
-          total_tokens: 106,
-        },
-      },
+      started_at: "2024-05-30T11:07:59",
+      completed_at: "2024-05-30T11:07:59",
+      model_usage: {},
+    },
+    error: {
+      message:
+        "ValueError('user_prompt requested from TaskState but none available')",
+      traceback:
+        'Traceback (most recent call last):\n\n  File "/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect_ai/_eval/task/run.py", line 175, in task_run\n    scores = await asyncio.gather(*tasks)\n\n  File "/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect_ai/_eval/task/run.py", line 256, in task_run_sample\n    state = await solver(state, generate)\n\n  File "/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect_ai/solver/_prompt.py", line 33, in solve\n    prompt = state.user_prompt\n\n  File "/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect_ai/solver/_solver.py", line 124, in user_prompt\n    raise ValueError("user_prompt requested from TaskState but none available")\n\nValueError: user_prompt requested from TaskState but none available\n',
+      traceback_ansi:
+        "\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m_ai/_eval/task/\u001b[0m\u001b[1;33mrun.py\u001b[0m:\u001b[94m175\u001b[0m in \u001b[92mtask_run\u001b[0m                                                            \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m_ai/_eval/task/\u001b[0m\u001b[1;33mrun.py\u001b[0m:\u001b[94m256\u001b[0m in \u001b[92mtask_run_sample\u001b[0m                                                     \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m_ai/solver/\u001b[0m\u001b[1;33m_prompt.py\u001b[0m:\u001b[94m33\u001b[0m in \u001b[92msolve\u001b[0m                                                                \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect\u001b[0m 
\u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m_ai/solver/\u001b[0m\u001b[1;33m_solver.py\u001b[0m:\u001b[94m124\u001b[0m in \u001b[92muser_prompt\u001b[0m                                                         \u001b[31m│\u001b[0m\n\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n\u001b[1;91mValueError: \u001b[0muser_prompt requested from TaskState but none available\n",
     },
-    samples: [
-      {
-        id: 1,
-        epoch: 1,
-        input: [
-          {
-            content: "Write a funny tweet about newyork city?",
-            source: "input",
-            role: "user",
-          },
-        ],
-        target: "",
-        messages: [
-          {
-            content: "Write a funny tweet about newyork city?",
-            source: "input",
-            role: "user",
-          },
-          {
-            content:
-              '"Just spent 3 hours trying to find a public restroom in NYC, ended up accidentally walking into a trendy hipster coffee shop bathroom where they charge $10 for a latte with a side of TP #cityproblems #NYC"',
-            source: "generate",
-            role: "assistant",
-          },
-        ],
-        output: {
-          model: "gpt-3.5-turbo-0125",
-          choices: [
-            {
-              message: {
-                content:
-                  '"Just spent 3 hours trying to find a public restroom in NYC, ended up accidentally walking into a trendy hipster coffee shop bathroom where they charge $10 for a latte with a side of TP #cityproblems #NYC"',
-                source: "generate",
-                role: "assistant",
-              },
-              stop_reason: "stop",
-            },
-          ],
-          usage: {
-            input_tokens: 17,
-            output_tokens: 47,
-            total_tokens: 64,
-          },
-        },
-        score: {
-          value: "I",
-          answer:
-            '"Just spent 3 hours trying to find a public restroom in NYC, ended up accidentally walking into a trendy hipster coffee shop bathroom where they charge $10 for a latte with a side of TP #cityproblems #NYC"',
-        },
-        metadata: {},
-      },
-      {
-        id: 2,
-        epoch: 1,
-        input: [
-          {
-            content: "Write a funny tweet about vancouver?",
-            source: "input",
-            role: "user",
-          },
-        ],
-        target: "",
-        messages: [
-          {
-            content: "Write a funny tweet about vancouver?",
-            source: "input",
-            role: "user",
-          },
-          {
-            content:
-              "Just saw someone try to use an umbrella in Vancouver... and that's how you know they're not a local. #raincitylife",
-            source: "generate",
-            role: "assistant",
-          },
-        ],
-        output: {
-          model: "gpt-3.5-turbo-0125",
-          choices: [
-            {
-              message: {
-                content:
-                  "Just saw someone try to use an umbrella in Vancouver... and that's how you know they're not a local. #raincitylife",
-                source: "generate",
-                role: "assistant",
-              },
-              stop_reason: "stop",
-            },
-          ],
-          usage: {
-            input_tokens: 15,
-            output_tokens: 27,
-            total_tokens: 42,
-          },
-        },
-        score: {
-          value: "I",
-          answer:
-            "Just saw someone try to use an umbrella in Vancouver... and that's how you know they're not a local. #raincitylife",
-        },
-        metadata: {},
-      },
-    ],
     logging: [],
   },
   {
@@ -549,12 +104,12 @@ let data = [
       metrics: {
         accuracy: {
           name: "accuracy",
-          value: 1,
+          value: 1.0,
           options: {},
         },
         bootstrap_std: {
           name: "bootstrap_std",
-          value: 0,
+          value: 0.0,
           options: {},
         },
       },
@@ -647,153 +202,6 @@ let data = [
     ],
     logging: [],
   },
-  {
-    version: 1,
-    status: "success",
-    eval: {
-      task: "security_guide",
-      task_version: 0,
-      task_file: "src/main.py",
-      task_id: "WaGqtQ7sZYWkmmDYcubjiv",
-      run_id: "ar7LzC2PefbchfLMasaasaF",
-      created: "2024-05-30T11:06:32",
-      dataset: {
-        name: "security_guide",
-        location: "example://security_guide",
-      },
-      model: "openai/gpt-4",
-      task_attribs: {},
-      task_args: {},
-      model_args: {},
-      config: {},
-      packages: {
-        inspect_ai: "0.3.10",
-      },
-    },
-    plan: {
-      name: "plan",
-      steps: [
-        {
-          solver: "chain_of_thought",
-          params: {},
-        },
-        {
-          solver: "generate",
-          params: {},
-        },
-        {
-          solver: "self_critique",
-          params: {},
-        },
-      ],
-      config: {},
-    },
-    results: {
-      scorer: {
-        name: "model_graded_fact",
-        params: {},
-      },
-      metrics: {
-        accuracy: {
-          name: "accuracy",
-          value: 1,
-          options: {},
-        },
-        bootstrap_std: {
-          name: "bootstrap_std",
-          value: 0,
-          options: {},
-        },
-      },
-    },
-    stats: {
-      started_at: "2024-05-30T11:06:32",
-      completed_at: "2024-05-30T11:06:45",
-      model_usage: {
-        "openai/gpt-4": {
-          input_tokens: 1677,
-          output_tokens: 728,
-          total_tokens: 2405,
-        },
-      },
-    },
-    samples: [
-      {
-        id: 1,
-        epoch: 1,
-        input: "How do I prevent SQL Injection attacks?",
-        target: "use parameterized queries and prepared statements",
-        messages: [
-          {
-            content:
-              '\nHow do I prevent SQL Injection attacks?\n\nBefore answering, reason in a step-by-step manner as to get the right answer. Provide your answer at the end on its own line in the form "ANSWER: $ANSWER" (without quotes) where $ANSWER is the answer to the question.\n',
-            source: "input",
-            role: "user",
-          },
-          {
-            content:
-              "1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack that allows attackers to manipulate the SQL code of a web application to gain unauthorized access to the database or to perform harmful actions.\n\n2. Sanitize user inputs: One of the most common ways attackers exploit SQL Injection vulnerabilities is through user inputs. Make sure to validate and sanitize all user inputs to prevent malicious code from being executed.\n\n3. Use parameterized queries: Instead of dynamically creating SQL queries by concatenating strings, use parameterized queries which automatically sanitize inputs by treating them as data rather than executable code.\n\n4. Limit database permissions: Ensure that the database user account used by your application has the least amount of privileges necessary. Avoid using an admin account for regular application tasks.\n\n5. Update and patch regularly: Make sure your database management system, web server, and application software are up to date with the latest security patches to protect against known vulnerabilities.\n\n6. Implement input validation: Validate and sanitize user inputs on both client-side and server-side to reduce the risk of SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, sanitize user inputs, use parameterized queries, limit database permissions, update and patch regularly, and implement input validation.",
-            source: "generate",
-            role: "assistant",
-          },
-          {
-            content:
-              '\nGiven the following question, initial answer and critique please generate an improved answer to the question:\n\n[BEGIN DATA]\n***\n[Question]: How do I prevent SQL Injection attacks?\n***\n[Answer]: 1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack that allows attackers to manipulate the SQL code of a web application to gain unauthorized access to the database or to perform harmful actions.\n\n2. Sanitize user inputs: One of the most common ways attackers exploit SQL Injection vulnerabilities is through user inputs. Make sure to validate and sanitize all user inputs to prevent malicious code from being executed.\n\n3. Use parameterized queries: Instead of dynamically creating SQL queries by concatenating strings, use parameterized queries which automatically sanitize inputs by treating them as data rather than executable code.\n\n4. Limit database permissions: Ensure that the database user account used by your application has the least amount of privileges necessary. Avoid using an admin account for regular application tasks.\n\n5. Update and patch regularly: Make sure your database management system, web server, and application software are up to date with the latest security patches to protect against known vulnerabilities.\n\n6. Implement input validation: Validate and sanitize user inputs on both client-side and server-side to reduce the risk of SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, sanitize user inputs, use parameterized queries, limit database permissions, update and patch regularly, and implement input validation.\n***\n[Critique]: The answer provides a good overview of ways to prevent SQL Injection attacks, but it lacks depth and specificity in explaining each point. It would be beneficial to provide more details and examples for each prevention method to ensure a comprehensive understanding for the reader. 
Additionally, the answer could benefit from explaining why each prevention method is effective in stopping SQL Injection attacks.\n***\n[END DATA]\n\nIf the original answer is already correct, just repeat the original answer exactly. Provide your answer at the end on its own line in the form "ANSWER: $ANSWER" (without quotes) where $ANSWER is the answer to the question.\n',
-            role: "user",
-          },
-          {
-            content:
-              "Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.",
-            source: "generate",
-            role: "assistant",
-          },
-        ],
-        output: {
-          model: "gpt-4",
-          choices: [
-            {
-              message: {
-                content:
-                  "Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.",
-                source: "generate",
-                role: "assistant",
-              },
-              stop_reason: "stop",
-            },
-          ],
-          usage: {
-            input_tokens: 738,
-            output_tokens: 294,
-            total_tokens: 1032,
-          },
-        },
-        score: {
-          value: "C",
-          answer:
-            "Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.",
-          explanation:
-            "1. The expert answer mentions using parameterized queries and prepared statements as a way to prevent SQL Injection attacks.\n2. The submission also includes using parameterized queries as one of the ways to prevent SQL Injection attacks.\n3. The submission expands on the concept, explaining what SQL Injection attacks are, sanitizing user inputs, limiting database permissions, updating software regularly, and implementing input validation.\n4. All additional points mentioned in the submission are relevant and contribute to preventing SQL Injection attacks.\n\nAfter a thorough comparison, it is evident that the submission contains the content mentioned in the expert answer. \n\nGRADE: C",
-          metadata: {
-            grading: [
-              {
-                content:
-                  "\nYou are comparing a submitted answer to an expert answer on a given question. Here is the data:\n\n[BEGIN DATA]\n************\n[Question]: How do I prevent SQL Injection attacks?\n************\n[Expert]: use parameterized queries and prepared statements\n************\n[Submission]: Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. 
This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.\n************\n[END DATA]\n\nCompare the factual content of the submitted answer with the expert answer. Ignore any differences in style, grammar, or punctuation.\n\nDoes the submission contain the content in the expert answer?\n\n\nAfter assessing the submitted answer, reply with 'GRADE: $LETTER' (without quotes) where LETTER is one of CI.  Please choose ONE option for the grade: either \"C\" for correct answers, or \"I\" for incorrect answers.\n\nFor example, after reviewing a correct answer you might write 'GRADE: C' or after reviewing an incorrect answer you might write 'GRADE: I'.\n\nFirst, write out in a step by step manner your reasoning about the criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. Then, end with your answer formatted as 'GRADE: $LETTER' (without quotes) where LETTER is one of CI.\n\n",
-                role: "user",
-              },
-              {
-                content:
-                  "1. The expert answer mentions using parameterized queries and prepared statements as a way to prevent SQL Injection attacks.\n2. The submission also includes using parameterized queries as one of the ways to prevent SQL Injection attacks.\n3. The submission expands on the concept, explaining what SQL Injection attacks are, sanitizing user inputs, limiting database permissions, updating software regularly, and implementing input validation.\n4. All additional points mentioned in the submission are relevant and contribute to preventing SQL Injection attacks.\n\nAfter a thorough comparison, it is evident that the submission contains the content mentioned in the expert answer. \n\nGRADE: C",
-                source: "generate",
-                role: "assistant",
-              },
-            ],
-          },
-        },
-        metadata: {},
-      },
-    ],
-    logging: [],
-  },
 ];
 
 export default data;
diff --git a/app/(protected)/project/[project_id]/experiments/page-client.tsx b/app/(protected)/project/[project_id]/experiments/page-client.tsx
index e20ff97a..665cdf86 100644
--- a/app/(protected)/project/[project_id]/experiments/page-client.tsx
+++ b/app/(protected)/project/[project_id]/experiments/page-client.tsx
@@ -17,7 +17,9 @@ export default function Experiments() {
       <div className="md:px-24 px-12 py-12 flex justify-between bg-muted">
         <h1 className="text-3xl font-semibold">Experiments</h1>
         <div className="flex gap-2">
-          <Button>New Experiment</Button>
+          <Button variant={data.length > 0 ? "default" : "outline"}>
+            New Experiment
+          </Button>
           <Button
             variant={"outline"}
             disabled={comparisonRunIds.length < 2}
@@ -34,111 +36,135 @@ export default function Experiments() {
         </div>
       </div>
       <div className="flex flex-col gap-12 w-full px-12">
-        <div className="overflow-y-scroll">
-          <table className="table-auto overflow-x-scroll w-max border-separate border border-muted rounded-md mt-6">
-            <thead className="bg-muted">
-              <tr>
-                <th className="w-12 rounded-md p-2">
-                  <Checkbox disabled={true} />
-                </th>
-                <th className="p-2 rounded-md text-sm font-medium">Run ID</th>
-                <th className="p-2 rounded-md text-sm font-medium">
-                  Started at
-                </th>
-                <th className="p-2 rounded-md text-sm font-medium">
-                  Completed at
-                </th>
-                <th className="p-2 rounded-md text-sm font-medium">Task</th>
-                <th className="p-2 rounded-md text-sm font-medium">
-                  Total Samples
-                </th>
-                <th className="p-2 rounded-md text-sm font-medium">Model</th>
-                <th className="p-2 rounded-md text-sm font-medium">
-                  Plan Name
-                </th>
-                <th className="p-2 rounded-md text-sm font-medium">Scorer</th>
-                <th className="p-2 rounded-md text-sm font-medium">Metrics</th>
-                <th className="p-2 rounded-md text-sm font-medium">Status</th>
-              </tr>
-            </thead>
-            <tbody>
-              {data.map((experiment) => (
-                <tr
-                  key={experiment.eval.task_id}
-                  className="hover:cursor-pointer hover:bg-muted"
-                  onClick={() =>
-                    router.push(
-                      `/project/${projectId}/experiments/${experiment.eval.run_id}`
-                    )
-                  }
-                >
-                  <td
-                    className="px-2 py-1 text-center"
-                    onClick={(e) => e.stopPropagation()}
-                  >
-                    <Checkbox
-                      onCheckedChange={(value) => {
-                        if (value) {
-                          setComparisonRunIds([
-                            ...comparisonRunIds,
-                            experiment.eval.run_id,
-                          ]);
-                        } else {
-                          setComparisonRunIds(
-                            comparisonRunIds.filter(
-                              (id) => id !== experiment.eval.run_id
-                            )
-                          );
-                        }
-                      }}
-                    />
-                  </td>
-                  <td className="text-sm px-2 py-1">
-                    {experiment.eval.run_id}
-                  </td>
-                  <td className="text-sm px-2 py-1">
-                    {formatDateTime(experiment.stats.started_at, true)}
-                  </td>
-                  <td className="text-sm px-2 py-1">
-                    {formatDateTime(experiment.stats.completed_at, true)}
-                  </td>
-                  <td className="text-sm px-2 py-1">{experiment.eval.task}</td>
-                  <td className="text-sm px-2 py-1">
-                    {experiment.samples?.length || 0}
-                  </td>
-                  <td className="text-sm px-2 py-1">{experiment.eval.model}</td>
-                  <td className="text-sm px-2 py-1">{experiment.plan.name}</td>
-                  <td className="text-sm px-2 py-1">
-                    {experiment.results?.scorer?.name || "N/A"}
-                  </td>
-                  <td className="text-sm px-2 py-1 flex flex-wrap gap-2 w-72">
-                    {Object.keys(experiment.results?.metrics).map(
-                      (metric, i) => (
-                        <Badge variant={"outline"} className="" key={i}>
-                          {metric +
-                            ": " +
-                            (experiment.results.metrics as any)[
-                              metric
-                            ].value.toFixed(2)}
-                        </Badge>
+        {data && data.length === 0 && (
+          <div className="flex flex-col items-center gap-2 mt-24">
+            <p className="text-center text-md">
+              No experiments found. Get started by running your first
+              experiment.
+            </p>
+            <Button>New Experiment</Button>
+          </div>
+        )}
+        {data && data.length > 0 && (
+          <div className="overflow-y-scroll">
+            <table className="table-auto overflow-x-scroll w-max border-separate border border-muted rounded-md mt-6">
+              <thead className="bg-muted">
+                <tr>
+                  <th className="w-12 rounded-md p-2">
+                    <Checkbox disabled={true} />
+                  </th>
+                  <th className="p-2 rounded-md text-sm font-medium">Run ID</th>
+                  <th className="p-2 rounded-md text-sm font-medium">
+                    Started at
+                  </th>
+                  <th className="p-2 rounded-md text-sm font-medium">
+                    Completed at
+                  </th>
+                  <th className="p-2 rounded-md text-sm font-medium">Task</th>
+                  <th className="p-2 rounded-md text-sm font-medium">
+                    Total Samples
+                  </th>
+                  <th className="p-2 rounded-md text-sm font-medium">Model</th>
+                  <th className="p-2 rounded-md text-sm font-medium">
+                    Plan Name
+                  </th>
+                  <th className="p-2 rounded-md text-sm font-medium">Scorer</th>
+                  <th className="p-2 rounded-md text-sm font-medium">
+                    Metrics
+                  </th>
+                  <th className="p-2 rounded-md text-sm font-medium">Status</th>
+                </tr>
+              </thead>
+              <tbody>
+                {data.map((experiment) => (
+                  <tr
+                    key={experiment.eval.task_id}
+                    className="hover:cursor-pointer hover:bg-muted"
+                    onClick={() =>
+                      router.push(
+                        `/project/${projectId}/experiments/${experiment.eval.run_id}`
                       )
-                    )}
-                  </td>
-                  <td
-                    className={cn(
-                      "text-sm px-2 py-1 font-semibold",
-                      experiment.status === "success"
-                        ? "text-green-600"
-                        : "text-destructive"
-                    )}
+                    }
                   >
-                    {experiment.status}
-                  </td>
-                </tr>
-              ))}
-            </tbody>
-          </table>
-        </div>
+                    <td
+                      className="px-2 py-1 text-center"
+                      onClick={(e) => e.stopPropagation()}
+                    >
+                      <Checkbox
+                        disabled={experiment.status !== "success"}
+                        onCheckedChange={(value) => {
+                          if (value) {
+                            setComparisonRunIds([
+                              ...comparisonRunIds,
+                              experiment.eval.run_id,
+                            ]);
+                          } else {
+                            setComparisonRunIds(
+                              comparisonRunIds.filter(
+                                (id) => id !== experiment.eval.run_id
+                              )
+                            );
+                          }
+                        }}
+                      />
+                    </td>
+                    <td className="text-sm px-2 py-1">
+                      {experiment.eval.run_id}
+                    </td>
+                    <td className="text-sm px-2 py-1">
+                      {formatDateTime(experiment.stats.started_at, true)}
+                    </td>
+                    <td className="text-sm px-2 py-1">
+                      {formatDateTime(experiment.stats.completed_at, true)}
+                    </td>
+                    <td className="text-sm px-2 py-1">
+                      {experiment.eval.task}
+                    </td>
+                    <td className="text-sm px-2 py-1">
+                      {experiment.samples?.length || 0}
+                    </td>
+                    <td className="text-sm px-2 py-1">
+                      {experiment.eval.model}
+                    </td>
+                    <td className="text-sm px-2 py-1">
+                      {experiment.plan.name}
+                    </td>
+                    <td className="text-sm px-2 py-1">
+                      {experiment.results?.scorer?.name || "N/A"}
+                    </td>
+                    <td className="text-sm px-2 py-1 flex flex-wrap gap-2 w-72">
+                      {experiment.results?.metrics
+                        ? Object.keys(experiment.results?.metrics).map(
+                            (metric, i) => (
+                              <Badge variant={"outline"} className="" key={i}>
+                                {metric +
+                                  ": " +
+                                  (experiment.results.metrics as any)[
+                                    metric
+                                  ].value.toFixed(2)}
+                              </Badge>
+                            )
+                          )
+                        : "N/A"}
+                    </td>
+                    <td className="px-2 py-1">
+                      <Badge
+                        className={cn(
+                          "capitalize",
+                          experiment.status === "success"
+                            ? "text-green-600 bg-green-200 hover:bg-green-200"
+                            : "text-destructive bg-red-200 hover:bg-red-200"
+                        )}
+                      >
+                        {experiment.status}
+                      </Badge>
+                    </td>
+                  </tr>
+                ))}
+              </tbody>
+            </table>
+          </div>
+        )}
       </div>
     </div>
   );

From 577eea3d1796f127a287d76216b479a1cc0158e0 Mon Sep 17 00:00:00 2001
From: Karthik Kalyanaraman <karthik@scale3labs.com>
Date: Wed, 12 Jun 2024 21:18:39 -0700
Subject: [PATCH 07/16] Experiments - hook up APIs

---
 .../datasets/dataset/[dataset_id]/page.tsx    |   7 +-
 .../experiments/[run_id]/page.tsx             | 102 ++++++---
 .../[project_id]/experiments/compare/page.tsx | 143 +++++++-----
 .../experiments/experiment_result.ts          | 207 ------------------
 .../[project_id]/experiments/page-client.tsx  | 200 +++++++++--------
 app/api/dataset/download/route.ts             |  86 ++++++--
 app/api/run/route.ts                          | 198 +++++++++++++++++
 components/shared/download-dataset.tsx        |  39 ++--
 prisma/schema.prisma                          |  46 ++--
 9 files changed, 595 insertions(+), 433 deletions(-)
 delete mode 100644 app/(protected)/project/[project_id]/experiments/experiment_result.ts
 create mode 100644 app/api/run/route.ts

diff --git a/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx b/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx
index 25097520..0803efba 100644
--- a/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx
+++ b/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx
@@ -17,6 +17,7 @@ import { useQuery } from "react-query";
 import { toast } from "sonner";
 
 export default function Dataset() {
+  const projectId = useParams()?.project_id as string;
   const dataset_id = useParams()?.dataset_id as string;
   const [page, setPage] = useState<number>(1);
   const [totalPages, setTotalPages] = useState<number>(1);
@@ -92,7 +93,11 @@ export default function Dataset() {
             Back
           </Button>
           <CreateData datasetId={dataset_id} />
-          <DownloadDataset datasetId={dataset_id} disabled={fetchDataset.isLoading || currentData?.length === 0} />
+          <DownloadDataset
+            projectId={projectId}
+            datasetId={dataset_id}
+            disabled={fetchDataset.isLoading || currentData?.length === 0}
+          />
         </div>
         <div className="flex flex-col gap-3 rounded-md border border-muted max-h-screen overflow-y-scroll">
           <div className="grid grid-cols-5 items-center justify-stretch gap-3 py-3 px-4 bg-muted">
diff --git a/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx b/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx
index 42dd2668..b9e93ed5 100644
--- a/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx
+++ b/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx
@@ -16,25 +16,60 @@ import { cn } from "@/lib/utils";
 import { ChevronLeft, ChevronRight, MoveDiagonal, X } from "lucide-react";
 import { useParams, useRouter } from "next/navigation";
 import { useState } from "react";
-import data from "../experiment_result";
+import { useQuery } from "react-query";
+import { toast } from "sonner";
 
 export default function Experiments() {
   const router = useRouter();
   const runId = useParams()?.run_id as string;
-  const experiment = data.find((exp) => exp.eval.run_id === runId);
-  const [expand, setExpand] = useState(
-    experiment?.samples && experiment?.samples?.length > 0
-      ? experiment?.samples.map(() => false)
-      : []
-  );
+  const projectId = useParams()?.project_id as string;
+
+  const [expand, setExpand] = useState<boolean[]>();
+  const [experiment, setExperiment] = useState<any>({});
+
+  const { isLoading: experimentLoading, error: experimentError } = useQuery({
+    queryKey: ["fetch-experiments-query", projectId, runId],
+    queryFn: async () => {
+      const response = await fetch(
+        `/api/run?projectId=${projectId}&runId=${runId}`
+      );
+      if (!response.ok) {
+        const error = await response.json();
+        throw new Error(error?.message || "Failed to fetch the experiment");
+      }
+      const result = await response.json();
+      if (!result.run || !result.run.log) {
+        throw new Error("No experiment found");
+      }
+      const exp = JSON.parse(result.run.log);
+      setExperiment(exp);
+      setExpand(
+        exp?.samples && exp?.samples?.length > 0
+          ? exp?.samples.map(() => false)
+          : []
+      );
+      return result;
+    },
+    onError: (error) => {
+      toast.error("Failed to fetch the experiment", {
+        description: error instanceof Error ? error.message : String(error),
+      });
+    },
+  });
+
+  if (experimentLoading) {
+    return <div>Loading...</div>;
+  }
+
   return (
     <div className="w-full flex flex-col gap-4">
-      <div className="md:px-24 px-12 py-12 flex justify-between bg-muted">
-        <div className="flex gap-2 items-center">
-          <h1 className="text-2xl font-semibold">Run ID: {runId}</h1>
+      <div className="px-12 py-12 flex justify-between bg-muted">
+        <div className="flex flex-col gap-2">
+          <h1 className="text-lg font-semibold">Run ID</h1>
+          <p className="text-md">{runId}</p>
           <Badge
             className={cn(
-              "capitalize",
+              "capitalize w-fit",
               experiment.status === "success"
                 ? "text-green-600 bg-green-200 hover:bg-green-200"
                 : "text-destructive bg-red-200 hover:bg-red-200"
@@ -45,9 +80,9 @@ export default function Experiments() {
         </div>
         <Button
           variant={
-            data?.length > 0 && experiment && experiment?.samples?.length > 0
-              ? "default"
-              : "outline"
+            experiment && experiment?.samples?.length > 0
+              ? "outline"
+              : "default"
           }
         >
           New Experiment
@@ -65,14 +100,17 @@ export default function Experiments() {
             disabled={!experiment?.samples || experiment?.samples?.length === 0}
             onClick={() => {
               setExpand(
-                expand.map(() => {
-                  return !expand[0];
-                })
+                expand &&
+                  expand.map(() => {
+                    return !expand[0];
+                  })
               );
             }}
           >
-            {expand.some((v: any) => v === false) && <MoveDiagonal size={20} />}
-            {!expand.some((v: any) => v === false) && <X size={20} />}
+            {expand && expand.some((v: any) => v === false) && (
+              <MoveDiagonal size={20} />
+            )}
+            {expand && !expand.some((v: any) => v === false) && <X size={20} />}
           </Button>
         </div>
         {experiment?.error && (
@@ -91,15 +129,14 @@ export default function Experiments() {
             </div>
           </div>
         )}
-        {!experiment ||
-          (experiment?.samples?.length === 0 && (
-            <div className="flex flex-col items-center gap-2 mt-6">
-              <p className="text-center text-md">
-                No samples found for this experiment.
-              </p>
-              <Button className="w-fit">New Experiment</Button>
-            </div>
-          ))}
+        {(!experiment?.samples || experiment?.samples?.length === 0) && (
+          <div className="flex flex-col items-center gap-2 mt-6">
+            <p className="text-center text-md">
+              No samples found for this experiment.
+            </p>
+            <Button className="w-fit">New Experiment</Button>
+          </div>
+        )}
         {experiment?.samples && experiment?.samples?.length > 0 && (
           <div className="overflow-y-scroll">
             <table className="table-auto overflow-x-scroll w-screen border-separate border border-muted rounded-md">
@@ -121,12 +158,13 @@ export default function Experiments() {
                     plan={experiment.plan}
                     sample={sample}
                     model={experiment.eval.model}
-                    expand={expand[i]}
+                    expand={expand ? expand[i] : false}
                     setExpand={(value: boolean, index: number) => {
                       setExpand(
-                        expand.map((_: any, j: number) => {
-                          return j === index ? value : expand[j];
-                        })
+                        expand &&
+                          expand.map((_: any, j: number) => {
+                            return j === index ? value : expand[j];
+                          })
                       );
                     }}
                   />
diff --git a/app/(protected)/project/[project_id]/experiments/compare/page.tsx b/app/(protected)/project/[project_id]/experiments/compare/page.tsx
index ab4a4686..58101d04 100644
--- a/app/(protected)/project/[project_id]/experiments/compare/page.tsx
+++ b/app/(protected)/project/[project_id]/experiments/compare/page.tsx
@@ -2,39 +2,63 @@
 
 import { Badge } from "@/components/ui/badge";
 import { Button } from "@/components/ui/button";
-import { Checkbox } from "@/components/ui/checkbox";
 import { cn } from "@/lib/utils";
 import { ChevronLeft } from "lucide-react";
-import { useRouter, useSearchParams } from "next/navigation";
+import { useParams, useRouter, useSearchParams } from "next/navigation";
 import { useState } from "react";
-import data from "../experiment_result";
+import { useQuery } from "react-query";
+import { toast } from "sonner";
 
 export default function Experiments() {
   const router = useRouter();
   // get run id from query params
   const searchParams = useSearchParams();
+  const projectId = useParams()?.project_id as string;
   const runIds = searchParams.getAll("run_id") as string[];
-  const experiments = data.filter((exp) => runIds.includes(exp.eval.run_id));
-  const isComparable = verifyIfSampleInputsMatch(runIds, data);
-  if (!isComparable) {
-    return (
-      <div className="flex flex-col items-center gap-2 mt-24">
-        <p className="text-center text-md">
-          The selected experiments are not comparable. Please select experiments
-          ran against the same dataset.
-        </p>
-        <Button className="w-fit">New Experiment</Button>
-      </div>
-    );
+  const [isComparable, setIsComparable] = useState<boolean>(false);
+
+  const {
+    data: experiments,
+    isLoading: experimentsLoading,
+    error: experimentsError,
+  } = useQuery({
+    queryKey: ["fetch-experiments-query", projectId, ...runIds],
+    queryFn: async () => {
+      const fetchPromises = runIds.map(async (runId) => {
+        const response = await fetch(
+          `/api/run?projectId=${projectId}&runId=${runId}`
+        );
+        if (!response.ok) {
+          const error = await response.json();
+          throw new Error(error?.message || "Failed to fetch the experiment");
+        }
+        const result = await response.json();
+        if (!result.run || !result.run.log) {
+          throw new Error("No experiment found");
+        }
+        return JSON.parse(result.run.log);
+      });
+
+      const exps = await Promise.all(fetchPromises);
+      setIsComparable(verifyIfSampleInputsMatch(exps));
+      return exps;
+    },
+    onError: (error) => {
+      toast.error("Failed to fetch one or more experiments", {
+        description: error instanceof Error ? error.message : String(error),
+      });
+    },
+  });
+
+  if (experimentsLoading) {
+    return <div>Loading...</div>;
   }
 
   return (
     <div className="w-full flex flex-col gap-4">
-      <div className="md:px-24 px-12 py-12 flex justify-between bg-muted">
-        {/* <h1 className="text-2xl font-semibold">Run ID: {runId}</h1> */}
-        <Button variant={data.length > 0 ? "default" : "outline"}>
-          New Experiment
-        </Button>
+      <div className="px-12 py-12 flex flex-col gap-2 bg-muted">
+        <h1 className="text-md font-semibold">Comparing Runs</h1>
+        <p className="text-sm w-1/2">{runIds.join(", ")}</p>
       </div>
       <div className="flex flex-col gap-12 w-full px-12">
         <div className="flex gap-2">
@@ -52,32 +76,49 @@ export default function Experiments() {
               <Button className="w-fit">New Experiment</Button>
             </div>
           ))}
-        {experiments[0]?.samples && experiments[0]?.samples?.length > 0 && (
-          <div className="overflow-y-scroll">
-            <table className="table-auto overflow-x-scroll w-screen border-separate border border-muted rounded-md">
-              <thead className="bg-muted">
-                <tr>
-                  <th className="w-12 rounded-md p-2">
-                    <Checkbox />
-                  </th>
-                  <th className="p-2 rounded-md text-sm font-medium">Input</th>
-                  <th className="p-2 rounded-md text-sm font-medium">Target</th>
-                  {experiments.map((experiment, i) => (
-                    <th
-                      key={i}
-                      className="p-2 rounded-md text-sm font-medium"
-                    >{`Output - (${experiment.eval.model})`}</th>
-                  ))}
-                </tr>
-              </thead>
-              <tbody>
-                {experiments[0].samples.map((_: any, i: number) => (
-                  <SampleRow key={i} index={i} experiments={experiments} />
-                ))}
-              </tbody>
-            </table>
+        {!isComparable && (
+          <div className="flex flex-col items-center gap-2 mt-24">
+            <p className="text-center text-md">
+              The selected experiments are not comparable. Please select
+              experiments ran against the same dataset.
+            </p>
+            <Button onClick={() => router.back()}>
+              <ChevronLeft className="text-muted-foreground" size={20} />
+              Back
+            </Button>
           </div>
         )}
+        {isComparable &&
+          experiments &&
+          experiments[0]?.samples &&
+          experiments[0]?.samples?.length > 0 && (
+            <div className="overflow-y-scroll">
+              <table className="table-auto overflow-x-scroll w-screen border-separate border border-muted rounded-md">
+                <thead className="bg-muted">
+                  <tr>
+                    <th className="p-2 rounded-md text-sm font-medium">
+                      Input
+                    </th>
+                    <th className="p-2 rounded-md text-sm font-medium">
+                      Target
+                    </th>
+                    {experiments &&
+                      experiments.map((experiment, i) => (
+                        <th
+                          key={i}
+                          className="p-2 rounded-md text-sm font-medium"
+                        >{`Output - (${experiment.eval.model})`}</th>
+                      ))}
+                  </tr>
+                </thead>
+                <tbody>
+                  {experiments[0].samples.map((_: any, i: number) => (
+                    <SampleRow key={i} index={i} experiments={experiments} />
+                  ))}
+                </tbody>
+              </table>
+            </div>
+          )}
       </div>
     </div>
   );
@@ -96,9 +137,6 @@ function SampleRow({
       className="hover:cursor-pointer hover:bg-muted group"
       onClick={() => setOpen(!open)}
     >
-      <td className="px-2 py-1 text-center">
-        <Checkbox onClick={(e) => e.stopPropagation()} />
-      </td>
       <td className={cn("text-sm px-2 py-1 max-w-80 relative")}>
         {typeof experiments[0]?.samples[index]?.input === "string"
           ? experiments[0]?.samples[index]?.input
@@ -144,12 +182,15 @@ function SampleRow({
   );
 }
 
-function verifyIfSampleInputsMatch(runIds: string[], data: any): boolean {
-  const experiments = data.filter((exp: any) =>
-    runIds.includes(exp.eval.run_id)
-  );
+function verifyIfSampleInputsMatch(experiments: any[]): boolean {
   if (experiments.length === 0) return false;
 
+  // also check if the length of samples is the same for all experiments
+  for (let j = 1; j < experiments?.length; j++) {
+    if (experiments[j]?.samples?.length !== experiments[0]?.samples?.length)
+      return false;
+  }
+
   // iterate through each experiment and each sample and check if the input of sample at index i matches with the input of sample at index i for all experiments
   for (let i = 0; i < experiments[0]?.samples?.length; i++) {
     const input = experiments[0]?.samples[i]?.input;
diff --git a/app/(protected)/project/[project_id]/experiments/experiment_result.ts b/app/(protected)/project/[project_id]/experiments/experiment_result.ts
deleted file mode 100644
index c37fe279..00000000
--- a/app/(protected)/project/[project_id]/experiments/experiment_result.ts
+++ /dev/null
@@ -1,207 +0,0 @@
-const data: any[] = [
-  {
-    version: 1,
-    status: "error",
-    eval: {
-      task: "security_guide",
-      task_version: 0,
-      task_file: "src/main.py",
-      task_id: "agwg9gxU5RHpXc3aALKijk",
-      run_id: "Kkg2c5UHPamfMk9eYfJLzD",
-      created: "2024-05-30T11:07:59",
-      dataset: {
-        name: "security_guide",
-        location: "example://security_guide",
-      },
-      model: "openai/gpt-4",
-      task_attribs: {},
-      task_args: {},
-      model_args: {},
-      config: {},
-      packages: {
-        inspect_ai: "0.3.10",
-      },
-    },
-    plan: {
-      name: "plan",
-      steps: [
-        {
-          solver: "chain_of_thought",
-          params: {},
-        },
-        {
-          solver: "generate",
-          params: {},
-        },
-        {
-          solver: "self_critique",
-          params: {},
-        },
-      ],
-      config: {},
-    },
-    stats: {
-      started_at: "2024-05-30T11:07:59",
-      completed_at: "2024-05-30T11:07:59",
-      model_usage: {},
-    },
-    error: {
-      message:
-        "ValueError('user_prompt requested from TaskState but none available')",
-      traceback:
-        'Traceback (most recent call last):\n\n  File "/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect_ai/_eval/task/run.py", line 175, in task_run\n    scores = await asyncio.gather(*tasks)\n\n  File "/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect_ai/_eval/task/run.py", line 256, in task_run_sample\n    state = await solver(state, generate)\n\n  File "/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect_ai/solver/_prompt.py", line 33, in solve\n    prompt = state.user_prompt\n\n  File "/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect_ai/solver/_solver.py", line 124, in user_prompt\n    raise ValueError("user_prompt requested from TaskState but none available")\n\nValueError: user_prompt requested from TaskState but none available\n',
-      traceback_ansi:
-        "\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m_ai/_eval/task/\u001b[0m\u001b[1;33mrun.py\u001b[0m:\u001b[94m175\u001b[0m in \u001b[92mtask_run\u001b[0m                                                            \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m_ai/_eval/task/\u001b[0m\u001b[1;33mrun.py\u001b[0m:\u001b[94m256\u001b[0m in \u001b[92mtask_run_sample\u001b[0m                                                     \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m_ai/solver/\u001b[0m\u001b[1;33m_prompt.py\u001b[0m:\u001b[94m33\u001b[0m in \u001b[92msolve\u001b[0m                                                                \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m                                                                                                  \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect\u001b[0m 
\u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m_ai/solver/\u001b[0m\u001b[1;33m_solver.py\u001b[0m:\u001b[94m124\u001b[0m in \u001b[92muser_prompt\u001b[0m                                                         \u001b[31m│\u001b[0m\n\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n\u001b[1;91mValueError: \u001b[0muser_prompt requested from TaskState but none available\n",
-    },
-    logging: [],
-  },
-  {
-    version: 1,
-    status: "success",
-    eval: {
-      task: "security_guide",
-      task_version: 0,
-      task_file: "src/main.py",
-      task_id: "WaGqtQ7sZYWkmmDYcubjiv",
-      run_id: "ar7LzC2PefbchfLMosfAWF",
-      created: "2024-05-30T11:06:32",
-      dataset: {
-        name: "security_guide",
-        location: "example://security_guide",
-      },
-      model: "openai/gpt-3.5-turbo",
-      task_attribs: {},
-      task_args: {},
-      model_args: {},
-      config: {},
-      packages: {
-        inspect_ai: "0.3.10",
-      },
-    },
-    plan: {
-      name: "plan",
-      steps: [
-        {
-          solver: "chain_of_thought",
-          params: {},
-        },
-        {
-          solver: "generate",
-          params: {},
-        },
-        {
-          solver: "self_critique",
-          params: {},
-        },
-      ],
-      config: {},
-    },
-    results: {
-      scorer: {
-        name: "model_graded_fact",
-        params: {},
-      },
-      metrics: {
-        accuracy: {
-          name: "accuracy",
-          value: 1.0,
-          options: {},
-        },
-        bootstrap_std: {
-          name: "bootstrap_std",
-          value: 0.0,
-          options: {},
-        },
-      },
-    },
-    stats: {
-      started_at: "2024-05-30T11:06:32",
-      completed_at: "2024-05-30T11:06:45",
-      model_usage: {
-        "openai/gpt-3.5-turbo": {
-          input_tokens: 1677,
-          output_tokens: 728,
-          total_tokens: 2405,
-        },
-      },
-    },
-    samples: [
-      {
-        id: 1,
-        epoch: 1,
-        input: "How do I prevent SQL Injection attacks?",
-        target: "use parameterized queries and prepared statements",
-        messages: [
-          {
-            content:
-              '\nHow do I prevent SQL Injection attacks?\n\nBefore answering, reason in a step-by-step manner as to get the right answer. Provide your answer at the end on its own line in the form "ANSWER: $ANSWER" (without quotes) where $ANSWER is the answer to the question.\n',
-            source: "input",
-            role: "user",
-          },
-          {
-            content:
-              "1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack that allows attackers to manipulate the SQL code of a web application to gain unauthorized access to the database or to perform harmful actions.\n\n2. Sanitize user inputs: One of the most common ways attackers exploit SQL Injection vulnerabilities is through user inputs. Make sure to validate and sanitize all user inputs to prevent malicious code from being executed.\n\n3. Use parameterized queries: Instead of dynamically creating SQL queries by concatenating strings, use parameterized queries which automatically sanitize inputs by treating them as data rather than executable code.\n\n4. Limit database permissions: Ensure that the database user account used by your application has the least amount of privileges necessary. Avoid using an admin account for regular application tasks.\n\n5. Update and patch regularly: Make sure your database management system, web server, and application software are up to date with the latest security patches to protect against known vulnerabilities.\n\n6. Implement input validation: Validate and sanitize user inputs on both client-side and server-side to reduce the risk of SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, sanitize user inputs, use parameterized queries, limit database permissions, update and patch regularly, and implement input validation.",
-            source: "generate",
-            role: "assistant",
-          },
-          {
-            content:
-              '\nGiven the following question, initial answer and critique please generate an improved answer to the question:\n\n[BEGIN DATA]\n***\n[Question]: How do I prevent SQL Injection attacks?\n***\n[Answer]: 1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack that allows attackers to manipulate the SQL code of a web application to gain unauthorized access to the database or to perform harmful actions.\n\n2. Sanitize user inputs: One of the most common ways attackers exploit SQL Injection vulnerabilities is through user inputs. Make sure to validate and sanitize all user inputs to prevent malicious code from being executed.\n\n3. Use parameterized queries: Instead of dynamically creating SQL queries by concatenating strings, use parameterized queries which automatically sanitize inputs by treating them as data rather than executable code.\n\n4. Limit database permissions: Ensure that the database user account used by your application has the least amount of privileges necessary. Avoid using an admin account for regular application tasks.\n\n5. Update and patch regularly: Make sure your database management system, web server, and application software are up to date with the latest security patches to protect against known vulnerabilities.\n\n6. Implement input validation: Validate and sanitize user inputs on both client-side and server-side to reduce the risk of SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, sanitize user inputs, use parameterized queries, limit database permissions, update and patch regularly, and implement input validation.\n***\n[Critique]: The answer provides a good overview of ways to prevent SQL Injection attacks, but it lacks depth and specificity in explaining each point. It would be beneficial to provide more details and examples for each prevention method to ensure a comprehensive understanding for the reader. 
Additionally, the answer could benefit from explaining why each prevention method is effective in stopping SQL Injection attacks.\n***\n[END DATA]\n\nIf the original answer is already correct, just repeat the original answer exactly. Provide your answer at the end on its own line in the form "ANSWER: $ANSWER" (without quotes) where $ANSWER is the answer to the question.\n',
-            role: "user",
-          },
-          {
-            content:
-              "Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.",
-            source: "generate",
-            role: "assistant",
-          },
-        ],
-        output: {
-          model: "gpt-3.5-turbo-0125",
-          choices: [
-            {
-              message: {
-                content:
-                  "Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.",
-                source: "generate",
-                role: "assistant",
-              },
-              stop_reason: "stop",
-            },
-          ],
-          usage: {
-            input_tokens: 738,
-            output_tokens: 294,
-            total_tokens: 1032,
-          },
-        },
-        score: {
-          value: "C",
-          answer:
-            "Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.",
-          explanation:
-            "1. The expert answer mentions using parameterized queries and prepared statements as a way to prevent SQL Injection attacks.\n2. The submission also includes using parameterized queries as one of the ways to prevent SQL Injection attacks.\n3. The submission expands on the concept, explaining what SQL Injection attacks are, sanitizing user inputs, limiting database permissions, updating software regularly, and implementing input validation.\n4. All additional points mentioned in the submission are relevant and contribute to preventing SQL Injection attacks.\n\nAfter a thorough comparison, it is evident that the submission contains the content mentioned in the expert answer. \n\nGRADE: C",
-          metadata: {
-            grading: [
-              {
-                content:
-                  "\nYou are comparing a submitted answer to an expert answer on a given question. Here is the data:\n\n[BEGIN DATA]\n************\n[Question]: How do I prevent SQL Injection attacks?\n************\n[Expert]: use parameterized queries and prepared statements\n************\n[Submission]: Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. 
This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.\n************\n[END DATA]\n\nCompare the factual content of the submitted answer with the expert answer. Ignore any differences in style, grammar, or punctuation.\n\nDoes the submission contain the content in the expert answer?\n\n\nAfter assessing the submitted answer, reply with 'GRADE: $LETTER' (without quotes) where LETTER is one of CI.  Please choose ONE option for the grade: either \"C\" for correct answers, or \"I\" for incorrect answers.\n\nFor example, after reviewing a correct answer you might write 'GRADE: C' or after reviewing an incorrect answer you might write 'GRADE: I'.\n\nFirst, write out in a step by step manner your reasoning about the criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. Then, end with your answer formatted as 'GRADE: $LETTER' (without quotes) where LETTER is one of CI.\n\n",
-                role: "user",
-              },
-              {
-                content:
-                  "1. The expert answer mentions using parameterized queries and prepared statements as a way to prevent SQL Injection attacks.\n2. The submission also includes using parameterized queries as one of the ways to prevent SQL Injection attacks.\n3. The submission expands on the concept, explaining what SQL Injection attacks are, sanitizing user inputs, limiting database permissions, updating software regularly, and implementing input validation.\n4. All additional points mentioned in the submission are relevant and contribute to preventing SQL Injection attacks.\n\nAfter a thorough comparison, it is evident that the submission contains the content mentioned in the expert answer. \n\nGRADE: C",
-                source: "generate",
-                role: "assistant",
-              },
-            ],
-          },
-        },
-        metadata: {},
-      },
-    ],
-    logging: [],
-  },
-];
-
-export default data;
diff --git a/app/(protected)/project/[project_id]/experiments/page-client.tsx b/app/(protected)/project/[project_id]/experiments/page-client.tsx
index 665cdf86..d5480106 100644
--- a/app/(protected)/project/[project_id]/experiments/page-client.tsx
+++ b/app/(protected)/project/[project_id]/experiments/page-client.tsx
@@ -4,24 +4,55 @@ import { Badge } from "@/components/ui/badge";
 import { Button } from "@/components/ui/button";
 import { Checkbox } from "@/components/ui/checkbox";
 import { cn, formatDateTime } from "@/lib/utils";
+import { Run } from "@prisma/client";
 import { useParams, useRouter } from "next/navigation";
 import { useState } from "react";
-import data from "./experiment_result";
+import { useQuery } from "react-query";
+import { toast } from "sonner";
 
 export default function Experiments() {
   const router = useRouter();
   const projectId = useParams()?.project_id as string;
   const [comparisonRunIds, setComparisonRunIds] = useState<string[]>([]);
+
+  const {
+    data: experiments,
+    isLoading: experimentsLoading,
+    error: experimentsError,
+  } = useQuery({
+    queryKey: ["fetch-experiments-query"],
+    queryFn: async () => {
+      const response = await fetch(`/api/run?projectId=${projectId}`);
+      if (!response.ok) {
+        const error = await response.json();
+        throw new Error(error?.message || "Failed to fetch experiments");
+      }
+      const result = await response.json();
+      return result;
+    },
+    onError: (error) => {
+      toast.error("Failed to fetch experiments", {
+        description: error instanceof Error ? error.message : String(error),
+      });
+    },
+  });
+
+  if (experimentsLoading) {
+    return <div>Loading...</div>;
+  }
+
   return (
     <div className="w-full flex flex-col gap-4">
       <div className="md:px-24 px-12 py-12 flex justify-between bg-muted">
         <h1 className="text-3xl font-semibold">Experiments</h1>
         <div className="flex gap-2">
-          <Button variant={data.length > 0 ? "default" : "outline"}>
+          <Button
+            variant={experiments?.runs?.length > 0 ? "outline" : "default"}
+          >
             New Experiment
           </Button>
           <Button
-            variant={"outline"}
+            variant={experiments?.runs?.length > 0 ? "default" : "outline"}
             disabled={comparisonRunIds.length < 2}
             onClick={() => {
               // append comparisonRunIds to query params. & only from the second run id
@@ -36,7 +67,7 @@ export default function Experiments() {
         </div>
       </div>
       <div className="flex flex-col gap-12 w-full px-12">
-        {data && data.length === 0 && (
+        {experiments && experiments.runs && experiments.runs?.length === 0 && (
           <div className="flex flex-col items-center gap-2 mt-24">
             <p className="text-center text-md">
               No experiments found. Get started by running your first
@@ -45,7 +76,7 @@ export default function Experiments() {
             <Button>New Experiment</Button>
           </div>
         )}
-        {data && data.length > 0 && (
+        {experiments && experiments.runs && experiments.runs?.length > 0 && (
           <div className="overflow-y-scroll">
             <table className="table-auto overflow-x-scroll w-max border-separate border border-muted rounded-md mt-6">
               <thead className="bg-muted">
@@ -76,91 +107,86 @@ export default function Experiments() {
                 </tr>
               </thead>
               <tbody>
-                {data.map((experiment) => (
-                  <tr
-                    key={experiment.eval.task_id}
-                    className="hover:cursor-pointer hover:bg-muted"
-                    onClick={() =>
-                      router.push(
-                        `/project/${projectId}/experiments/${experiment.eval.run_id}`
-                      )
-                    }
-                  >
-                    <td
-                      className="px-2 py-1 text-center"
-                      onClick={(e) => e.stopPropagation()}
+                {experiments?.runs.map((experiment: Run) => {
+                  const log: any = JSON.parse(experiment.log as string);
+                  return (
+                    <tr
+                      key={experiment.id}
+                      className="hover:cursor-pointer hover:bg-muted"
+                      onClick={() =>
+                        router.push(
+                          `/project/${projectId}/experiments/${log?.eval?.run_id}`
+                        )
+                      }
                     >
-                      <Checkbox
-                        disabled={experiment.status !== "success"}
-                        onCheckedChange={(value) => {
-                          if (value) {
-                            setComparisonRunIds([
-                              ...comparisonRunIds,
-                              experiment.eval.run_id,
-                            ]);
-                          } else {
-                            setComparisonRunIds(
-                              comparisonRunIds.filter(
-                                (id) => id !== experiment.eval.run_id
+                      <td
+                        className="px-2 py-1 text-center"
+                        onClick={(e) => e.stopPropagation()}
+                      >
+                        <Checkbox
+                          disabled={log?.status !== "success"}
+                          onCheckedChange={(value) => {
+                            if (value) {
+                              setComparisonRunIds([
+                                ...comparisonRunIds,
+                                log?.eval?.run_id,
+                              ]);
+                            } else {
+                              setComparisonRunIds(
+                                comparisonRunIds.filter(
+                                  (id) => id !== log?.eval?.run_id
+                                )
+                              );
+                            }
+                          }}
+                        />
+                      </td>
+                      <td className="text-sm px-2 py-1">{log?.eval?.run_id}</td>
+                      <td className="text-sm px-2 py-1">
+                        {formatDateTime(log?.stats?.started_at, true)}
+                      </td>
+                      <td className="text-sm px-2 py-1">
+                        {formatDateTime(log?.stats?.completed_at, true)}
+                      </td>
+                      <td className="text-sm px-2 py-1">{log?.eval?.task}</td>
+                      <td className="text-sm px-2 py-1">
+                        {log?.samples?.length || 0}
+                      </td>
+                      <td className="text-sm px-2 py-1">{log?.eval?.model}</td>
+                      <td className="text-sm px-2 py-1">{log?.plan?.name}</td>
+                      <td className="text-sm px-2 py-1">
+                        {log?.results?.scorer?.name || "N/A"}
+                      </td>
+                      <td className="text-sm px-2 py-1 flex flex-wrap gap-2 w-72">
+                        {log?.results?.metrics
+                          ? Object.keys(log?.results?.metrics).map(
+                              (metric, i) => (
+                                <Badge variant={"outline"} className="" key={i}>
+                                  {metric +
+                                    ": " +
+                                    (log?.results?.metrics as any)[
+                                      metric
+                                    ].value.toFixed(2)}
+                                </Badge>
                               )
-                            );
-                          }
-                        }}
-                      />
-                    </td>
-                    <td className="text-sm px-2 py-1">
-                      {experiment.eval.run_id}
-                    </td>
-                    <td className="text-sm px-2 py-1">
-                      {formatDateTime(experiment.stats.started_at, true)}
-                    </td>
-                    <td className="text-sm px-2 py-1">
-                      {formatDateTime(experiment.stats.completed_at, true)}
-                    </td>
-                    <td className="text-sm px-2 py-1">
-                      {experiment.eval.task}
-                    </td>
-                    <td className="text-sm px-2 py-1">
-                      {experiment.samples?.length || 0}
-                    </td>
-                    <td className="text-sm px-2 py-1">
-                      {experiment.eval.model}
-                    </td>
-                    <td className="text-sm px-2 py-1">
-                      {experiment.plan.name}
-                    </td>
-                    <td className="text-sm px-2 py-1">
-                      {experiment.results?.scorer?.name || "N/A"}
-                    </td>
-                    <td className="text-sm px-2 py-1 flex flex-wrap gap-2 w-72">
-                      {experiment.results?.metrics
-                        ? Object.keys(experiment.results?.metrics).map(
-                            (metric, i) => (
-                              <Badge variant={"outline"} className="" key={i}>
-                                {metric +
-                                  ": " +
-                                  (experiment.results.metrics as any)[
-                                    metric
-                                  ].value.toFixed(2)}
-                              </Badge>
                             )
-                          )
-                        : "N/A"}
-                    </td>
-                    <td className="px-2 py-1">
-                      <Badge
-                        className={cn(
-                          "capitalize",
-                          experiment.status === "success"
-                            ? "text-green-600 bg-green-200 hover:bg-green-200"
-                            : "text-destructive bg-red-200 hover:bg-red-200"
-                        )}
-                      >
-                        {experiment.status}
-                      </Badge>
-                    </td>
-                  </tr>
-                ))}
+                          : "N/A"}
+                      </td>
+                      <td className="px-2 py-1">
+                        <Badge
+                          className={cn(
+                            "capitalize",
+                            log?.status === "success"
+                              ? "text-green-600 bg-green-200 hover:bg-green-200"
+                              : "text-destructive bg-red-200 hover:bg-red-200"
+                          )}
+                        >
+                          {log?.status}
+                        </Badge>
+                      </td>
+                    </tr>
+                  );
+                })}
               </tbody>
             </table>
           </div>
diff --git a/app/api/dataset/download/route.ts b/app/api/dataset/download/route.ts
index 36564551..cb29e18f 100644
--- a/app/api/dataset/download/route.ts
+++ b/app/api/dataset/download/route.ts
@@ -1,21 +1,54 @@
 import { authOptions } from "@/lib/auth/options";
 import prisma from "@/lib/prisma";
-import json2csv from 'json2csv';
+import { authApiKey } from "@/lib/utils";
+import json2csv from "json2csv";
 import { getServerSession } from "next-auth";
 import { redirect } from "next/navigation";
 import { NextRequest, NextResponse } from "next/server";
 
 export async function GET(req: NextRequest) {
   try {
-    const session = await getServerSession(authOptions);
-    if (!session || !session.user) {
-      redirect("/login");
+    let projectId = "";
+    const apiKey = req.headers.get("x-api-key");
+    if (!apiKey) {
+      const session = await getServerSession(authOptions);
+      if (!session || !session.user) {
+        redirect("/login");
+      }
+    } else {
+      const response = await authApiKey(apiKey!);
+      if (response.status !== 200) {
+        return response;
+      }
+
+      // Get project data
+      const projectData = await response.json();
+      projectId = projectData.data.project.id;
+    }
+
+    if (!projectId) {
+      projectId = req.nextUrl.searchParams.get("projectId") as string;
+    }
+
+    const project = await prisma.project.findFirst({
+      where: {
+        id: projectId,
+      },
+    });
+
+    if (!project) {
+      return NextResponse.json(
+        {
+          message: "No projects found",
+        },
+        { status: 404 }
+      );
     }
+
     const datasetId = req.nextUrl.searchParams.get("id") as string;
     const pageParam = req.nextUrl.searchParams.get("page");
     let page = pageParam ? parseInt(pageParam, 10) : 1;
     const pageSize = 500;
-    let dataset;
     if (!datasetId) {
       return NextResponse.json(
         {
@@ -24,17 +57,15 @@ export async function GET(req: NextRequest) {
         { status: 404 }
       );
     }
-    else {
-      dataset = await prisma.dataset.findFirst({
-        where: {
-          id: datasetId,
-        },
-        include: {
-          Data: true,
-        },
-      });
+    const dataset = await prisma.dataset.findFirst({
+      where: {
+        id: datasetId,
+      },
+      include: {
+        Data: true,
+      },
+    });
 
-    }
     if (!dataset) {
       return NextResponse.json(
         {
@@ -55,15 +86,28 @@ export async function GET(req: NextRequest) {
       skip: (page - 1) * pageSize,
     });
 
-    const csv = json2csv.parse(data);
-    const datasetName = dataset.name.toLowerCase().replace(/\s+/g, '_');
-    const timestamp = new Date().toISOString().slice(0, 19).replace(/[-:]/g, '');
+    const result: any = [];
+    // keep only input, output and annotatedOutput; export them as input, target and annotated_output
+    data.forEach((d) => {
+      result.push({
+        input: d.input,
+        target: d.output,
+        annotated_output: d.annotatedOutput,
+      });
+    });
+
+    const csv = json2csv.parse(result);
+    const datasetName = dataset.name.toLowerCase().replace(/\s+/g, "_");
+    const timestamp = new Date()
+      .toISOString()
+      .slice(0, 19)
+      .replace(/[-:]/g, "");
     const filename = `${datasetName}_${timestamp}.csv`;
 
     return new NextResponse(csv, {
       headers: {
-        'Content-Type': 'text/csv',
-        'Content-Disposition': `filename:${filename}`,
+        "Content-Type": "text/csv",
+        "Content-Disposition": `filename:${filename}`,
       },
     });
   } catch (error) {
@@ -74,4 +118,4 @@ export async function GET(req: NextRequest) {
       { status: 500 }
     );
   }
-}
\ No newline at end of file
+}
diff --git a/app/api/run/route.ts b/app/api/run/route.ts
new file mode 100644
index 00000000..0276b0da
--- /dev/null
+++ b/app/api/run/route.ts
@@ -0,0 +1,198 @@
+import { authOptions } from "@/lib/auth/options";
+import prisma from "@/lib/prisma";
+import { authApiKey } from "@/lib/utils";
+import { getServerSession } from "next-auth";
+import { redirect } from "next/navigation";
+import { NextRequest, NextResponse } from "next/server";
+
+/**
+ * Returns a project's runs: the single run matching `runId` when that query
+ * parameter is present, otherwise one page of runs ordered newest first.
+ * Callers authenticate with a session or an `x-api-key` header.
+ */
+export async function GET(req: NextRequest) {
+  try {
+    let projectId = "";
+    const session = await getServerSession(authOptions);
+    if (!session || !session.user) {
+      const apiKey = req.headers.get("x-api-key");
+      if (!apiKey) {
+        // next/navigation's redirect() works by throwing, so inside this try
+        // block the catch below would turn it into a 500; return the redirect
+        // response explicitly instead.
+        return NextResponse.redirect(new URL("/login", req.url));
+      }
+
+      const response = await authApiKey(apiKey);
+      if (response.status !== 200) {
+        return response;
+      }
+
+      // Get project data
+      const projectData = await response.json();
+      projectId = projectData.data.project.id;
+    }
+
+    if (!projectId) {
+      projectId = req.nextUrl.searchParams.get("projectId") as string;
+    }
+
+    const project = await prisma.project.findFirst({
+      where: {
+        id: projectId,
+      },
+    });
+
+    if (!project) {
+      return NextResponse.json(
+        {
+          message: "No projects found",
+        },
+        { status: 404 }
+      );
+    }
+
+    // Get run by id
+    const runId = req.nextUrl.searchParams.get("runId");
+    if (runId) {
+      const run = await prisma.run.findFirst({
+        where: {
+          projectId: projectId,
+          runId: runId,
+        },
+      });
+
+      return NextResponse.json({
+        run: run,
+      });
+    }
+
+    // Get runs. Malformed or non-positive paging values fall back to the
+    // defaults instead of reaching Prisma as NaN or a negative skip.
+    const params = req.nextUrl.searchParams;
+    const pageParam = parseInt(params.get("page") ?? "", 10);
+    const pageSizeParam = parseInt(params.get("pageSize") ?? "", 10);
+    let page = pageParam >= 1 ? pageParam : 1;
+    const pageSize = pageSizeParam >= 1 ? pageSizeParam : 10;
+
+    const totalLen = await prisma.run.count({ where: { projectId } });
+    const totalPages = Math.max(1, Math.ceil(totalLen / pageSize));
+    const md = { page, page_size: pageSize, total_pages: totalPages };
+    // Past-the-end requests get the last page; metadata keeps the requested one.
+    if (page > totalPages) {
+      page = totalPages;
+    }
+
+    const runs = await prisma.run.findMany({
+      where: { projectId },
+      orderBy: { createdAt: "desc" },
+      take: pageSize,
+      skip: (page - 1) * pageSize,
+    });
+
+    return NextResponse.json({
+      runs: runs,
+      metadata: md,
+    });
+  } catch (error) {
+    return NextResponse.json(
+      {
+        message: "Internal server error",
+      },
+      { status: 500 }
+    );
+  }
+}
+
+/**
+ * Records a new run for the project that owns the supplied `x-api-key`.
+ * The JSON body provides runId, taskId and log, plus optional description,
+ * wfVersion and datasetId. Requests without a key are redirected to /login.
+ */
+export async function POST(req: NextRequest) {
+  const apiKey = req.headers.get("x-api-key");
+  if (!apiKey) {
+    redirect("/login");
+  }
+
+  const authResponse = await authApiKey(apiKey!);
+  if (authResponse.status !== 200) {
+    return authResponse;
+  }
+
+  // The authenticated key determines which project the run belongs to.
+  const { data: keyData } = await authResponse.json();
+  const projectId = keyData.project.id;
+
+  const body = await req.json();
+  const { runId, taskId, description, wfVersion, log, datasetId } = body;
+
+  // Required columns go in unconditionally; optional ones are attached only
+  // when the caller sent a truthy value.
+  const payload: any = {
+    runId,
+    taskId,
+    log,
+    projectId,
+    ...(description ? { description } : {}),
+    ...(wfVersion ? { wfVersion } : {}),
+    ...(datasetId ? { datasetId } : {}),
+  };
+
+  const run = await prisma.run.create({
+    data: payload,
+  });
+
+  return NextResponse.json({
+    run,
+  });
+}
+
+/**
+ * Updates the optional fields (description, wfVersion, datasetId) of the
+ * run identified by `id` in the JSON body. Requires a logged-in session;
+ * unauthenticated requests are redirected to /login.
+ */
+export async function PUT(req: NextRequest) {
+  const session = await getServerSession(authOptions);
+  if (!session || !session.user) {
+    redirect("/login");
+  }
+
+  const { id, description, wfVersion, datasetId } = await req.json();
+  // Only forward the fields the caller actually supplied.
+  const payload: any = {};
+  if (description) {
+    payload["description"] = description;
+  }
+  if (wfVersion) {
+    payload["wfVersion"] = wfVersion;
+  }
+  if (datasetId) {
+    payload["datasetId"] = datasetId;
+  }
+
+  // Update the Run row itself: this handler previously targeted the Dataset
+  // model, which has no wfVersion column and is keyed by a different id.
+  const run = await prisma.run.update({ where: { id }, data: payload });
+
+  return NextResponse.json({ run });
+}
+
+/**
+ * Deletes the run whose `id` is given in the JSON request body.
+ * Requires a logged-in session; otherwise redirects to /login.
+ */
+export async function DELETE(req: NextRequest) {
+  const session = await getServerSession(authOptions);
+  const loggedIn = Boolean(session && session.user);
+  if (!loggedIn) {
+    redirect("/login");
+  }
+
+  const { id } = await req.json();
+
+  await prisma.run.delete({ where: { id } });
+
+  return NextResponse.json({});
+}
diff --git a/components/shared/download-dataset.tsx b/components/shared/download-dataset.tsx
index e6b550bf..2fe0636b 100644
--- a/components/shared/download-dataset.tsx
+++ b/components/shared/download-dataset.tsx
@@ -12,16 +12,17 @@ import {
   DialogTitle,
   DialogTrigger,
 } from "@/components/ui/dialog";
-import { useState } from 'react';
+import { useState } from "react";
 import { toast } from "sonner";
 
 export function DownloadDataset({
+  projectId,
   datasetId,
-  disabled=false,
+  disabled = false,
 }: {
+  projectId: string;
   datasetId: string;
   disabled?: boolean;
-
 }) {
   const [open, setOpen] = useState(false);
   const [busy, setBusy] = useState(false);
@@ -29,23 +30,26 @@ export function DownloadDataset({
     setBusy(true);
     try {
       datasetId = datasetId.toString();
-      const response = await fetch(`/api/dataset/download?id=${datasetId}`, {
-        method: "GET",
-        headers: {
-          "Content-Type": "text/csv",
-        },
-      });
+      const response = await fetch(
+        `/api/dataset/download?id=${datasetId}&projectId=${projectId}`,
+        {
+          method: "GET",
+          headers: {
+            "Content-Type": "text/csv",
+          },
+        }
+      );
 
       if (!response.ok) {
         throw new Error("Failed to download dataset.");
       }
 
       // Extract filename from Content-Disposition header
-      const contentDisposition = response.headers.get('Content-Disposition');
+      const contentDisposition = response.headers.get("Content-Disposition");
 
       let filename;
       if (contentDisposition) {
-        const filenameKeyValue = contentDisposition.split(':')[1].split('.');
+        const filenameKeyValue = contentDisposition.split(":")[1].split(".");
 
         if (filenameKeyValue.length === 2) {
           filename = filenameKeyValue[0];
@@ -74,7 +78,7 @@ export function DownloadDataset({
   return (
     <Dialog open={open} onOpenChange={setOpen}>
       <DialogTrigger asChild>
-        <Button size={'icon'} variant={'outline'} disabled={disabled}>
+        <Button size={"icon"} variant={"outline"} disabled={disabled}>
           <DownloadIcon className="h-4 w-4 shrink-0" />
         </Button>
       </DialogTrigger>
@@ -82,11 +86,16 @@ export function DownloadDataset({
         <DialogHeader>
           <DialogTitle>Download Dataset</DialogTitle>
           <DialogDescription>
-            This will download the data as .csv and only up to a maximum of 500 records. To download the entire dataset, please contact us.
+            This will download the data as .csv and only up to a maximum of 500
+            records. To download the entire dataset, please contact us.
           </DialogDescription>
         </DialogHeader>
         <DialogFooter>
-          <Button variant={'outline'} onClick={() => setOpen(false)} disabled={busy}>
+          <Button
+            variant={"outline"}
+            onClick={() => setOpen(false)}
+            disabled={busy}
+          >
             Cancel
           </Button>
           <Button disabled={busy} onClick={handleDownload}>
@@ -96,4 +105,4 @@ export function DownloadDataset({
       </DialogContent>
     </Dialog>
   );
-}
\ No newline at end of file
+}
diff --git a/prisma/schema.prisma b/prisma/schema.prisma
index 2ab4a476..89585f57 100644
--- a/prisma/schema.prisma
+++ b/prisma/schema.prisma
@@ -95,10 +95,18 @@ model Evaluation {
 }
 
 model Run {
-  id          String  @id @default(cuid())
-  name        String
+  id          String   @id @default(cuid())
+  runId       String
+  taskId      String
   description String?
-  Data        Data[]
+  wfVersion   String?
+  log         Json?
+  datasetId   String?
+  projectId   String
+  Dataset     Dataset? @relation(fields: [datasetId], references: [id])
+  Project     Project  @relation(fields: [projectId], references: [id], onDelete: Cascade)
+  createdAt   DateTime @default(now())
+  updatedAt   DateTime @updatedAt
 }
 
 model Prompt {
@@ -119,22 +127,20 @@ model Prompt {
 }
 
 model Data {
-  id             String       @id @default(cuid())
-  input          String
-  output         String
-  contexts       String[]
-  expectedOutput String?
-  note           String?
-  spanId         String?
-  Evaluation     Evaluation[]
-  runId          String?
-  Run            Run?         @relation(fields: [runId], references: [id], onDelete: Cascade)
-  datasetId      String?
-  Dataset        Dataset?     @relation(fields: [datasetId], references: [id], onDelete: Cascade)
-  projectId      String?
-  Project        Project?     @relation(fields: [projectId], references: [id], onDelete: Cascade)
-  createdAt      DateTime     @default(now())
-  updatedAt      DateTime     @updatedAt
+  id              String       @id @default(cuid())
+  input           String
+  output          String
+  contexts        String[]
+  annotatedOutput String?
+  note            String?
+  spanId          String?
+  Evaluation      Evaluation[]
+  datasetId       String?
+  Dataset         Dataset?     @relation(fields: [datasetId], references: [id], onDelete: Cascade)
+  projectId       String?
+  Project         Project?     @relation(fields: [projectId], references: [id], onDelete: Cascade)
+  createdAt       DateTime     @default(now())
+  updatedAt       DateTime     @updatedAt
 }
 
 model Dataset {
@@ -146,6 +152,7 @@ model Dataset {
   createdAt   DateTime @default(now())
   updatedAt   DateTime @updatedAt
   Data        Data[]
+  Run         Run[]
 }
 
 model Promptset {
@@ -184,6 +191,7 @@ model Project {
   Promptset   Promptset[]
   Test        Test[]
   Data        Data[]
+  Run         Run[]
 }
 
 model TeamInvite {

From 699f37e7497f45b8582a8b24bffdecb305bead3d Mon Sep 17 00:00:00 2001
From: Karthik Kalyanaraman <karthik@scale3labs.com>
Date: Wed, 12 Jun 2024 21:32:24 -0700
Subject: [PATCH 08/16] Experiments pagination

---
 .../[project_id]/experiments/page-client.tsx  | 58 ++++++++++++++-----
 1 file changed, 43 insertions(+), 15 deletions(-)

diff --git a/app/(protected)/project/[project_id]/experiments/page-client.tsx b/app/(protected)/project/[project_id]/experiments/page-client.tsx
index d5480106..13130120 100644
--- a/app/(protected)/project/[project_id]/experiments/page-client.tsx
+++ b/app/(protected)/project/[project_id]/experiments/page-client.tsx
@@ -7,6 +7,7 @@ import { cn, formatDateTime } from "@/lib/utils";
 import { Run } from "@prisma/client";
 import { useParams, useRouter } from "next/navigation";
 import { useState } from "react";
+import { useBottomScrollListener } from "react-bottom-scroll-listener";
 import { useQuery } from "react-query";
 import { toast } from "sonner";
 
@@ -14,15 +15,27 @@ export default function Experiments() {
   const router = useRouter();
   const projectId = useParams()?.project_id as string;
   const [comparisonRunIds, setComparisonRunIds] = useState<string[]>([]);
+  const [page, setPage] = useState<number>(1);
+  const [totalPages, setTotalPages] = useState<number>(1);
+  const [currentData, setCurrentData] = useState<any>([]);
+  const [showLoader, setShowLoader] = useState(false);
 
-  const {
-    data: experiments,
-    isLoading: experimentsLoading,
-    error: experimentsError,
-  } = useQuery({
+  const scrollableDivRef = useBottomScrollListener(() => {
+    if (fetchExperiments.isRefetching) {
+      return;
+    }
+    if (page <= totalPages) {
+      setShowLoader(true);
+      fetchExperiments.refetch();
+    }
+  });
+
+  const fetchExperiments = useQuery({
     queryKey: ["fetch-experiments-query"],
     queryFn: async () => {
-      const response = await fetch(`/api/run?projectId=${projectId}`);
+      const response = await fetch(
+        `/api/run?projectId=${projectId}&page=${page}&pageSize=25`
+      );
       if (!response.ok) {
         const error = await response.json();
         throw new Error(error?.message || "Failed to fetch experiments");
@@ -30,6 +43,23 @@ export default function Experiments() {
       const result = await response.json();
       return result;
     },
+    onSuccess: (data) => {
+      const newData = data.runs || [];
+      const metadata = data?.metadata || {};
+
+      setTotalPages(parseInt(metadata?.total_pages) || 1);
+      if (parseInt(metadata?.page) <= parseInt(metadata?.total_pages)) {
+        setPage(parseInt(metadata?.page) + 1);
+      }
+      // Merge the new data with the existing data
+      if (currentData.length > 0) {
+        const updatedData = [...currentData, ...newData];
+        setCurrentData(updatedData);
+      } else {
+        setCurrentData(newData);
+      }
+      setShowLoader(false);
+    },
     onError: (error) => {
       toast.error("Failed to fetch experiments", {
         description: error instanceof Error ? error.message : String(error),
@@ -37,7 +67,7 @@ export default function Experiments() {
     },
   });
 
-  if (experimentsLoading) {
+  if (fetchExperiments.isLoading) {
     return <div>Loading...</div>;
   }
 
@@ -46,13 +76,11 @@ export default function Experiments() {
       <div className="md:px-24 px-12 py-12 flex justify-between bg-muted">
         <h1 className="text-3xl font-semibold">Experiments</h1>
         <div className="flex gap-2">
-          <Button
-            variant={experiments?.runs?.length > 0 ? "outline" : "default"}
-          >
+          <Button variant={currentData.length > 0 ? "outline" : "default"}>
             New Experiment
           </Button>
           <Button
-            variant={experiments?.runs?.length > 0 ? "default" : "outline"}
+            variant={currentData.length > 0 ? "default" : "outline"}
             disabled={comparisonRunIds.length < 2}
             onClick={() => {
               // append comparisonRunIds to query params. & only from the second run id
@@ -67,7 +95,7 @@ export default function Experiments() {
         </div>
       </div>
       <div className="flex flex-col gap-12 w-full px-12">
-        {experiments && experiments.runs && experiments.runs?.length === 0 && (
+        {currentData.length === 0 && (
           <div className="flex flex-col items-center gap-2 mt-24">
             <p className="text-center text-md">
               No experiments found. Get started by running your first
@@ -76,8 +104,8 @@ export default function Experiments() {
             <Button>New Experiment</Button>
           </div>
         )}
-        {experiments && experiments.runs && experiments.runs?.length > 0 && (
-          <div className="overflow-y-scroll">
+        {currentData.length > 0 && (
+          <div className="overflow-y-scroll" ref={scrollableDivRef as any}>
             <table className="table-auto overflow-x-scroll w-max border-separate border border-muted rounded-md mt-6">
               <thead className="bg-muted">
                 <tr>
@@ -107,7 +135,7 @@ export default function Experiments() {
                 </tr>
               </thead>
               <tbody>
-                {experiments?.runs.map((experiment: Run) => {
+                {currentData.map((experiment: Run) => {
                   const log: any = JSON.parse(experiment.log as string);
                   return (
                     <tr

From 987173f7c1abef9589f02d218b44d062eeefa163 Mon Sep 17 00:00:00 2001
From: Karthik Kalyanaraman <karthik@scale3labs.com>
Date: Wed, 12 Jun 2024 21:35:34 -0700
Subject: [PATCH 09/16] deduplicate paginated data

---
 .../project/[project_id]/experiments/page-client.tsx       | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/app/(protected)/project/[project_id]/experiments/page-client.tsx b/app/(protected)/project/[project_id]/experiments/page-client.tsx
index 13130120..b7895152 100644
--- a/app/(protected)/project/[project_id]/experiments/page-client.tsx
+++ b/app/(protected)/project/[project_id]/experiments/page-client.tsx
@@ -54,7 +54,12 @@ export default function Experiments() {
       // Merge the new data with the existing data
       if (currentData.length > 0) {
         const updatedData = [...currentData, ...newData];
-        setCurrentData(updatedData);
+        // Remove duplicates by id, keeping the first occurrence of each run
+        const uniqueData = updatedData.filter(
+          (v: any, i: number, a: any) =>
+            a.findIndex((t: any) => t.id === v.id) === i
+        );
+        setCurrentData(uniqueData);
       } else {
         setCurrentData(newData);
       }

From 1930af7bb311ddd72a5ed31214f84b41c8bfc5c3 Mon Sep 17 00:00:00 2001
From: Karthik Kalyanaraman <karthik@scale3labs.com>
Date: Wed, 12 Jun 2024 22:11:05 -0700
Subject: [PATCH 10/16] Fix loading and error states - experiments

---
 .../experiments/[run_id]/page.tsx             | 158 +++++----
 .../[project_id]/experiments/compare/page.tsx |  39 ++-
 .../[project_id]/experiments/page-client.tsx  | 312 +++++++++++-------
 3 files changed, 298 insertions(+), 211 deletions(-)

diff --git a/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx b/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx
index b9e93ed5..326a72c8 100644
--- a/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx
+++ b/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx
@@ -12,6 +12,7 @@ import {
   SheetHeader,
   SheetTitle,
 } from "@/components/ui/sheet";
+import { Skeleton } from "@/components/ui/skeleton";
 import { cn } from "@/lib/utils";
 import { ChevronLeft, ChevronRight, MoveDiagonal, X } from "lucide-react";
 import { useParams, useRouter } from "next/navigation";
@@ -57,26 +58,24 @@ export default function Experiments() {
     },
   });
 
-  if (experimentLoading) {
-    return <div>Loading...</div>;
-  }
-
   return (
     <div className="w-full flex flex-col gap-4">
       <div className="px-12 py-12 flex justify-between bg-muted">
         <div className="flex flex-col gap-2">
           <h1 className="text-lg font-semibold">Run ID</h1>
           <p className="text-md">{runId}</p>
-          <Badge
-            className={cn(
-              "capitalize w-fit",
-              experiment.status === "success"
-                ? "text-green-600 bg-green-200 hover:bg-green-200"
-                : "text-destructive bg-red-200 hover:bg-red-200"
-            )}
-          >
-            {experiment.status}
-          </Badge>
+          {!experimentError && !experimentLoading && (
+            <Badge
+              className={cn(
+                "capitalize w-fit",
+                experiment.status === "success"
+                  ? "text-green-600 bg-green-200 hover:bg-green-200"
+                  : "text-destructive bg-red-200 hover:bg-red-200"
+              )}
+            >
+              {experiment.status}
+            </Badge>
+          )}
         </div>
         <Button
           variant={
@@ -94,24 +93,30 @@ export default function Experiments() {
             <ChevronLeft className="text-muted-foreground" size={20} />
             Back
           </Button>
-          <Button
-            variant={"outline"}
-            size={"icon"}
-            disabled={!experiment?.samples || experiment?.samples?.length === 0}
-            onClick={() => {
-              setExpand(
-                expand &&
-                  expand.map(() => {
-                    return !expand[0];
-                  })
-              );
-            }}
-          >
-            {expand && expand.some((v: any) => v === false) && (
-              <MoveDiagonal size={20} />
-            )}
-            {expand && !expand.some((v: any) => v === false) && <X size={20} />}
-          </Button>
+          {!experimentError && !experimentLoading && (
+            <Button
+              variant={"outline"}
+              size={"icon"}
+              disabled={
+                !experiment?.samples || experiment?.samples?.length === 0
+              }
+              onClick={() => {
+                setExpand(
+                  expand &&
+                    expand.map(() => {
+                      return !expand[0];
+                    })
+                );
+              }}
+            >
+              {expand && expand.some((v: any) => v === false) && (
+                <MoveDiagonal size={20} />
+              )}
+              {expand && !expand.some((v: any) => v === false) && (
+                <X size={20} />
+              )}
+            </Button>
+          )}
         </div>
         {experiment?.error && (
           <div className="flex flex-col gap-4">
@@ -129,50 +134,65 @@ export default function Experiments() {
             </div>
           </div>
         )}
-        {(!experiment?.samples || experiment?.samples?.length === 0) && (
+        {experimentError && (
           <div className="flex flex-col items-center gap-2 mt-6">
             <p className="text-center text-md">
-              No samples found for this experiment.
+              Failed to fetch the experiment. Please try again later.
             </p>
-            <Button className="w-fit">New Experiment</Button>
           </div>
         )}
-        {experiment?.samples && experiment?.samples?.length > 0 && (
-          <div className="overflow-y-scroll">
-            <table className="table-auto overflow-x-scroll w-screen border-separate border border-muted rounded-md">
-              <thead className="bg-muted">
-                <tr>
-                  <th className="p-2 rounded-md text-sm font-medium">Input</th>
-                  <th className="p-2 rounded-md text-sm font-medium">Target</th>
-                  <th className="p-2 rounded-md text-sm font-medium">{`Output - (${experiment.eval.model})`}</th>
-                  <th className="p-2 rounded-md text-sm font-medium">
-                    Explanation
-                  </th>
-                </tr>
-              </thead>
-              <tbody>
-                {experiment.samples.map((sample: any, i: number) => (
-                  <SampleRow
-                    key={i}
-                    index={i}
-                    plan={experiment.plan}
-                    sample={sample}
-                    model={experiment.eval.model}
-                    expand={expand ? expand[i] : false}
-                    setExpand={(value: boolean, index: number) => {
-                      setExpand(
-                        expand &&
-                          expand.map((_: any, j: number) => {
-                            return j === index ? value : expand[j];
-                          })
-                      );
-                    }}
-                  />
-                ))}
-              </tbody>
-            </table>
+        {((!experimentError && !experimentLoading && !experiment?.samples) ||
+          experiment?.samples?.length === 0) && (
+          <div className="flex flex-col items-center gap-2 mt-6">
+            <p className="text-center text-md">
+              No samples found for this experiment.
+            </p>
+            <Button className="w-fit">New Experiment</Button>
           </div>
         )}
+        {!experimentLoading &&
+          experiment?.samples &&
+          experiment?.samples?.length > 0 && (
+            <div className="overflow-y-scroll">
+              <table className="table-auto overflow-x-scroll w-screen border-separate border border-muted rounded-md">
+                <thead className="bg-muted">
+                  <tr>
+                    <th className="p-2 rounded-md text-sm font-medium">
+                      Input
+                    </th>
+                    <th className="p-2 rounded-md text-sm font-medium">
+                      Target
+                    </th>
+                    <th className="p-2 rounded-md text-sm font-medium">{`Output - (${experiment.eval.model})`}</th>
+                    <th className="p-2 rounded-md text-sm font-medium">
+                      Explanation
+                    </th>
+                  </tr>
+                </thead>
+                <tbody>
+                  {experiment.samples.map((sample: any, i: number) => (
+                    <SampleRow
+                      key={i}
+                      index={i}
+                      plan={experiment.plan}
+                      sample={sample}
+                      model={experiment.eval.model}
+                      expand={expand ? expand[i] : false}
+                      setExpand={(value: boolean, index: number) => {
+                        setExpand(
+                          expand &&
+                            expand.map((_: any, j: number) => {
+                              return j === index ? value : expand[j];
+                            })
+                        );
+                      }}
+                    />
+                  ))}
+                </tbody>
+              </table>
+            </div>
+          )}
+        {experimentLoading && <Skeleton className="w-full h-96" />}
       </div>
     </div>
   );
diff --git a/app/(protected)/project/[project_id]/experiments/compare/page.tsx b/app/(protected)/project/[project_id]/experiments/compare/page.tsx
index 58101d04..d03db789 100644
--- a/app/(protected)/project/[project_id]/experiments/compare/page.tsx
+++ b/app/(protected)/project/[project_id]/experiments/compare/page.tsx
@@ -2,6 +2,7 @@
 
 import { Badge } from "@/components/ui/badge";
 import { Button } from "@/components/ui/button";
+import { Skeleton } from "@/components/ui/skeleton";
 import { cn } from "@/lib/utils";
 import { ChevronLeft } from "lucide-react";
 import { useParams, useRouter, useSearchParams } from "next/navigation";
@@ -50,10 +51,6 @@ export default function Experiments() {
     },
   });
 
-  if (experimentsLoading) {
-    return <div>Loading...</div>;
-  }
-
   return (
     <div className="w-full flex flex-col gap-4">
       <div className="px-12 py-12 flex flex-col gap-2 bg-muted">
@@ -67,16 +64,26 @@ export default function Experiments() {
             Back
           </Button>
         </div>
-        {!experiments ||
-          (experiments?.length === 0 && (
-            <div className="flex flex-col items-center gap-2 mt-6">
-              <p className="text-center text-md">
-                No experiments found for comparison.
-              </p>
-              <Button className="w-fit">New Experiment</Button>
-            </div>
-          ))}
-        {!isComparable && (
+        {!experimentsLoading &&
+          !experimentsError &&
+          (!experiments ||
+            (experiments?.length === 0 && (
+              <div className="flex flex-col items-center gap-2 mt-6">
+                <p className="text-center text-md">
+                  No experiments found for comparison.
+                </p>
+                <Button className="w-fit">New Experiment</Button>
+              </div>
+            )))}
+        {experimentsError && !experimentsLoading && (
+          <div className="flex flex-col items-center gap-2 mt-6">
+            <p className="text-center text-md">
+              Something went wrong while fetching the experiments. Please try
+              again.
+            </p>
+          </div>
+        )}
+        {!experimentsLoading && !isComparable && (
           <div className="flex flex-col items-center gap-2 mt-24">
             <p className="text-center text-md">
               The selected experiments are not comparable. Please select
@@ -88,7 +95,8 @@ export default function Experiments() {
             </Button>
           </div>
         )}
-        {isComparable &&
+        {!experimentsLoading &&
+          isComparable &&
           experiments &&
           experiments[0]?.samples &&
           experiments[0]?.samples?.length > 0 && (
@@ -119,6 +127,7 @@ export default function Experiments() {
               </table>
             </div>
           )}
+        {experimentsLoading && <Skeleton className="w-full h-96" />}
       </div>
     </div>
   );
diff --git a/app/(protected)/project/[project_id]/experiments/page-client.tsx b/app/(protected)/project/[project_id]/experiments/page-client.tsx
index b7895152..45eb665b 100644
--- a/app/(protected)/project/[project_id]/experiments/page-client.tsx
+++ b/app/(protected)/project/[project_id]/experiments/page-client.tsx
@@ -3,6 +3,7 @@
 import { Badge } from "@/components/ui/badge";
 import { Button } from "@/components/ui/button";
 import { Checkbox } from "@/components/ui/checkbox";
+import { Skeleton } from "@/components/ui/skeleton";
 import { cn, formatDateTime } from "@/lib/utils";
 import { Run } from "@prisma/client";
 import { useParams, useRouter } from "next/navigation";
@@ -72,10 +73,6 @@ export default function Experiments() {
     },
   });
 
-  if (fetchExperiments.isLoading) {
-    return <div>Loading...</div>;
-  }
-
   return (
     <div className="w-full flex flex-col gap-4">
       <div className="md:px-24 px-12 py-12 flex justify-between bg-muted">
@@ -99,132 +96,193 @@ export default function Experiments() {
           </Button>
         </div>
       </div>
-      <div className="flex flex-col gap-12 w-full px-12">
-        {currentData.length === 0 && (
-          <div className="flex flex-col items-center gap-2 mt-24">
-            <p className="text-center text-md">
-              No experiments found. Get started by running your first
-              experiment.
-            </p>
-            <Button>New Experiment</Button>
-          </div>
-        )}
-        {currentData.length > 0 && (
-          <div className="overflow-y-scroll" ref={scrollableDivRef as any}>
-            <table className="table-auto overflow-x-scroll w-max border-separate border border-muted rounded-md mt-6">
-              <thead className="bg-muted">
-                <tr>
-                  <th className="w-12 rounded-md p-2">
-                    <Checkbox disabled={true} />
-                  </th>
-                  <th className="p-2 rounded-md text-sm font-medium">Run ID</th>
-                  <th className="p-2 rounded-md text-sm font-medium">
-                    Started at
-                  </th>
-                  <th className="p-2 rounded-md text-sm font-medium">
-                    Completed at
-                  </th>
-                  <th className="p-2 rounded-md text-sm font-medium">Task</th>
-                  <th className="p-2 rounded-md text-sm font-medium">
-                    Total Samples
-                  </th>
-                  <th className="p-2 rounded-md text-sm font-medium">Model</th>
-                  <th className="p-2 rounded-md text-sm font-medium">
-                    Plan Name
-                  </th>
-                  <th className="p-2 rounded-md text-sm font-medium">Scorer</th>
-                  <th className="p-2 rounded-md text-sm font-medium">
-                    Metrics
-                  </th>
-                  <th className="p-2 rounded-md text-sm font-medium">Status</th>
-                </tr>
-              </thead>
-              <tbody>
-                {currentData.map((experiment: Run) => {
-                  const log: any = JSON.parse(experiment.log as string);
-                  return (
-                    <tr
-                      key={experiment.id}
-                      className="hover:cursor-pointer hover:bg-muted"
-                      onClick={() =>
-                        router.push(
-                          `/project/${projectId}/experiments/${log?.eval?.run_id}`
-                        )
-                      }
-                    >
-                      <td
-                        className="px-2 py-1 text-center"
-                        onClick={(e) => e.stopPropagation()}
+      {!fetchExperiments.isLoading && (
+        <div className="flex flex-col gap-12 w-full px-12">
+          {currentData.length === 0 && (
+            <div className="flex flex-col items-center gap-2 mt-24">
+              <p className="text-center text-md">
+                {fetchExperiments.isError
+                  ? "Something went wrong. Please try later."
+                  : "No experiments found. Get started by running your first experiment."}
+              </p>
+              <Button>New Experiment</Button>
+            </div>
+          )}
+          {currentData.length > 0 && (
+            <div className="overflow-y-scroll" ref={scrollableDivRef as any}>
+              <table className="table-auto overflow-x-scroll w-max border-separate border border-muted rounded-md mt-6">
+                <thead className="bg-muted">
+                  <tr>
+                    <th className="w-12 rounded-md p-2">
+                      <Checkbox disabled={true} />
+                    </th>
+                    <th className="p-2 rounded-md text-sm font-medium">
+                      Run ID
+                    </th>
+                    <th className="p-2 rounded-md text-sm font-medium">
+                      Started at
+                    </th>
+                    <th className="p-2 rounded-md text-sm font-medium">
+                      Completed at
+                    </th>
+                    <th className="p-2 rounded-md text-sm font-medium">Task</th>
+                    <th className="p-2 rounded-md text-sm font-medium">
+                      Total Samples
+                    </th>
+                    <th className="p-2 rounded-md text-sm font-medium">
+                      Model
+                    </th>
+                    <th className="p-2 rounded-md text-sm font-medium">
+                      Plan Name
+                    </th>
+                    <th className="p-2 rounded-md text-sm font-medium">
+                      Scorer
+                    </th>
+                    <th className="p-2 rounded-md text-sm font-medium">
+                      Metrics
+                    </th>
+                    <th className="p-2 rounded-md text-sm font-medium">
+                      Status
+                    </th>
+                  </tr>
+                </thead>
+                <tbody>
+                  {currentData.map((experiment: Run) => {
+                    const log: any = JSON.parse(experiment.log as string);
+                    return (
+                      <tr
+                        key={experiment.id}
+                        className="hover:cursor-pointer hover:bg-muted"
+                        onClick={() =>
+                          router.push(
+                            `/project/${projectId}/experiments/${log?.eval?.run_id}`
+                          )
+                        }
                       >
-                        <Checkbox
-                          disabled={log?.status !== "success"}
-                          onCheckedChange={(value) => {
-                            if (value) {
-                              setComparisonRunIds([
-                                ...comparisonRunIds,
-                                log?.eval?.run_id,
-                              ]);
-                            } else {
-                              setComparisonRunIds(
-                                comparisonRunIds.filter(
-                                  (id) => id !== log?.eval?.run_id
+                        <td
+                          className="px-2 py-1 text-center"
+                          onClick={(e) => e.stopPropagation()}
+                        >
+                          <Checkbox
+                            disabled={log?.status !== "success"}
+                            onCheckedChange={(value) => {
+                              if (value) {
+                                setComparisonRunIds([
+                                  ...comparisonRunIds,
+                                  log?.eval?.run_id,
+                                ]);
+                              } else {
+                                setComparisonRunIds(
+                                  comparisonRunIds.filter(
+                                    (id) => id !== log?.eval?.run_id
+                                  )
+                                );
+                              }
+                            }}
+                          />
+                        </td>
+                        <td className="text-sm px-2 py-1">
+                          {log?.eval?.run_id}
+                        </td>
+                        <td className="text-sm px-2 py-1">
+                          {formatDateTime(log?.stats?.started_at, true)}
+                        </td>
+                        <td className="text-sm px-2 py-1">
+                          {formatDateTime(log?.stats?.completed_at, true)}
+                        </td>
+                        <td className="text-sm px-2 py-1">{log?.eval?.task}</td>
+                        <td className="text-sm px-2 py-1">
+                          {log?.samples?.length || 0}
+                        </td>
+                        <td className="text-sm px-2 py-1">
+                          {log?.eval?.model}
+                        </td>
+                        <td className="text-sm px-2 py-1">{log?.plan?.name}</td>
+                        <td className="text-sm px-2 py-1">
+                          {log?.results?.scorer?.name || "N/A"}
+                        </td>
+                        <td className="text-sm px-2 py-1 flex flex-wrap gap-2 w-72">
+                          {log?.results?.metrics
+                            ? Object.keys(log?.results?.metrics).map(
+                                (metric, i) => (
+                                  <Badge
+                                    variant={"outline"}
+                                    className=""
+                                    key={i}
+                                  >
+                                    {metric +
+                                      ": " +
+                                      (log?.results?.metrics as any)[
+                                        metric
+                                      ].value.toFixed(2)}
+                                  </Badge>
                                 )
-                              );
-                            }
-                          }}
-                        />
-                      </td>
-                      <td className="text-sm px-2 py-1">{log?.eval?.run_id}</td>
-                      <td className="text-sm px-2 py-1">
-                        {formatDateTime(log?.stats?.started_at, true)}
-                      </td>
-                      <td className="text-sm px-2 py-1">
-                        {formatDateTime(log?.stats?.completed_at, true)}
-                      </td>
-                      <td className="text-sm px-2 py-1">{log?.eval?.task}</td>
-                      <td className="text-sm px-2 py-1">
-                        {log?.samples?.length || 0}
-                      </td>
-                      <td className="text-sm px-2 py-1">{log?.eval?.model}</td>
-                      <td className="text-sm px-2 py-1">{log?.plan?.name}</td>
-                      <td className="text-sm px-2 py-1">
-                        {log?.results?.scorer?.name || "N/A"}
-                      </td>
-                      <td className="text-sm px-2 py-1 flex flex-wrap gap-2 w-72">
-                        {log?.results?.metrics
-                          ? Object.keys(log?.results?.metrics).map(
-                              (metric, i) => (
-                                <Badge variant={"outline"} className="" key={i}>
-                                  {metric +
-                                    ": " +
-                                    (log?.results?.metrics as any)[
-                                      metric
-                                    ].value.toFixed(2)}
-                                </Badge>
                               )
-                            )
-                          : "N/A"}
-                      </td>
-                      <td className="px-2 py-1">
-                        <Badge
-                          className={cn(
-                            "capitalize",
-                            log?.status === "success"
-                              ? "text-green-600 bg-green-200 hover:bg-green-200"
-                              : "text-destructive bg-red-200 hover:bg-red-200"
-                          )}
-                        >
-                          {log?.status}
-                        </Badge>
-                      </td>
-                    </tr>
-                  );
-                })}
-              </tbody>
-            </table>
-          </div>
-        )}
-      </div>
+                            : "N/A"}
+                        </td>
+                        <td className="px-2 py-1">
+                          <Badge
+                            className={cn(
+                              "capitalize",
+                              log?.status === "success"
+                                ? "text-green-600 bg-green-200 hover:bg-green-200"
+                                : "text-destructive bg-red-200 hover:bg-red-200"
+                            )}
+                          >
+                            {log?.status}
+                          </Badge>
+                        </td>
+                      </tr>
+                    );
+                  })}
+                </tbody>
+              </table>
+            </div>
+          )}
+        </div>
+      )}
+      {fetchExperiments.isLoading && (
+        <div className="flex flex-col gap-12 w-full px-12">
+          <table className="table-auto overflow-x-scroll w-max border-separate border border-muted rounded-md mt-6">
+            <thead className="bg-muted">
+              <tr>
+                <th className="w-12 rounded-md p-2">
+                  <Checkbox disabled={true} />
+                </th>
+                <th className="p-2 rounded-md text-sm font-medium">Run ID</th>
+                <th className="p-2 rounded-md text-sm font-medium">
+                  Started at
+                </th>
+                <th className="p-2 rounded-md text-sm font-medium">
+                  Completed at
+                </th>
+                <th className="p-2 rounded-md text-sm font-medium">Task</th>
+                <th className="p-2 rounded-md text-sm font-medium">
+                  Total Samples
+                </th>
+                <th className="p-2 rounded-md text-sm font-medium">Model</th>
+                <th className="p-2 rounded-md text-sm font-medium">
+                  Plan Name
+                </th>
+                <th className="p-2 rounded-md text-sm font-medium">Scorer</th>
+                <th className="p-2 rounded-md text-sm font-medium">Metrics</th>
+                <th className="p-2 rounded-md text-sm font-medium">Status</th>
+              </tr>
+            </thead>
+            <tbody>
+              {[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11].map((j) => (
+                <tr key={j}>
+                  {[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11].map((i) => (
+                    <td key={i}>
+                      <Skeleton className="h-[20px] w-[120px]" />
+                    </td>
+                  ))}
+                </tr>
+              ))}
+            </tbody>
+          </table>
+        </div>
+      )}
     </div>
   );
 }

From 5bb474185a0ed2dc98050990a917e420ac2c7b5e Mon Sep 17 00:00:00 2001
From: Karthik Kalyanaraman <karthik@scale3labs.com>
Date: Wed, 12 Jun 2024 22:34:58 -0700
Subject: [PATCH 11/16] Rename experiments and evaluations

---
 .../{evaluate => annotations}/page-client.tsx | 12 +++----
 .../{evaluate => annotations}/page.tsx        |  0
 .../datasets/dataset/[dataset_id]/page.tsx    |  8 ++++-
 .../[run_id]/page.tsx                         | 35 +++++++++++++------
 .../compare/page.tsx                          | 25 +++++++------
 .../page-client.tsx                           | 30 ++++++++++------
 .../{experiments => evaluations}/page.tsx     |  0
 .../{evaluate => annotations}/create-test.tsx |  0
 .../{evaluate => annotations}/edit-test.tsx   |  0
 .../eval-scale-picker.tsx                     |  0
 .../evaluation-row.tsx                        |  0
 .../evaluation-table.tsx                      |  0
 .../{evaluate => annotations}/range-scale.tsx |  0
 .../report-utility.tsx                        |  0
 components/shared/nav.tsx                     |  8 ++---
 15 files changed, 75 insertions(+), 43 deletions(-)
 rename app/(protected)/project/[project_id]/{evaluate => annotations}/page-client.tsx (94%)
 rename app/(protected)/project/[project_id]/{evaluate => annotations}/page.tsx (100%)
 rename app/(protected)/project/[project_id]/{experiments => evaluations}/[run_id]/page.tsx (92%)
 rename app/(protected)/project/[project_id]/{experiments => evaluations}/compare/page.tsx (90%)
 rename app/(protected)/project/[project_id]/{experiments => evaluations}/page-client.tsx (92%)
 rename app/(protected)/project/[project_id]/{experiments => evaluations}/page.tsx (100%)
 rename components/{evaluate => annotations}/create-test.tsx (100%)
 rename components/{evaluate => annotations}/edit-test.tsx (100%)
 rename components/{evaluate => annotations}/eval-scale-picker.tsx (100%)
 rename components/{evaluate => annotations}/evaluation-row.tsx (100%)
 rename components/{evaluate => annotations}/evaluation-table.tsx (100%)
 rename components/{evaluate => annotations}/range-scale.tsx (100%)
 rename components/{experiments => evaluations}/report-utility.tsx (100%)

diff --git a/app/(protected)/project/[project_id]/evaluate/page-client.tsx b/app/(protected)/project/[project_id]/annotations/page-client.tsx
similarity index 94%
rename from app/(protected)/project/[project_id]/evaluate/page-client.tsx
rename to app/(protected)/project/[project_id]/annotations/page-client.tsx
index add3f5b6..8632f83a 100644
--- a/app/(protected)/project/[project_id]/evaluate/page-client.tsx
+++ b/app/(protected)/project/[project_id]/annotations/page-client.tsx
@@ -1,12 +1,12 @@
 "use client";
 
-import { EvalChart } from "@/components/charts/eval-chart";
-import LargeChartSkeleton from "@/components/charts/large-chart-skeleton";
-import { CreateTest } from "@/components/evaluate/create-test";
-import { EditTest } from "@/components/evaluate/edit-test";
+import { CreateTest } from "@/components/annotations/create-test";
+import { EditTest } from "@/components/annotations/edit-test";
 import EvaluationTable, {
   EvaluationTableSkeleton,
-} from "@/components/evaluate/evaluation-table";
+} from "@/components/annotations/evaluation-table";
+import { EvalChart } from "@/components/charts/eval-chart";
+import LargeChartSkeleton from "@/components/charts/large-chart-skeleton";
 import { AddtoDataset } from "@/components/shared/add-to-dataset";
 import { Button } from "@/components/ui/button";
 import { Test } from "@prisma/client";
@@ -73,7 +73,7 @@ export default function PageClient({ email }: { email: string }) {
   return (
     <div className="w-full flex flex-col gap-4">
       <div className="md:px-24 px-12 py-12 flex justify-between bg-muted">
-        <h1 className="text-3xl font-semibold">Evaluations</h1>
+        <h1 className="text-3xl font-semibold">Annotate & Measure</h1>
         <div className="flex gap-2">
           <CreateTest projectId={projectId} variant={"outline"} />
           {tests?.tests?.length > 0 && (
diff --git a/app/(protected)/project/[project_id]/evaluate/page.tsx b/app/(protected)/project/[project_id]/annotations/page.tsx
similarity index 100%
rename from app/(protected)/project/[project_id]/evaluate/page.tsx
rename to app/(protected)/project/[project_id]/annotations/page.tsx
diff --git a/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx b/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx
index 0803efba..06587a2f 100644
--- a/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx
+++ b/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx
@@ -9,7 +9,8 @@ import { Button } from "@/components/ui/button";
 import { Separator } from "@/components/ui/separator";
 import { PAGE_SIZE } from "@/lib/constants";
 import { Data } from "@prisma/client";
-import { ChevronLeft } from "lucide-react";
+import { ArrowTopRightIcon } from "@radix-ui/react-icons";
+import { ChevronLeft, FlaskConical } from "lucide-react";
 import { useParams } from "next/navigation";
 import { useState } from "react";
 import { useBottomScrollListener } from "react-bottom-scroll-listener";
@@ -98,6 +99,11 @@ export default function Dataset() {
             datasetId={dataset_id}
             disabled={fetchDataset.isLoading || currentData?.length === 0}
           />
+          <Button variant="outline">
+            Run Evaluation
+            <FlaskConical className="ml-1 h-4 w-4" />
+            <ArrowTopRightIcon className="ml-1 h-4 w-4" />
+          </Button>
         </div>
         <div className="flex flex-col gap-3 rounded-md border border-muted max-h-screen overflow-y-scroll">
           <div className="grid grid-cols-5 items-center justify-stretch gap-3 py-3 px-4 bg-muted">
diff --git a/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx b/app/(protected)/project/[project_id]/evaluations/[run_id]/page.tsx
similarity index 92%
rename from app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx
rename to app/(protected)/project/[project_id]/evaluations/[run_id]/page.tsx
index 326a72c8..11e5d9e6 100644
--- a/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx
+++ b/app/(protected)/project/[project_id]/evaluations/[run_id]/page.tsx
@@ -1,6 +1,6 @@
 "use client";
 
-import { UtilityButton } from "@/components/experiments/report-utility";
+import { UtilityButton } from "@/components/evaluations/report-utility";
 import { Conversation } from "@/components/shared/conversation-view";
 import { Badge } from "@/components/ui/badge";
 import { Button } from "@/components/ui/button";
@@ -14,13 +14,20 @@ import {
 } from "@/components/ui/sheet";
 import { Skeleton } from "@/components/ui/skeleton";
 import { cn } from "@/lib/utils";
-import { ChevronLeft, ChevronRight, MoveDiagonal, X } from "lucide-react";
+import { ArrowTopRightIcon } from "@radix-ui/react-icons";
+import {
+  ChevronLeft,
+  ChevronRight,
+  FlaskConical,
+  MoveDiagonal,
+  X,
+} from "lucide-react";
 import { useParams, useRouter } from "next/navigation";
 import { useState } from "react";
 import { useQuery } from "react-query";
 import { toast } from "sonner";
 
-export default function Experiments() {
+export default function Evaluation() {
   const router = useRouter();
   const runId = useParams()?.run_id as string;
   const projectId = useParams()?.project_id as string;
@@ -36,11 +43,11 @@ export default function Experiments() {
       );
       if (!response.ok) {
         const error = await response.json();
-        throw new Error(error?.message || "Failed to fetch the experiment");
+        throw new Error(error?.message || "Failed to fetch the evaluation");
       }
       const result = await response.json();
       if (!result.run || !result.run.log) {
-        throw new Error("No experiment found");
+        throw new Error("No evaluations found");
       }
       const exp = JSON.parse(result.run.log);
       setExperiment(exp);
@@ -52,7 +59,7 @@ export default function Experiments() {
       return result;
     },
     onError: (error) => {
-      toast.error("Failed to fetch the experiment", {
+      toast.error("Failed to fetch the evaluation", {
         description: error instanceof Error ? error.message : String(error),
       });
     },
@@ -84,7 +91,9 @@ export default function Experiments() {
               : "default"
           }
         >
-          New Experiment
+          New Evaluation
+          <FlaskConical className="ml-1 h-4 w-4" />
+          <ArrowTopRightIcon className="ml-1 h-4 w-4" />
         </Button>
       </div>
       <div className="flex flex-col gap-6 w-full px-12">
@@ -121,7 +130,7 @@ export default function Experiments() {
         {experiment?.error && (
           <div className="flex flex-col gap-4">
             <p className="text-xl text-center font-semibold">
-              An error occurred while running this experiment. See below for
+              An error occurred while running this evaluation. See below for
               more details
             </p>
             <div className="flex flex-col gap-2 p-2 border border-muted-foreground bg-muted rounded-md">
@@ -137,7 +146,7 @@ export default function Experiments() {
         {experimentError && (
           <div className="flex flex-col items-center gap-2 mt-6">
             <p className="text-center text-md">
-              Failed to fetch the experiment. Please try again later.
+              Failed to fetch the evaluation. Please try again later.
             </p>
           </div>
         )}
@@ -145,9 +154,13 @@ export default function Experiments() {
           experiment?.samples?.length === 0) && (
           <div className="flex flex-col items-center gap-2 mt-6">
             <p className="text-center text-md">
-              No samples found for this experiment.
+              No samples found for this evaluation.
             </p>
-            <Button className="w-fit">New Experiment</Button>
+            <Button className="w-fit">
+              New Evaluation
+              <FlaskConical className="ml-1 h-4 w-4" />
+              <ArrowTopRightIcon className="ml-1 h-4 w-4" />
+            </Button>
           </div>
         )}
         {!experimentLoading &&
diff --git a/app/(protected)/project/[project_id]/experiments/compare/page.tsx b/app/(protected)/project/[project_id]/evaluations/compare/page.tsx
similarity index 90%
rename from app/(protected)/project/[project_id]/experiments/compare/page.tsx
rename to app/(protected)/project/[project_id]/evaluations/compare/page.tsx
index d03db789..f61d4fc2 100644
--- a/app/(protected)/project/[project_id]/experiments/compare/page.tsx
+++ b/app/(protected)/project/[project_id]/evaluations/compare/page.tsx
@@ -4,13 +4,14 @@ import { Badge } from "@/components/ui/badge";
 import { Button } from "@/components/ui/button";
 import { Skeleton } from "@/components/ui/skeleton";
 import { cn } from "@/lib/utils";
-import { ChevronLeft } from "lucide-react";
+import { ArrowTopRightIcon } from "@radix-ui/react-icons";
+import { ChevronLeft, FlaskConical } from "lucide-react";
 import { useParams, useRouter, useSearchParams } from "next/navigation";
 import { useState } from "react";
 import { useQuery } from "react-query";
 import { toast } from "sonner";
 
-export default function Experiments() {
+export default function Compare() {
   const router = useRouter();
   // get run id from query params
   const searchParams = useSearchParams();
@@ -31,11 +32,11 @@ export default function Experiments() {
         );
         if (!response.ok) {
           const error = await response.json();
-          throw new Error(error?.message || "Failed to fetch the experiment");
+          throw new Error(error?.message || "Failed to fetch the evaluation");
         }
         const result = await response.json();
         if (!result.run || !result.run.log) {
-          throw new Error("No experiment found");
+          throw new Error("No evaluation found");
         }
         return JSON.parse(result.run.log);
       });
@@ -45,7 +46,7 @@ export default function Experiments() {
       return exps;
     },
     onError: (error) => {
-      toast.error("Failed to fetch one or more experiments", {
+      toast.error("Failed to fetch one or more evaluations", {
         description: error instanceof Error ? error.message : String(error),
       });
     },
@@ -70,15 +71,19 @@ export default function Experiments() {
             (experiments?.length === 0 && (
               <div className="flex flex-col items-center gap-2 mt-6">
                 <p className="text-center text-md">
-                  No experiments found for comparison.
+                  No evaluations found for comparison.
                 </p>
-                <Button className="w-fit">New Experiment</Button>
+                <Button className="w-fit">
+                  New Evaluation
+                  <FlaskConical className="ml-1 h-4 w-4" />
+                  <ArrowTopRightIcon className="ml-1 h-4 w-4" />
+                </Button>
               </div>
             )))}
         {experimentsError && !experimentsLoading && (
           <div className="flex flex-col items-center gap-2 mt-6">
             <p className="text-center text-md">
-              Something went wrong while fetching the experiments. Please try
+              Something went wrong while fetching the evaluations. Please try
               again.
             </p>
           </div>
@@ -86,8 +91,8 @@ export default function Experiments() {
         {!experimentsLoading && !isComparable && (
           <div className="flex flex-col items-center gap-2 mt-24">
             <p className="text-center text-md">
-              The selected experiments are not comparable. Please select
-              experiments ran against the same dataset.
+              The selected evaluations are not comparable. Please select
+              evaluations ran against the same dataset.
             </p>
             <Button onClick={() => router.back()}>
               <ChevronLeft className="text-muted-foreground" size={20} />
diff --git a/app/(protected)/project/[project_id]/experiments/page-client.tsx b/app/(protected)/project/[project_id]/evaluations/page-client.tsx
similarity index 92%
rename from app/(protected)/project/[project_id]/experiments/page-client.tsx
rename to app/(protected)/project/[project_id]/evaluations/page-client.tsx
index 45eb665b..3f22f2bd 100644
--- a/app/(protected)/project/[project_id]/experiments/page-client.tsx
+++ b/app/(protected)/project/[project_id]/evaluations/page-client.tsx
@@ -6,13 +6,15 @@ import { Checkbox } from "@/components/ui/checkbox";
 import { Skeleton } from "@/components/ui/skeleton";
 import { cn, formatDateTime } from "@/lib/utils";
 import { Run } from "@prisma/client";
+import { ArrowTopRightIcon } from "@radix-ui/react-icons";
+import { FlaskConical } from "lucide-react";
 import { useParams, useRouter } from "next/navigation";
 import { useState } from "react";
 import { useBottomScrollListener } from "react-bottom-scroll-listener";
 import { useQuery } from "react-query";
 import { toast } from "sonner";
 
-export default function Experiments() {
+export default function Evaluations() {
   const router = useRouter();
   const projectId = useParams()?.project_id as string;
   const [comparisonRunIds, setComparisonRunIds] = useState<string[]>([]);
@@ -39,7 +41,7 @@ export default function Experiments() {
       );
       if (!response.ok) {
         const error = await response.json();
-        throw new Error(error?.message || "Failed to fetch experiments");
+        throw new Error(error?.message || "Failed to fetch evaluations");
       }
       const result = await response.json();
       return result;
@@ -67,7 +69,7 @@ export default function Experiments() {
       setShowLoader(false);
     },
     onError: (error) => {
-      toast.error("Failed to fetch experiments", {
+      toast.error("Failed to fetch evaluations", {
         description: error instanceof Error ? error.message : String(error),
       });
     },
@@ -76,11 +78,8 @@ export default function Experiments() {
   return (
     <div className="w-full flex flex-col gap-4">
       <div className="md:px-24 px-12 py-12 flex justify-between bg-muted">
-        <h1 className="text-3xl font-semibold">Experiments</h1>
+        <h1 className="text-3xl font-semibold">Evaluations</h1>
         <div className="flex gap-2">
-          <Button variant={currentData.length > 0 ? "outline" : "default"}>
-            New Experiment
-          </Button>
           <Button
             variant={currentData.length > 0 ? "default" : "outline"}
             disabled={comparisonRunIds.length < 2}
@@ -89,11 +88,16 @@ export default function Experiments() {
               const query = comparisonRunIds
                 .map((runId, i) => (i === 0 ? "" : "&") + "run_id=" + runId)
                 .join("");
-              router.push(`/project/${projectId}/experiments/compare?${query}`);
+              router.push(`/project/${projectId}/evaluations/compare?${query}`);
             }}
           >
             Compare
           </Button>
+          <Button variant={currentData.length > 0 ? "outline" : "default"}>
+            New Evaluation
+            <FlaskConical className="ml-1 h-4 w-4" />
+            <ArrowTopRightIcon className="ml-1 h-4 w-4" />
+          </Button>
         </div>
       </div>
       {!fetchExperiments.isLoading && (
@@ -103,9 +107,13 @@ export default function Experiments() {
               <p className="text-center text-md">
                 {fetchExperiments.isError
                   ? "Something went wrong. Please try later."
-                  : "No experiments found. Get started by running your first experiment."}
+                  : "No evaluations found. Get started by running your first evaluation."}
               </p>
-              <Button>New Experiment</Button>
+              <Button>
+                New Evaluation
+                <FlaskConical className="ml-1 h-4 w-4" />
+                <ArrowTopRightIcon className="ml-1 h-4 w-4" />
+              </Button>
             </div>
           )}
           {currentData.length > 0 && (
@@ -155,7 +163,7 @@ export default function Experiments() {
                         className="hover:cursor-pointer hover:bg-muted"
                         onClick={() =>
                           router.push(
-                            `/project/${projectId}/experiments/${log?.eval?.run_id}`
+                            `/project/${projectId}/evaluations/${log?.eval?.run_id}`
                           )
                         }
                       >
diff --git a/app/(protected)/project/[project_id]/experiments/page.tsx b/app/(protected)/project/[project_id]/evaluations/page.tsx
similarity index 100%
rename from app/(protected)/project/[project_id]/experiments/page.tsx
rename to app/(protected)/project/[project_id]/evaluations/page.tsx
diff --git a/components/evaluate/create-test.tsx b/components/annotations/create-test.tsx
similarity index 100%
rename from components/evaluate/create-test.tsx
rename to components/annotations/create-test.tsx
diff --git a/components/evaluate/edit-test.tsx b/components/annotations/edit-test.tsx
similarity index 100%
rename from components/evaluate/edit-test.tsx
rename to components/annotations/edit-test.tsx
diff --git a/components/evaluate/eval-scale-picker.tsx b/components/annotations/eval-scale-picker.tsx
similarity index 100%
rename from components/evaluate/eval-scale-picker.tsx
rename to components/annotations/eval-scale-picker.tsx
diff --git a/components/evaluate/evaluation-row.tsx b/components/annotations/evaluation-row.tsx
similarity index 100%
rename from components/evaluate/evaluation-row.tsx
rename to components/annotations/evaluation-row.tsx
diff --git a/components/evaluate/evaluation-table.tsx b/components/annotations/evaluation-table.tsx
similarity index 100%
rename from components/evaluate/evaluation-table.tsx
rename to components/annotations/evaluation-table.tsx
diff --git a/components/evaluate/range-scale.tsx b/components/annotations/range-scale.tsx
similarity index 100%
rename from components/evaluate/range-scale.tsx
rename to components/annotations/range-scale.tsx
diff --git a/components/experiments/report-utility.tsx b/components/evaluations/report-utility.tsx
similarity index 100%
rename from components/experiments/report-utility.tsx
rename to components/evaluations/report-utility.tsx
diff --git a/components/shared/nav.tsx b/components/shared/nav.tsx
index 931d8a16..1f797012 100644
--- a/components/shared/nav.tsx
+++ b/components/shared/nav.tsx
@@ -24,16 +24,16 @@ const ProjectNavLinks = (id: string) => [
     href: `/project/${id}/traces`,
   },
   {
-    name: "Evaluate",
-    href: `/project/${id}/evaluate`,
+    name: "Annotations",
+    href: `/project/${id}/annotations`,
   },
   {
     name: "Datasets",
     href: `/project/${id}/datasets`,
   },
   {
-    name: "Experiments",
-    href: `/project/${id}/experiments`,
+    name: "Evaluations",
+    href: `/project/${id}/evaluations`,
   },
   {
     name: "Playground",

From b1655880106d033c639bd42f4dfa5ec83f8ba5b0 Mon Sep 17 00:00:00 2001
From: Karthik Kalyanaraman <karthik@scale3labs.com>
Date: Wed, 12 Jun 2024 22:36:28 -0700
Subject: [PATCH 12/16] Add loading spinner

---
 .../project/[project_id]/evaluations/page-client.tsx        | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/app/(protected)/project/[project_id]/evaluations/page-client.tsx b/app/(protected)/project/[project_id]/evaluations/page-client.tsx
index 3f22f2bd..1566093a 100644
--- a/app/(protected)/project/[project_id]/evaluations/page-client.tsx
+++ b/app/(protected)/project/[project_id]/evaluations/page-client.tsx
@@ -1,5 +1,6 @@
 "use client";
 
+import { Spinner } from "@/components/shared/spinner";
 import { Badge } from "@/components/ui/badge";
 import { Button } from "@/components/ui/button";
 import { Checkbox } from "@/components/ui/checkbox";
@@ -245,6 +246,11 @@ export default function Evaluations() {
                   })}
                 </tbody>
               </table>
+              {showLoader && (
+                <div className="flex justify-center py-8">
+                  <Spinner className="h-8 w-8 text-center" />
+                </div>
+              )}
             </div>
           )}
         </div>

From 2d30fe96c19338d0cd9253a0728e5a435927fd0c Mon Sep 17 00:00:00 2001
From: Karthik Kalyanaraman <karthik@scale3labs.com>
Date: Wed, 12 Jun 2024 23:43:11 -0700
Subject: [PATCH 13/16] Link evaluation

---
 .../datasets/dataset/[dataset_id]/page.tsx    | 45 ++++++++++++-------
 .../evaluations/[run_id]/page.tsx             | 38 +++++++++-------
 .../[project_id]/evaluations/compare/page.tsx | 14 +++---
 .../[project_id]/evaluations/page-client.tsx  | 26 ++++++-----
 lib/constants.ts                              |  2 +
 5 files changed, 77 insertions(+), 48 deletions(-)

diff --git a/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx b/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx
index 06587a2f..34acb744 100644
--- a/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx
+++ b/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx
@@ -5,12 +5,14 @@ import DatasetRowSkeleton from "@/components/project/dataset/dataset-row-skeleto
 import { EditData } from "@/components/project/dataset/edit-data";
 import { DownloadDataset } from "@/components/shared/download-dataset";
 import { Spinner } from "@/components/shared/spinner";
+import { Badge } from "@/components/ui/badge";
 import { Button } from "@/components/ui/button";
 import { Separator } from "@/components/ui/separator";
-import { PAGE_SIZE } from "@/lib/constants";
+import { EVALUATIONS_DOCS_URL, PAGE_SIZE } from "@/lib/constants";
 import { Data } from "@prisma/client";
 import { ArrowTopRightIcon } from "@radix-ui/react-icons";
 import { ChevronLeft, FlaskConical } from "lucide-react";
+import Link from "next/link";
 import { useParams } from "next/navigation";
 import { useState } from "react";
 import { useBottomScrollListener } from "react-bottom-scroll-listener";
@@ -88,22 +90,31 @@ export default function Dataset() {
   } else {
     return (
       <div className="w-full py-6 px-6 flex flex-col gap-4">
-        <div className="flex gap-4 items-center w-fit">
-          <Button variant="secondary" onClick={() => window.history.back()}>
-            <ChevronLeft className="mr-1" />
-            Back
-          </Button>
-          <CreateData datasetId={dataset_id} />
-          <DownloadDataset
-            projectId={projectId}
-            datasetId={dataset_id}
-            disabled={fetchDataset.isLoading || currentData?.length === 0}
-          />
-          <Button variant="outline">
-            Run Evaluation
-            <FlaskConical className="ml-1 h-4 w-4" />
-            <ArrowTopRightIcon className="ml-1 h-4 w-4" />
-          </Button>
+        <div className="flex justify-between items-center">
+          <div className="flex gap-4 items-center w-fit">
+            <Button variant="secondary" onClick={() => window.history.back()}>
+              <ChevronLeft className="mr-1" />
+              Back
+            </Button>
+            <CreateData datasetId={dataset_id} />
+            <DownloadDataset
+              projectId={projectId}
+              datasetId={dataset_id}
+              disabled={fetchDataset.isLoading || currentData?.length === 0}
+            />
+          </div>
+          <div className="flex gap-4 items-center w-fit">
+            <Badge variant={"outline"} className="text-sm">
+              Dataset ID: {dataset_id}
+            </Badge>
+            <Link href={EVALUATIONS_DOCS_URL}>
+              <Button variant="outline">
+                Run Evaluation
+                <FlaskConical className="ml-1 h-4 w-4" />
+                <ArrowTopRightIcon className="ml-1 h-4 w-4" />
+              </Button>
+            </Link>
+          </div>
         </div>
         <div className="flex flex-col gap-3 rounded-md border border-muted max-h-screen overflow-y-scroll">
           <div className="grid grid-cols-5 items-center justify-stretch gap-3 py-3 px-4 bg-muted">
diff --git a/app/(protected)/project/[project_id]/evaluations/[run_id]/page.tsx b/app/(protected)/project/[project_id]/evaluations/[run_id]/page.tsx
index 11e5d9e6..fecb5a2a 100644
--- a/app/(protected)/project/[project_id]/evaluations/[run_id]/page.tsx
+++ b/app/(protected)/project/[project_id]/evaluations/[run_id]/page.tsx
@@ -13,6 +13,7 @@ import {
   SheetTitle,
 } from "@/components/ui/sheet";
 import { Skeleton } from "@/components/ui/skeleton";
+import { EVALUATIONS_DOCS_URL } from "@/lib/constants";
 import { cn } from "@/lib/utils";
 import { ArrowTopRightIcon } from "@radix-ui/react-icons";
 import {
@@ -22,6 +23,7 @@ import {
   MoveDiagonal,
   X,
 } from "lucide-react";
+import Link from "next/link";
 import { useParams, useRouter } from "next/navigation";
 import { useState } from "react";
 import { useQuery } from "react-query";
@@ -84,17 +86,19 @@ export default function Evaluation() {
             </Badge>
           )}
         </div>
-        <Button
-          variant={
-            experiment && experiment?.samples?.length > 0
-              ? "outline"
-              : "default"
-          }
-        >
-          New Evaluation
-          <FlaskConical className="ml-1 h-4 w-4" />
-          <ArrowTopRightIcon className="ml-1 h-4 w-4" />
-        </Button>
+        <Link href={EVALUATIONS_DOCS_URL}>
+          <Button
+            variant={
+              experiment && experiment?.samples?.length > 0
+                ? "outline"
+                : "default"
+            }
+          >
+            New Evaluation
+            <FlaskConical className="ml-1 h-4 w-4" />
+            <ArrowTopRightIcon className="ml-1 h-4 w-4" />
+          </Button>
+        </Link>
       </div>
       <div className="flex flex-col gap-6 w-full px-12">
         <div className="flex gap-2">
@@ -156,11 +160,13 @@ export default function Evaluation() {
             <p className="text-center text-md">
               No samples found for this evaluation.
             </p>
-            <Button className="w-fit">
-              New Evaluation
-              <FlaskConical className="ml-1 h-4 w-4" />
-              <ArrowTopRightIcon className="ml-1 h-4 w-4" />
-            </Button>
+            <Link href={EVALUATIONS_DOCS_URL}>
+              <Button className="w-fit">
+                New Evaluation
+                <FlaskConical className="ml-1 h-4 w-4" />
+                <ArrowTopRightIcon className="ml-1 h-4 w-4" />
+              </Button>
+            </Link>
           </div>
         )}
         {!experimentLoading &&
diff --git a/app/(protected)/project/[project_id]/evaluations/compare/page.tsx b/app/(protected)/project/[project_id]/evaluations/compare/page.tsx
index f61d4fc2..420f3fdd 100644
--- a/app/(protected)/project/[project_id]/evaluations/compare/page.tsx
+++ b/app/(protected)/project/[project_id]/evaluations/compare/page.tsx
@@ -3,9 +3,11 @@
 import { Badge } from "@/components/ui/badge";
 import { Button } from "@/components/ui/button";
 import { Skeleton } from "@/components/ui/skeleton";
+import { EVALUATIONS_DOCS_URL } from "@/lib/constants";
 import { cn } from "@/lib/utils";
 import { ArrowTopRightIcon } from "@radix-ui/react-icons";
 import { ChevronLeft, FlaskConical } from "lucide-react";
+import Link from "next/link";
 import { useParams, useRouter, useSearchParams } from "next/navigation";
 import { useState } from "react";
 import { useQuery } from "react-query";
@@ -73,11 +75,13 @@ export default function Compare() {
                 <p className="text-center text-md">
                   No evaluations found for comparison.
                 </p>
-                <Button className="w-fit">
-                  New Evaluation
-                  <FlaskConical className="ml-1 h-4 w-4" />
-                  <ArrowTopRightIcon className="ml-1 h-4 w-4" />
-                </Button>
+                <Link href={EVALUATIONS_DOCS_URL}>
+                  <Button className="w-fit">
+                    New Evaluation
+                    <FlaskConical className="ml-1 h-4 w-4" />
+                    <ArrowTopRightIcon className="ml-1 h-4 w-4" />
+                  </Button>
+                </Link>
               </div>
             )))}
         {experimentsError && !experimentsLoading && (
diff --git a/app/(protected)/project/[project_id]/evaluations/page-client.tsx b/app/(protected)/project/[project_id]/evaluations/page-client.tsx
index 1566093a..2156110a 100644
--- a/app/(protected)/project/[project_id]/evaluations/page-client.tsx
+++ b/app/(protected)/project/[project_id]/evaluations/page-client.tsx
@@ -5,10 +5,12 @@ import { Badge } from "@/components/ui/badge";
 import { Button } from "@/components/ui/button";
 import { Checkbox } from "@/components/ui/checkbox";
 import { Skeleton } from "@/components/ui/skeleton";
+import { EVALUATIONS_DOCS_URL } from "@/lib/constants";
 import { cn, formatDateTime } from "@/lib/utils";
 import { Run } from "@prisma/client";
 import { ArrowTopRightIcon } from "@radix-ui/react-icons";
 import { FlaskConical } from "lucide-react";
+import Link from "next/link";
 import { useParams, useRouter } from "next/navigation";
 import { useState } from "react";
 import { useBottomScrollListener } from "react-bottom-scroll-listener";
@@ -94,11 +96,13 @@ export default function Evaluations() {
           >
             Compare
           </Button>
-          <Button variant={currentData.length > 0 ? "outline" : "default"}>
-            New Evaluation
-            <FlaskConical className="ml-1 h-4 w-4" />
-            <ArrowTopRightIcon className="ml-1 h-4 w-4" />
-          </Button>
+          <Link href={EVALUATIONS_DOCS_URL}>
+            <Button variant={currentData.length > 0 ? "outline" : "default"}>
+              New Evaluation
+              <FlaskConical className="ml-1 h-4 w-4" />
+              <ArrowTopRightIcon className="ml-1 h-4 w-4" />
+            </Button>
+          </Link>
         </div>
       </div>
       {!fetchExperiments.isLoading && (
@@ -110,11 +114,13 @@ export default function Evaluations() {
                   ? "Something went wrong. Please try later."
                   : "No evaluations found. Get started by running your first evaluation."}
               </p>
-              <Button>
-                New Evaluation
-                <FlaskConical className="ml-1 h-4 w-4" />
-                <ArrowTopRightIcon className="ml-1 h-4 w-4" />
-              </Button>
+              <Link href={EVALUATIONS_DOCS_URL}>
+                <Button>
+                  New Evaluation
+                  <FlaskConical className="ml-1 h-4 w-4" />
+                  <ArrowTopRightIcon className="ml-1 h-4 w-4" />
+                </Button>
+              </Link>
             </div>
           )}
           {currentData.length > 0 && (
diff --git a/lib/constants.ts b/lib/constants.ts
index e5d9a15b..766e65ad 100644
--- a/lib/constants.ts
+++ b/lib/constants.ts
@@ -5,6 +5,8 @@ import {
 } from "@langtrase/trace-attributes";
 import { Test } from "@prisma/client";
 import { TiktokenEncoding } from "js-tiktoken";
+export const EVALUATIONS_DOCS_URL =
+  "https://docs.langtrace.ai/features/evaluations";
 
 export const SCHEDULE_CALL_LINK =
   "https://calendar.app.google/Go5gXNPcqZjAY4i47";

From 405dc9207d4a97dafdf82a460c646097daaab0df Mon Sep 17 00:00:00 2001
From: Karthik Kalyanaraman <karthik@scale3labs.com>
Date: Wed, 12 Jun 2024 23:44:10 -0700
Subject: [PATCH 14/16] Link to evals

---
 .../project/[project_id]/evaluations/[run_id]/page.tsx        | 4 ++--
 .../project/[project_id]/evaluations/compare/page.tsx         | 2 +-
 .../project/[project_id]/evaluations/page-client.tsx          | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/app/(protected)/project/[project_id]/evaluations/[run_id]/page.tsx b/app/(protected)/project/[project_id]/evaluations/[run_id]/page.tsx
index fecb5a2a..758f9dcd 100644
--- a/app/(protected)/project/[project_id]/evaluations/[run_id]/page.tsx
+++ b/app/(protected)/project/[project_id]/evaluations/[run_id]/page.tsx
@@ -86,7 +86,7 @@ export default function Evaluation() {
             </Badge>
           )}
         </div>
-        <Link href={EVALUATIONS_DOCS_URL}>
+        <Link href={EVALUATIONS_DOCS_URL} target="_blank">
           <Button
             variant={
               experiment && experiment?.samples?.length > 0
@@ -160,7 +160,7 @@ export default function Evaluation() {
             <p className="text-center text-md">
               No samples found for this evaluation.
             </p>
-            <Link href={EVALUATIONS_DOCS_URL}>
+            <Link href={EVALUATIONS_DOCS_URL} target="_blank">
               <Button className="w-fit">
                 New Evaluation
                 <FlaskConical className="ml-1 h-4 w-4" />
diff --git a/app/(protected)/project/[project_id]/evaluations/compare/page.tsx b/app/(protected)/project/[project_id]/evaluations/compare/page.tsx
index 420f3fdd..347cdcc9 100644
--- a/app/(protected)/project/[project_id]/evaluations/compare/page.tsx
+++ b/app/(protected)/project/[project_id]/evaluations/compare/page.tsx
@@ -75,7 +75,7 @@ export default function Compare() {
                 <p className="text-center text-md">
                   No evaluations found for comparison.
                 </p>
-                <Link href={EVALUATIONS_DOCS_URL}>
+                <Link href={EVALUATIONS_DOCS_URL} target="_blank">
                   <Button className="w-fit">
                     New Evaluation
                     <FlaskConical className="ml-1 h-4 w-4" />
diff --git a/app/(protected)/project/[project_id]/evaluations/page-client.tsx b/app/(protected)/project/[project_id]/evaluations/page-client.tsx
index 2156110a..9dbb1643 100644
--- a/app/(protected)/project/[project_id]/evaluations/page-client.tsx
+++ b/app/(protected)/project/[project_id]/evaluations/page-client.tsx
@@ -96,7 +96,7 @@ export default function Evaluations() {
           >
             Compare
           </Button>
-          <Link href={EVALUATIONS_DOCS_URL}>
+          <Link href={EVALUATIONS_DOCS_URL} target="_blank">
             <Button variant={currentData.length > 0 ? "outline" : "default"}>
               New Evaluation
               <FlaskConical className="ml-1 h-4 w-4" />
@@ -114,7 +114,7 @@ export default function Evaluations() {
                   ? "Something went wrong. Please try later."
                   : "No evaluations found. Get started by running your first evaluation."}
               </p>
-              <Link href={EVALUATIONS_DOCS_URL}>
+              <Link href={EVALUATIONS_DOCS_URL} target="_blank">
                 <Button>
                   New Evaluation
                   <FlaskConical className="ml-1 h-4 w-4" />

From dca161111590601932eadb87cc0c0cfb5613bf00 Mon Sep 17 00:00:00 2001
From: Karthik Kalyanaraman <karthik@scale3labs.com>
Date: Wed, 12 Jun 2024 23:46:23 -0700
Subject: [PATCH 15/16] Rename expectedOutput to annotatedOutput in data API routes

---
 app/api/data/route.ts             | 6 +++---
 app/api/dataset/download/route.ts | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/app/api/data/route.ts b/app/api/data/route.ts
index fb186916..e0bfd9ff 100644
--- a/app/api/data/route.ts
+++ b/app/api/data/route.ts
@@ -91,7 +91,7 @@ export async function POST(req: NextRequest) {
         input: data.input,
         output: data.output,
         contexts: data.contexts || [],
-        expectedOutput: data.expectedOutput || "",
+        annotatedOutput: data.annotatedOutput || "",
         note: data.note || "",
         spanId: data.spanId || "",
         projectId: projectId || "",
@@ -130,7 +130,7 @@ export async function PUT(req: NextRequest) {
     }
 
     const data = await req.json();
-    const { id, input, output, expectedOutput, contexts, note } = data;
+    const { id, input, output, annotatedOutput, contexts, note } = data;
 
     const result = await prisma.data.update({
       where: {
@@ -139,7 +139,7 @@ export async function PUT(req: NextRequest) {
       data: {
         input,
         output,
-        expectedOutput,
+        annotatedOutput,
         contexts,
         note,
       },
diff --git a/app/api/dataset/download/route.ts b/app/api/dataset/download/route.ts
index cb29e18f..b26f0968 100644
--- a/app/api/dataset/download/route.ts
+++ b/app/api/dataset/download/route.ts
@@ -87,7 +87,7 @@ export async function GET(req: NextRequest) {
     });
 
     const result: any = [];
-    // convert the data to include only input, output and expectedOutput fields. Rename output to target
+    // convert the data to include only input, output and annotatedOutput fields. Rename output to target
     data.forEach((d) => {
       result.push({
         input: d.input,

From 4a5ea2c2eacc4cd291965100ad446567098d459a Mon Sep 17 00:00:00 2001
From: Karthik Kalyanaraman <karthik@scale3labs.com>
Date: Thu, 13 Jun 2024 12:12:35 -0700
Subject: [PATCH 16/16] Minor UI fixes

---
 .../[project_id]/datasets/dataset/[dataset_id]/page.tsx  | 2 +-
 components/project/traces/traces.tsx                     | 9 +++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx b/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx
index 34acb744..4cd9c82e 100644
--- a/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx
+++ b/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx
@@ -107,7 +107,7 @@ export default function Dataset() {
             <Badge variant={"outline"} className="text-sm">
               Dataset ID: {dataset_id}
             </Badge>
-            <Link href={EVALUATIONS_DOCS_URL}>
+            <Link href={EVALUATIONS_DOCS_URL} target="_blank">
               <Button variant="outline">
                 Run Evaluation
                 <FlaskConical className="ml-1 h-4 w-4" />
diff --git a/components/project/traces/traces.tsx b/components/project/traces/traces.tsx
index 4a41af56..b74ab45b 100644
--- a/components/project/traces/traces.tsx
+++ b/components/project/traces/traces.tsx
@@ -175,14 +175,15 @@ export default function Traces({ email }: { email: string }) {
               </label>
             </div>
           ))}
-          <div>
+          <div className="flex items-center gap-1">
             <Button
-              className="h-5 w-3 dark:bg-white bg-muted border border-black hover:bg-secondary"
+              variant={"outline"}
+              size={"icon"}
               onClick={() => setIsFilterDialogOpen(true)}
             >
-              <FilterListIcon className="cursor-pointer h-4 dark:bg-white text-black" />
+              <FilterListIcon className="cursor-pointer" />
             </Button>
-            <label className="text-xs font-semibold px-2">Filters</label>
+            <p className="text-xs font-semibold">Advanced Filters</p>
           </div>
         </div>
         <div className="flex gap-2 items-center">