From b1781058a587ea8f74a047308cbe67b6cc5117de Mon Sep 17 00:00:00 2001 From: Karthik Kalyanaraman Date: Thu, 4 Apr 2024 00:01:40 -0700 Subject: [PATCH 01/16] Pagination bug --- components/project/eval/eval.tsx | 4 ++-- components/project/eval/prompts.tsx | 4 ++-- lib/services/trace_service.ts | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/components/project/eval/eval.tsx b/components/project/eval/eval.tsx index 8441bfdc..7b139ddd 100644 --- a/components/project/eval/eval.tsx +++ b/components/project/eval/eval.tsx @@ -62,9 +62,9 @@ export default function Eval({ email }: { email: string }) { } if (result) { if (data) { - setData((prevData: any) => [...prevData, ...result.prompts.result]); + setData((prevData: any) => [...prevData, ...result?.prompts?.result || []]); } else { - setData(result.prompts.result); + setData(result?.prompts?.result || []); } } setPage((currentPage) => currentPage + 1); diff --git a/components/project/eval/prompts.tsx b/components/project/eval/prompts.tsx index bb385183..9ea37d51 100644 --- a/components/project/eval/prompts.tsx +++ b/components/project/eval/prompts.tsx @@ -52,9 +52,9 @@ export default function Prompts({ email }: { email: string }) { } if (result) { if (data) { - setData((prevData: any) => [...prevData, ...result.prompts.result]); + setData((prevData: any) => [...prevData, ...result?.prompts?.result || []]); } else { - setData(result.prompts.result); + setData(result?.prompts?.result || []); } } setPage((currentPage) => currentPage + 1); diff --git a/lib/services/trace_service.ts b/lib/services/trace_service.ts index 1761aa06..ad7c35c7 100644 --- a/lib/services/trace_service.ts +++ b/lib/services/trace_service.ts @@ -244,7 +244,7 @@ export class TraceService implements ITraceService { const md = { page, page_size: pageSize, total_pages: totalPages }; if (page! > totalPages) { - throw Error("Page number is greater than total pages"); + page = totalPages; } const query = sql.select( `* FROM ${project_id} WHERE attributes LIKE '%${attribute}%' ORDER BY 'start_time' DESC LIMIT ${pageSize} OFFSET ${ @@ -325,7 +325,7 @@ export class TraceService implements ITraceService { const md = { page, page_size: pageSize, total_pages: totalPages }; if (page! > totalPages) { - throw Error("Page number is greater than total pages"); + page = totalPages; } const query = sql.select( `* FROM ${project_id} ORDER BY 'createdAt' DESC LIMIT ${pageSize} OFFSET ${ From 0e963e226af17ffdba6282496d1979c6e30c20ad Mon Sep 17 00:00:00 2001 From: Karthik Kalyanaraman Date: Thu, 4 Apr 2024 17:13:49 -0700 Subject: [PATCH 02/16] Bug fix --- app/api/user/route.ts | 1 - components/project/traces.tsx | 2 -- lib/utils.ts | 1 + 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/app/api/user/route.ts b/app/api/user/route.ts index d7e95f34..a10d34bf 100644 --- a/app/api/user/route.ts +++ b/app/api/user/route.ts @@ -83,7 +83,6 @@ export async function PUT(req: NextRequest) { } if ("status" in data) { - console.log("updating status"); const user = await prisma.user.update({ where: { id, diff --git a/components/project/traces.tsx b/components/project/traces.tsx index 523aa352..1f56c03e 100644 --- a/components/project/traces.tsx +++ b/components/project/traces.tsx @@ -80,8 +80,6 @@ export default function Traces({ email }: { email: string }) { } // Merge the new data with the existing data - console.log("currentData", currentData); - console.log("newData", newData); if (currentData.length > 0) { const updatedData = [...currentData, ...newData]; diff --git a/lib/utils.ts b/lib/utils.ts index 584d22f2..2c6fd519 100644 --- a/lib/utils.ts +++ b/lib/utils.ts @@ -326,6 +326,7 @@ export function calculatePriceFromUsage( output_tokens: number; } ): any { + if (!model) return { total: 0, input: 0, output: 0 }; if (vendor === "openai") { const costTable = OPENAI_PRICING[model.includes("gpt-4") ? "gpt-4" : model]; if (costTable) { From af27884c839d3900fd9e778381a986d0631877b6 Mon Sep 17 00:00:00 2001 From: Karthik Kalyanaraman Date: Sun, 9 Jun 2024 22:23:27 -0700 Subject: [PATCH 03/16] Experiments view --- .../[project_id]/evaluate/page-client.tsx | 59 +- .../experiments/[run_id]/page.tsx | 319 +++++++++ .../experiments/experiment_result.ts | 629 ++++++++++++++++++ .../[project_id]/experiments/page-client.tsx | 109 ++- components/evaluate/create-test.tsx | 4 + components/evaluate/evaluation-row.tsx | 2 +- components/evaluate/evaluation-table.tsx | 9 +- .../playground/prompt-registry-dialog.tsx | 2 +- components/shared/add-to-dataset.tsx | 2 +- components/shared/conversation-view.tsx | 52 +- components/shared/llm-picker.tsx | 2 +- components/shared/nav.tsx | 8 +- components/shared/vendor-metadata.tsx | 30 +- components/traces/trace_graph.tsx | 5 +- components/ui/sheet.tsx | 2 +- lib/utils.ts | 56 ++ 16 files changed, 1199 insertions(+), 91 deletions(-) create mode 100644 app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx create mode 100644 app/(protected)/project/[project_id]/experiments/experiment_result.ts diff --git a/app/(protected)/project/[project_id]/evaluate/page-client.tsx b/app/(protected)/project/[project_id]/evaluate/page-client.tsx index adb86b11..add3f5b6 100644 --- a/app/(protected)/project/[project_id]/evaluate/page-client.tsx +++ b/app/(protected)/project/[project_id]/evaluate/page-client.tsx @@ -8,9 +8,7 @@ import EvaluationTable, { EvaluationTableSkeleton, } from "@/components/evaluate/evaluation-table"; import { AddtoDataset } from "@/components/shared/add-to-dataset"; -import { Separator } from "@/components/ui/separator"; -import { Skeleton } from "@/components/ui/skeleton"; -import { cn } from "@/lib/utils"; +import { Button } from "@/components/ui/button"; import { Test } from "@prisma/client"; import { RabbitIcon } from "lucide-react"; import { useParams } from "next/navigation"; @@ -123,57 +121,12 @@ export default function PageClient({ email }: { email: string }) { function PageSkeleton() { return (
-
-
- {Array.from({ length: 5 }).map((_, i) => { - return ( -
-
-
- -
- - - -
- -
- ); - })} -
-
-
-
-
-
- -
- - - -
- - - -
- -
-
- -
- +
+ +
+
+
); diff --git a/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx b/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx new file mode 100644 index 00000000..82c8973b --- /dev/null +++ b/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx @@ -0,0 +1,319 @@ +"use client"; + +import { Conversation } from "@/components/shared/conversation-view"; +import { Badge } from "@/components/ui/badge"; +import { Button } from "@/components/ui/button"; +import { Checkbox } from "@/components/ui/checkbox"; +import { Separator } from "@/components/ui/separator"; +import { + Sheet, + SheetContent, + SheetDescription, + SheetHeader, + SheetTitle, +} from "@/components/ui/sheet"; +import { cn } from "@/lib/utils"; +import { ChevronLeft, ChevronRight, Copy, MoveDiagonal, X } from "lucide-react"; +import { useParams, useRouter } from "next/navigation"; +import { useState } from "react"; +import { toast } from "sonner"; +import data from "../experiment_result"; + +export default function Experiments() { + const router = useRouter(); + const runId = useParams()?.run_id as string; + const experiment = data.find((exp) => exp.eval.run_id === runId); + const [expand, setExpand] = useState( + experiment?.samples && experiment?.samples?.length > 0 + ? experiment?.samples.map(() => false) + : [] + ); + return ( +
+
+

Run ID: {runId}

+ +
+
+
+ + +
+ {!experiment || + (experiment?.samples?.length === 0 && ( +
+

+ No samples found for this experiment. +

+ +
+ ))} + {experiment?.samples && experiment?.samples?.length > 0 && ( +
+ + + + + + + + + + + + {experiment.samples.map((sample, i) => ( + { + setExpand( + expand.map((_, j) => { + return j === index ? value : expand[j]; + }) + ); + }} + /> + ))} + +
+ + InputTarget{`Output - (${experiment.eval.model})`} + Explanation +
+
+ )} +
+
+ ); +} + +function SampleRow({ + index, + sample, + plan, + model, + expand, + setExpand, +}: { + index: number; + sample: any; + plan: any; + model: string; + expand: boolean; + setExpand: (value: boolean, index: number) => void; +}) { + const [open, setOpen] = useState(false); + return ( + setOpen(!open)} + > + + e.stopPropagation()} /> + + + + {typeof sample.input === "string" + ? sample.input + : Array.isArray(sample.input) + ? sample.input[sample.input.length - 1].content + : ""} + + + + {sample.target || "none"} + + + 0 + ? sample.output.choices[sample.output.choices.length - 1].message + ?.content + : "" + } + /> +
+ + {sample.score?.value === "I" ? "INCORRECT" : "CORRECT"} + + + {sample.output?.model || ""} + +

+ {sample.output?.choices && sample.output.choices?.length > 0 + ? sample.output.choices[sample.output.choices.length - 1].message + ?.content + : ""} +

+
+ + + + {sample.score?.explanation || "none"} + + + e.preventDefault()} + onInteractOutside={(e) => { + e.preventDefault(); + }} + className={"overflow-y-scroll w-1/4"} + onClick={(e) => e.stopPropagation()} + > + + Messages + + Messages exchanged between the model and the user. + + + +

Plan

+
+ {plan.steps.map((step: any, i: number) => ( +
+ {step.solver} + {i < plan.steps.length - 1 && } +
+ ))} +
+ +

Messages

+ +
+
+ + ); +} + +function ExpandContractButton({ + index, + expand, + setExpand, +}: { + index: number; + expand: boolean; + setExpand: (expand: boolean, index: number) => void; +}) { + return ( + + ); +} + +function CopyButton({ text }: { text: string }) { + return ( + + ); +} + +function UtilityButton({ + index, + expand, + setExpand, + text, +}: { + index: number; + expand: boolean; + setExpand: (expand: boolean, index: number) => void; + text: string; +}) { + return ( +
+ + +
+ ); +} diff --git a/app/(protected)/project/[project_id]/experiments/experiment_result.ts b/app/(protected)/project/[project_id]/experiments/experiment_result.ts new file mode 100644 index 00000000..1f44338f --- /dev/null +++ b/app/(protected)/project/[project_id]/experiments/experiment_result.ts @@ -0,0 +1,629 @@ +let data = [ + { + version: 1, + status: "success", + eval: { + task: "security_guide", + task_version: 0, + task_file: "src/main.py", + task_id: "WjZbe8iaXETqiGGECFzW8E", + run_id: "k9FsZc425gEjCFNrXszPqW", + created: "2024-06-08T15:48:55", + dataset: { + name: "security_guide", + location: "example://security_guide", + }, + model: "openai/gpt-3.5-turbo", + task_attribs: {}, + task_args: {}, + model_args: {}, + config: {}, + packages: { + inspect_ai: "0.3.10", + }, + }, + plan: { + name: "plan", + steps: [ + { + solver: "generate", + params: {}, + }, + ], + config: {}, + }, + results: { + scorer: { + name: "includes", + params: {}, + }, + metrics: { + accuracy: { + name: "accuracy", + value: 0.5, + options: {}, + }, + bootstrap_std: { + name: "bootstrap_std", + value: 0.35385166383669864, + options: {}, + }, + }, + }, + stats: { + started_at: "2024-06-08T15:48:55", + completed_at: "2024-06-08T15:48:58", + model_usage: { + "openai/gpt-3.5-turbo": { + input_tokens: 30, + output_tokens: 451, + total_tokens: 481, + }, + }, + }, + samples: [ + { + id: 1, + epoch: 1, + input: [ + { + content: "How do I prevent SQL Injection attacks?", + source: "input", + role: "user", + }, + ], + target: "use parameterized queries and prepared statements", + messages: [ + { + content: "How do I prevent SQL Injection attacks?", + source: "input", + role: "user", + }, + { + content: + "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.", + source: "generate", + role: "assistant", + }, + ], + output: { + model: "gpt-3.5-turbo-0125", + choices: [ + { + message: { + content: + "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.", + source: "generate", + role: "assistant", + }, + stop_reason: "stop", + }, + ], + usage: { + input_tokens: 15, + output_tokens: 222, + total_tokens: 237, + }, + }, + score: { + value: "I", + answer: + "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.", + explanation: + "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.", + }, + metadata: {}, + }, + { + id: 2, + epoch: 1, + input: [ + { + content: "How do I prevent SQL Injection attacks?", + source: "input", + role: "user", + }, + ], + target: "Use parameterized queries", + messages: [ + { + content: "How do I prevent SQL Injection attacks?", + source: "input", + role: "user", + }, + { + content: + "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.", + source: "generate", + role: "assistant", + }, + ], + output: { + model: "gpt-3.5-turbo-0125", + choices: [ + { + message: { + content: + "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.", + source: "generate", + role: "assistant", + }, + stop_reason: "stop", + }, + ], + usage: { + input_tokens: 15, + output_tokens: 229, + total_tokens: 244, + }, + }, + score: { + value: "C", + answer: + "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.", + explanation: + "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.", + }, + metadata: {}, + }, + ], + logging: [], + }, + { + version: 1, + status: "success", + eval: { + task: "security_guide", + task_version: 0, + task_file: "src/main.py", + task_id: "FuR38tV2t6a3jKjED9Qq8R", + run_id: "aYmmWwRL8PBH8wPnBo3e3Z", + created: "2024-06-08T16:22:05", + dataset: { + name: "security_guide", + location: "example://security_guide", + }, + model: "openai/gpt-3.5-turbo", + task_attribs: {}, + task_args: {}, + model_args: {}, + config: {}, + packages: { + inspect_ai: "0.3.10", + }, + }, + plan: { + name: "plan", + steps: [ + { + solver: "generate", + params: {}, + }, + ], + config: {}, + }, + results: { + scorer: { + name: "check_length", + params: {}, + }, + metrics: { + accuracy: { + name: "accuracy", + value: 0.0, + options: {}, + }, + }, + }, + stats: { + started_at: "2024-06-08T16:22:05", + completed_at: "2024-06-08T16:22:06", + model_usage: { + "openai/gpt-3.5-turbo": { + input_tokens: 32, + output_tokens: 74, + total_tokens: 106, + }, + }, + }, + samples: [ + { + id: 1, + epoch: 1, + input: [ + { + content: "Write a funny tweet about newyork city?", + source: "input", + role: "user", + }, + ], + target: "", + messages: [ + { + content: "Write a funny tweet about newyork city?", + source: "input", + role: "user", + }, + { + content: + '"Just spent 3 hours trying to find a public restroom in NYC, ended up accidentally walking into a trendy hipster coffee shop bathroom where they charge $10 for a latte with a side of TP #cityproblems #NYC"', + source: "generate", + role: "assistant", + }, + ], + output: { + model: "gpt-3.5-turbo-0125", + choices: [ + { + message: { + content: + '"Just spent 3 hours trying to find a public restroom in NYC, ended up accidentally walking into a trendy hipster coffee shop bathroom where they charge $10 for a latte with a side of TP #cityproblems #NYC"', + source: "generate", + role: "assistant", + }, + stop_reason: "stop", + }, + ], + usage: { + input_tokens: 17, + output_tokens: 47, + total_tokens: 64, + }, + }, + score: { + value: "I", + answer: + '"Just spent 3 hours trying to find a public restroom in NYC, ended up accidentally walking into a trendy hipster coffee shop bathroom where they charge $10 for a latte with a side of TP #cityproblems #NYC"', + }, + metadata: {}, + }, + { + id: 2, + epoch: 1, + input: [ + { + content: "Write a funny tweet about vancouver?", + source: "input", + role: "user", + }, + ], + target: "", + messages: [ + { + content: "Write a funny tweet about vancouver?", + source: "input", + role: "user", + }, + { + content: + "Just saw someone try to use an umbrella in Vancouver... and that's how you know they're not a local. #raincitylife", + source: "generate", + role: "assistant", + }, + ], + output: { + model: "gpt-3.5-turbo-0125", + choices: [ + { + message: { + content: + "Just saw someone try to use an umbrella in Vancouver... and that's how you know they're not a local. #raincitylife", + source: "generate", + role: "assistant", + }, + stop_reason: "stop", + }, + ], + usage: { + input_tokens: 15, + output_tokens: 27, + total_tokens: 42, + }, + }, + score: { + value: "I", + answer: + "Just saw someone try to use an umbrella in Vancouver... and that's how you know they're not a local. #raincitylife", + }, + metadata: {}, + }, + ], + logging: [], + }, + { + version: 1, + status: "success", + eval: { + task: "security_guide", + task_version: 0, + task_file: "src/main.py", + task_id: "WaGqtQ7sZYWkmmDYcubjiv", + run_id: "ar7LzC2PefbchfLMosfAWF", + created: "2024-05-30T11:06:32", + dataset: { + name: "security_guide", + location: "example://security_guide", + }, + model: "openai/gpt-3.5-turbo", + task_attribs: {}, + task_args: {}, + model_args: {}, + config: {}, + packages: { + inspect_ai: "0.3.10", + }, + }, + plan: { + name: "plan", + steps: [ + { + solver: "chain_of_thought", + params: {}, + }, + { + solver: "generate", + params: {}, + }, + { + solver: "self_critique", + params: {}, + }, + ], + config: {}, + }, + results: { + scorer: { + name: "model_graded_fact", + params: {}, + }, + metrics: { + accuracy: { + name: "accuracy", + value: 1, + options: {}, + }, + bootstrap_std: { + name: "bootstrap_std", + value: 0, + options: {}, + }, + }, + }, + stats: { + started_at: "2024-05-30T11:06:32", + completed_at: "2024-05-30T11:06:45", + model_usage: { + "openai/gpt-3.5-turbo": { + input_tokens: 1677, + output_tokens: 728, + total_tokens: 2405, + }, + }, + }, + samples: [ + { + id: 1, + epoch: 1, + input: "How do I prevent SQL Injection attacks?", + target: "use parameterized queries and prepared statements", + messages: [ + { + content: + '\nHow do I prevent SQL Injection attacks?\n\nBefore answering, reason in a step-by-step manner as to get the right answer. Provide your answer at the end on its own line in the form "ANSWER: $ANSWER" (without quotes) where $ANSWER is the answer to the question.\n', + source: "input", + role: "user", + }, + { + content: + "1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack that allows attackers to manipulate the SQL code of a web application to gain unauthorized access to the database or to perform harmful actions.\n\n2. Sanitize user inputs: One of the most common ways attackers exploit SQL Injection vulnerabilities is through user inputs. Make sure to validate and sanitize all user inputs to prevent malicious code from being executed.\n\n3. Use parameterized queries: Instead of dynamically creating SQL queries by concatenating strings, use parameterized queries which automatically sanitize inputs by treating them as data rather than executable code.\n\n4. Limit database permissions: Ensure that the database user account used by your application has the least amount of privileges necessary. Avoid using an admin account for regular application tasks.\n\n5. Update and patch regularly: Make sure your database management system, web server, and application software are up to date with the latest security patches to protect against known vulnerabilities.\n\n6. Implement input validation: Validate and sanitize user inputs on both client-side and server-side to reduce the risk of SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, sanitize user inputs, use parameterized queries, limit database permissions, update and patch regularly, and implement input validation.", + source: "generate", + role: "assistant", + }, + { + content: + '\nGiven the following question, initial answer and critique please generate an improved answer to the question:\n\n[BEGIN DATA]\n***\n[Question]: How do I prevent SQL Injection attacks?\n***\n[Answer]: 1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack that allows attackers to manipulate the SQL code of a web application to gain unauthorized access to the database or to perform harmful actions.\n\n2. Sanitize user inputs: One of the most common ways attackers exploit SQL Injection vulnerabilities is through user inputs. Make sure to validate and sanitize all user inputs to prevent malicious code from being executed.\n\n3. Use parameterized queries: Instead of dynamically creating SQL queries by concatenating strings, use parameterized queries which automatically sanitize inputs by treating them as data rather than executable code.\n\n4. Limit database permissions: Ensure that the database user account used by your application has the least amount of privileges necessary. Avoid using an admin account for regular application tasks.\n\n5. Update and patch regularly: Make sure your database management system, web server, and application software are up to date with the latest security patches to protect against known vulnerabilities.\n\n6. Implement input validation: Validate and sanitize user inputs on both client-side and server-side to reduce the risk of SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, sanitize user inputs, use parameterized queries, limit database permissions, update and patch regularly, and implement input validation.\n***\n[Critique]: The answer provides a good overview of ways to prevent SQL Injection attacks, but it lacks depth and specificity in explaining each point. It would be beneficial to provide more details and examples for each prevention method to ensure a comprehensive understanding for the reader. Additionally, the answer could benefit from explaining why each prevention method is effective in stopping SQL Injection attacks.\n***\n[END DATA]\n\nIf the original answer is already correct, just repeat the original answer exactly. Provide your answer at the end on its own line in the form "ANSWER: $ANSWER" (without quotes) where $ANSWER is the answer to the question.\n', + role: "user", + }, + { + content: + "Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.", + source: "generate", + role: "assistant", + }, + ], + output: { + model: "gpt-3.5-turbo-0125", + choices: [ + { + message: { + content: + "Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.", + source: "generate", + role: "assistant", + }, + stop_reason: "stop", + }, + ], + usage: { + input_tokens: 738, + output_tokens: 294, + total_tokens: 1032, + }, + }, + score: { + value: "C", + answer: + "Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.", + explanation: + "1. The expert answer mentions using parameterized queries and prepared statements as a way to prevent SQL Injection attacks.\n2. The submission also includes using parameterized queries as one of the ways to prevent SQL Injection attacks.\n3. The submission expands on the concept, explaining what SQL Injection attacks are, sanitizing user inputs, limiting database permissions, updating software regularly, and implementing input validation.\n4. All additional points mentioned in the submission are relevant and contribute to preventing SQL Injection attacks.\n\nAfter a thorough comparison, it is evident that the submission contains the content mentioned in the expert answer. \n\nGRADE: C", + metadata: { + grading: [ + { + content: + "\nYou are comparing a submitted answer to an expert answer on a given question. Here is the data:\n\n[BEGIN DATA]\n************\n[Question]: How do I prevent SQL Injection attacks?\n************\n[Expert]: use parameterized queries and prepared statements\n************\n[Submission]: Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.\n************\n[END DATA]\n\nCompare the factual content of the submitted answer with the expert answer. Ignore any differences in style, grammar, or punctuation.\n\nDoes the submission contain the content in the expert answer?\n\n\nAfter assessing the submitted answer, reply with 'GRADE: $LETTER' (without quotes) where LETTER is one of CI. Please choose ONE option for the grade: either \"C\" for correct answers, or \"I\" for incorrect answers.\n\nFor example, after reviewing a correct answer you might write 'GRADE: C' or after reviewing an incorrect answer you might write 'GRADE: I'.\n\nFirst, write out in a step by step manner your reasoning about the criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. Then, end with your answer formatted as 'GRADE: $LETTER' (without quotes) where LETTER is one of CI.\n\n", + role: "user", + }, + { + content: + "1. The expert answer mentions using parameterized queries and prepared statements as a way to prevent SQL Injection attacks.\n2. The submission also includes using parameterized queries as one of the ways to prevent SQL Injection attacks.\n3. The submission expands on the concept, explaining what SQL Injection attacks are, sanitizing user inputs, limiting database permissions, updating software regularly, and implementing input validation.\n4. All additional points mentioned in the submission are relevant and contribute to preventing SQL Injection attacks.\n\nAfter a thorough comparison, it is evident that the submission contains the content mentioned in the expert answer. \n\nGRADE: C", + source: "generate", + role: "assistant", + }, + ], + }, + }, + metadata: {}, + }, + ], + logging: [], + }, + { + version: 1, + status: "success", + eval: { + task: "security_guide", + task_version: 0, + task_file: "src/main.py", + task_id: "WaGqtQ7sZYWkmmDYcubjiv", + run_id: "ar7LzC2PefbchfLMasaasaF", + created: "2024-05-30T11:06:32", + dataset: { + name: "security_guide", + location: "example://security_guide", + }, + model: "openai/gpt-4", + task_attribs: {}, + task_args: {}, + model_args: {}, + config: {}, + packages: { + inspect_ai: "0.3.10", + }, + }, + plan: { + name: "plan", + steps: [ + { + solver: "chain_of_thought", + params: {}, + }, + { + solver: "generate", + params: {}, + }, + { + solver: "self_critique", + params: {}, + }, + ], + config: {}, + }, + results: { + scorer: { + name: "model_graded_fact", + params: {}, + }, + metrics: { + accuracy: { + name: "accuracy", + value: 1, + options: {}, + }, + bootstrap_std: { + name: "bootstrap_std", + value: 0, + options: {}, + }, + }, + }, + stats: { + started_at: "2024-05-30T11:06:32", + completed_at: "2024-05-30T11:06:45", + model_usage: { + "openai/gpt-4": { + input_tokens: 1677, + output_tokens: 728, + total_tokens: 2405, + }, + }, + }, + samples: [ + { + id: 1, + epoch: 1, + input: "How do I prevent SQL Injection attacks?", + target: "use parameterized queries and prepared statements", + messages: [ + { + content: + '\nHow do I prevent SQL Injection attacks?\n\nBefore answering, reason in a step-by-step manner as to get the right answer. Provide your answer at the end on its own line in the form "ANSWER: $ANSWER" (without quotes) where $ANSWER is the answer to the question.\n', + source: "input", + role: "user", + }, + { + content: + "1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack that allows attackers to manipulate the SQL code of a web application to gain unauthorized access to the database or to perform harmful actions.\n\n2. Sanitize user inputs: One of the most common ways attackers exploit SQL Injection vulnerabilities is through user inputs. Make sure to validate and sanitize all user inputs to prevent malicious code from being executed.\n\n3. Use parameterized queries: Instead of dynamically creating SQL queries by concatenating strings, use parameterized queries which automatically sanitize inputs by treating them as data rather than executable code.\n\n4. Limit database permissions: Ensure that the database user account used by your application has the least amount of privileges necessary. Avoid using an admin account for regular application tasks.\n\n5. Update and patch regularly: Make sure your database management system, web server, and application software are up to date with the latest security patches to protect against known vulnerabilities.\n\n6. Implement input validation: Validate and sanitize user inputs on both client-side and server-side to reduce the risk of SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, sanitize user inputs, use parameterized queries, limit database permissions, update and patch regularly, and implement input validation.", + source: "generate", + role: "assistant", + }, + { + content: + '\nGiven the following question, initial answer and critique please generate an improved answer to the question:\n\n[BEGIN DATA]\n***\n[Question]: How do I prevent SQL Injection attacks?\n***\n[Answer]: 1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack that allows attackers to manipulate the SQL code of a web application to gain unauthorized access to the database or to perform harmful actions.\n\n2. Sanitize user inputs: One of the most common ways attackers exploit SQL Injection vulnerabilities is through user inputs. Make sure to validate and sanitize all user inputs to prevent malicious code from being executed.\n\n3. Use parameterized queries: Instead of dynamically creating SQL queries by concatenating strings, use parameterized queries which automatically sanitize inputs by treating them as data rather than executable code.\n\n4. Limit database permissions: Ensure that the database user account used by your application has the least amount of privileges necessary. Avoid using an admin account for regular application tasks.\n\n5. Update and patch regularly: Make sure your database management system, web server, and application software are up to date with the latest security patches to protect against known vulnerabilities.\n\n6. Implement input validation: Validate and sanitize user inputs on both client-side and server-side to reduce the risk of SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, sanitize user inputs, use parameterized queries, limit database permissions, update and patch regularly, and implement input validation.\n***\n[Critique]: The answer provides a good overview of ways to prevent SQL Injection attacks, but it lacks depth and specificity in explaining each point. It would be beneficial to provide more details and examples for each prevention method to ensure a comprehensive understanding for the reader. Additionally, the answer could benefit from explaining why each prevention method is effective in stopping SQL Injection attacks.\n***\n[END DATA]\n\nIf the original answer is already correct, just repeat the original answer exactly. Provide your answer at the end on its own line in the form "ANSWER: $ANSWER" (without quotes) where $ANSWER is the answer to the question.\n', + role: "user", + }, + { + content: + "Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.", + source: "generate", + role: "assistant", + }, + ], + output: { + model: "gpt-4", + choices: [ + { + message: { + content: + "Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.", + source: "generate", + role: "assistant", + }, + stop_reason: "stop", + }, + ], + usage: { + input_tokens: 738, + output_tokens: 294, + total_tokens: 1032, + }, + }, + score: { + value: "C", + answer: + "Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.", + explanation: + "1. The expert answer mentions using parameterized queries and prepared statements as a way to prevent SQL Injection attacks.\n2. The submission also includes using parameterized queries as one of the ways to prevent SQL Injection attacks.\n3. The submission expands on the concept, explaining what SQL Injection attacks are, sanitizing user inputs, limiting database permissions, updating software regularly, and implementing input validation.\n4. All additional points mentioned in the submission are relevant and contribute to preventing SQL Injection attacks.\n\nAfter a thorough comparison, it is evident that the submission contains the content mentioned in the expert answer. \n\nGRADE: C", + metadata: { + grading: [ + { + content: + "\nYou are comparing a submitted answer to an expert answer on a given question. Here is the data:\n\n[BEGIN DATA]\n************\n[Question]: How do I prevent SQL Injection attacks?\n************\n[Expert]: use parameterized queries and prepared statements\n************\n[Submission]: Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.\n************\n[END DATA]\n\nCompare the factual content of the submitted answer with the expert answer. Ignore any differences in style, grammar, or punctuation.\n\nDoes the submission contain the content in the expert answer?\n\n\nAfter assessing the submitted answer, reply with 'GRADE: $LETTER' (without quotes) where LETTER is one of CI. Please choose ONE option for the grade: either \"C\" for correct answers, or \"I\" for incorrect answers.\n\nFor example, after reviewing a correct answer you might write 'GRADE: C' or after reviewing an incorrect answer you might write 'GRADE: I'.\n\nFirst, write out in a step by step manner your reasoning about the criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. Then, end with your answer formatted as 'GRADE: $LETTER' (without quotes) where LETTER is one of CI.\n\n", + role: "user", + }, + { + content: + "1. The expert answer mentions using parameterized queries and prepared statements as a way to prevent SQL Injection attacks.\n2. The submission also includes using parameterized queries as one of the ways to prevent SQL Injection attacks.\n3. The submission expands on the concept, explaining what SQL Injection attacks are, sanitizing user inputs, limiting database permissions, updating software regularly, and implementing input validation.\n4. All additional points mentioned in the submission are relevant and contribute to preventing SQL Injection attacks.\n\nAfter a thorough comparison, it is evident that the submission contains the content mentioned in the expert answer. \n\nGRADE: C", + source: "generate", + role: "assistant", + }, + ], + }, + }, + metadata: {}, + }, + ], + logging: [], + }, +]; + +export default data; diff --git a/app/(protected)/project/[project_id]/experiments/page-client.tsx b/app/(protected)/project/[project_id]/experiments/page-client.tsx index 28d289d5..f84d000f 100644 --- a/app/(protected)/project/[project_id]/experiments/page-client.tsx +++ b/app/(protected)/project/[project_id]/experiments/page-client.tsx @@ -1,3 +1,110 @@ +"use client"; + +import { Badge } from "@/components/ui/badge"; +import { Button } from "@/components/ui/button"; +import { Checkbox } from "@/components/ui/checkbox"; +import { cn, formatDateTime } from "@/lib/utils"; +import { useParams, useRouter } from "next/navigation"; +import data from "./experiment_result"; + export default function Experiments() { - return <>; + const router = useRouter(); + const projectId = useParams()?.project_id as string; + return ( +
+
+

Experiments

+ +
+
+
+ + + + + + + + + + + + + + + + + + {data.map((experiment) => ( + + router.push( + `/project/${projectId}/experiments/${experiment.eval.run_id}` + ) + } + > + + + + + + + + + + + + + ))} + +
+ + Run ID + Started at + + Completed at + Task + Total Samples + Model + Plan Name + ScorerMetricsStatus
+ + + {experiment.eval.run_id} + + {formatDateTime(experiment.stats.started_at, true)} + + {formatDateTime(experiment.stats.completed_at, true)} + {experiment.eval.task} + {experiment.samples?.length || 0} + {experiment.eval.model}{experiment.plan.name} + {experiment.results?.scorer?.name || "N/A"} + + {Object.keys(experiment.results?.metrics).map( + (metric, i) => ( + + {metric + + ": " + + (experiment.results.metrics as any)[ + metric + ].value.toFixed(2)} + + ) + )} + + {experiment.status} +
+
+
+
+ ); } diff --git a/components/evaluate/create-test.tsx b/components/evaluate/create-test.tsx index 64271c3c..38af12ae 100644 --- a/components/evaluate/create-test.tsx +++ b/components/evaluate/create-test.tsx @@ -106,6 +106,10 @@ export function CreateTest({ if (parseInt(data.min, 10) === parseInt(data.max, 10)) { throw new Error("Min score cannot be equal to max score."); } + // check for a range of at least 2 + if (parseInt(data.max, 10) - parseInt(data.min, 10) < 2) { + throw new Error("Range must be at least 2."); + } setBusy(true); await fetch("/api/test", { method: "POST", diff --git a/components/evaluate/evaluation-row.tsx b/components/evaluate/evaluation-row.tsx index 28d70d98..2955064e 100644 --- a/components/evaluate/evaluation-row.tsx +++ b/components/evaluate/evaluation-row.tsx @@ -218,7 +218,7 @@ export default function EvaluationRow({ onInteractOutside={(e) => { e.preventDefault(); }} - className={cn("w-[1500px] overflow-y-scroll")} + className={cn("w-2/3 overflow-y-scroll")} onClick={(e) => e.stopPropagation()} > diff --git a/components/evaluate/evaluation-table.tsx b/components/evaluate/evaluation-table.tsx index 49442d10..25ccf220 100644 --- a/components/evaluate/evaluation-table.tsx +++ b/components/evaluate/evaluation-table.tsx @@ -227,17 +227,14 @@ export default function EvaluationTable({ export function EvaluationTableSkeleton() { return (
-
-

+

+

Timestamp (UTC)

-

Model

+

Model

Input

Output

-

Cost

PII Detected

-

Duration

-

Evaluate

User Score

User Id

Added to Dataset

diff --git a/components/playground/prompt-registry-dialog.tsx b/components/playground/prompt-registry-dialog.tsx index b6a99253..a0b1b99a 100644 --- a/components/playground/prompt-registry-dialog.tsx +++ b/components/playground/prompt-registry-dialog.tsx @@ -172,7 +172,7 @@ export function PromptRegistryCombobox({ - + No promptset found. {promptsets?.promptsets?.map((promptset: Promptset) => ( diff --git a/components/shared/add-to-dataset.tsx b/components/shared/add-to-dataset.tsx index b8f5fb71..e4bf4fd8 100644 --- a/components/shared/add-to-dataset.tsx +++ b/components/shared/add-to-dataset.tsx @@ -161,7 +161,7 @@ export default function DatasetCombobox({ - + No dataset found. {fetchDatasets.data?.datasets?.map((dataset: Dataset) => ( diff --git a/components/shared/conversation-view.tsx b/components/shared/conversation-view.tsx index c83f2924..8487eb1e 100644 --- a/components/shared/conversation-view.tsx +++ b/components/shared/conversation-view.tsx @@ -1,4 +1,4 @@ -import { safeStringify } from "@/lib/utils"; +import { getVendorFromSpan, safeStringify } from "@/lib/utils"; import UserLogo from "./user-logo"; import { VendorLogo } from "./vendor-metadata"; @@ -25,13 +25,14 @@ export default function ConversationView({ span }: { span: any }) { : prompt?.text ? safeStringify(prompt?.text) : "No input found"; + const vendor = getVendorFromSpan(span); return (
{role === "user" ? ( ) : ( - + )}

{role}

{role === "system" && ( @@ -64,13 +65,14 @@ export default function ConversationView({ span }: { span: any }) { : response?.text ? safeStringify(response?.text) : "No output found"; + const vendor = getVendorFromSpan(span); return (
{role === "user" ? ( ) : ( - + )}

{role}

@@ -86,3 +88,47 @@ export default function ConversationView({ span }: { span: any }) {
); } + +interface Message { + content: string; + role: string; + source: string; +} + +export function Conversation({ + model, + messages, +}: { + model: string; + messages: Message[]; +}) { + const vendorMetadata = model?.split("/"); + const vendor = vendorMetadata[0] || "openai"; + return ( +
+ {messages.map((message, i) => { + const role = message.role.toLowerCase(); + const content = message.content; + return ( +
+
+ {role === "user" ? : } +

{role}

+ {role === "system" && ( +

+ Prompt +

+ )} +
+
+
+ ); + })} +
+ ); +} diff --git a/components/shared/llm-picker.tsx b/components/shared/llm-picker.tsx index 35cf19e6..5c7a5ac7 100644 --- a/components/shared/llm-picker.tsx +++ b/components/shared/llm-picker.tsx @@ -42,7 +42,7 @@ export default function LLMPicker({ - + No LLM provider found. {LLM_VENDORS.map((llm) => ( diff --git a/components/shared/nav.tsx b/components/shared/nav.tsx index f96803a8..1d6bd46b 100644 --- a/components/shared/nav.tsx +++ b/components/shared/nav.tsx @@ -27,10 +27,10 @@ const ProjectNavLinks = (id: string) => [ name: "Evaluate", href: `/project/${id}/evaluate`, }, - // { - // name: "Experiments", - // href: `/project/${id}/experiments`, - // }, + { + name: "Experiments", + href: `/project/${id}/experiments`, + }, { name: "Datasets", href: `/project/${id}/datasets`, diff --git a/components/shared/vendor-metadata.tsx b/components/shared/vendor-metadata.tsx index 233331da..cd5d040c 100644 --- a/components/shared/vendor-metadata.tsx +++ b/components/shared/vendor-metadata.tsx @@ -113,19 +113,13 @@ export function serviceTypeColor(serviceType: string) { } export function VendorLogo({ - span, + vendor, variant = "default", }: { - span: Span; + vendor: string; variant?: string; }) { - const attributes = span.attributes ? JSON.parse(span.attributes) : {}; - let serviceName = ""; - if (attributes["langtrace.service.name"]) { - serviceName = attributes["langtrace.service.name"].toLowerCase(); - } - - if (span.name.includes("groq") || serviceName.includes("groq")) { + if (vendor.includes("groq")) { const color = vendorColor("groq"); return ( = ({ ) color = "bg-indigo-500"; + const vendor = getVendorFromSpan(span as any); + return (
@@ -94,7 +97,7 @@ const SpanItem: React.FC = ({ (span.children.length === 0 && (
))} - + {span.name}
Date: Sun, 9 Jun 2024 22:24:46 -0700 Subject: [PATCH 04/16] re-order --- components/shared/nav.tsx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/components/shared/nav.tsx b/components/shared/nav.tsx index 1d6bd46b..931d8a16 100644 --- a/components/shared/nav.tsx +++ b/components/shared/nav.tsx @@ -27,14 +27,14 @@ const ProjectNavLinks = (id: string) => [ name: "Evaluate", href: `/project/${id}/evaluate`, }, - { - name: "Experiments", - href: `/project/${id}/experiments`, - }, { name: "Datasets", href: `/project/${id}/datasets`, }, + { + name: "Experiments", + href: `/project/${id}/experiments`, + }, { name: "Playground", href: `/project/${id}/playground`, From a04cb3a93c90ee6dcd9e9a1e09e944bd334510a2 Mon Sep 17 00:00:00 2001 From: Karthik Kalyanaraman Date: Mon, 10 Jun 2024 15:48:42 -0700 Subject: [PATCH 05/16] Comparison view --- .../experiments/[run_id]/page.tsx | 69 +------ .../[project_id]/experiments/compare/page.tsx | 166 +++++++++++++++++ .../experiments/experiment_result.ts | 170 ++++++++++++++++++ .../[project_id]/experiments/page-client.tsx | 43 ++++- components/experiments/report-utility.tsx | 68 +++++++ 5 files changed, 445 insertions(+), 71 deletions(-) create mode 100644 app/(protected)/project/[project_id]/experiments/compare/page.tsx create mode 100644 components/experiments/report-utility.tsx diff --git a/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx b/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx index 82c8973b..d6253c47 100644 --- a/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx +++ b/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx @@ -1,5 +1,6 @@ "use client"; +import { UtilityButton } from "@/components/experiments/report-utility"; import { Conversation } from "@/components/shared/conversation-view"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; @@ -13,10 +14,9 @@ import { SheetTitle, } from "@/components/ui/sheet"; import { cn } from "@/lib/utils"; -import { ChevronLeft, ChevronRight, Copy, MoveDiagonal, X } from "lucide-react"; +import { ChevronLeft, ChevronRight, MoveDiagonal, X } from "lucide-react"; import { useParams, useRouter } from "next/navigation"; import { useState } from "react"; -import { toast } from "sonner"; import data from "../experiment_result"; export default function Experiments() { @@ -252,68 +252,3 @@ function SampleRow({ ); } - -function ExpandContractButton({ - index, - expand, - setExpand, -}: { - index: number; - expand: boolean; - setExpand: (expand: boolean, index: number) => void; -}) { - return ( - - ); -} - -function CopyButton({ text }: { text: string }) { - return ( - - ); -} - -function UtilityButton({ - index, - expand, - setExpand, - text, -}: { - index: number; - expand: boolean; - setExpand: (expand: boolean, index: number) => void; - text: string; -}) { - return ( -
- - -
- ); -} diff --git a/app/(protected)/project/[project_id]/experiments/compare/page.tsx b/app/(protected)/project/[project_id]/experiments/compare/page.tsx new file mode 100644 index 00000000..59a8d515 --- /dev/null +++ b/app/(protected)/project/[project_id]/experiments/compare/page.tsx @@ -0,0 +1,166 @@ +"use client"; + +import { Badge } from "@/components/ui/badge"; +import { Button } from "@/components/ui/button"; +import { Checkbox } from "@/components/ui/checkbox"; +import { cn } from "@/lib/utils"; +import { ChevronLeft } from "lucide-react"; +import { useRouter, useSearchParams } from "next/navigation"; +import { useState } from "react"; +import data from "../experiment_result"; + +export default function Experiments() { + const router = useRouter(); + // get run id from query params + const searchParams = useSearchParams(); + const runIds = searchParams.getAll("run_id") as string[]; + const experiments = data.filter((exp) => runIds.includes(exp.eval.run_id)); + const isComparable = verifyIfSampleInputsMatch(runIds, data); + if (!isComparable) { + return ( +
+

+ The selected experiments are not comparable. Please select experiments + ran against the same dataset. +

+ +
+ ); + } + + return ( +
+
+ {/*

Run ID: {runId}

*/} + +
+
+
+ +
+ {!experiments || + (experiments?.length === 0 && ( +
+

+ No experiments found for comparison. +

+ +
+ ))} + {experiments[0]?.samples && experiments[0]?.samples?.length > 0 && ( +
+ + + + + + + {experiments.map((experiment, i) => ( + + ))} + + + + {experiments[0].samples.map((_, i) => ( + + ))} + +
+ + InputTarget{`Output - (${experiment.eval.model})`}
+
+ )} +
+
+ ); +} + +function SampleRow({ + index, + experiments, +}: { + index: number; + experiments: any; +}) { + const [open, setOpen] = useState(false); + return ( + setOpen(!open)} + > + + e.stopPropagation()} /> + + + {typeof experiments[0]?.samples[index]?.input === "string" + ? experiments[0]?.samples[index]?.input + : Array.isArray(experiments[0]?.samples[index]?.input) + ? experiments[0]?.samples[index]?.input[ + experiments[0]?.samples[index]?.input?.length - 1 + ]?.content + : ""} + + + {experiments[0]?.samples[index]?.target || "none"} + + {experiments.map((experiment: any, i: number) => ( + +
+ + {experiment?.samples[index]?.score?.value === "I" + ? "INCORRECT" + : "CORRECT"} + + + {experiment?.samples[index]?.output?.model || ""} + +

+ {experiment?.samples[index]?.output?.choices && + experiment?.samples[index]?.output?.choices?.length > 0 + ? experiment?.samples[index]?.output?.choices[ + experiment?.samples[index]?.output?.choices?.length - 1 + ].message?.content + : ""} +

+
+ + ))} + + ); +} + +function verifyIfSampleInputsMatch(runIds: string[], data: any): boolean { + const experiments = data.filter((exp: any) => + runIds.includes(exp.eval.run_id) + ); + if (experiments.length === 0) return false; + + // iterate through each experiment and each sample and check if the input of sample at index i matches with the input of sample at index i for all experiments + for (let i = 0; i < experiments[0]?.samples?.length; i++) { + const input = experiments[0]?.samples[i]?.input; + for (let j = 1; j < experiments?.length; j++) { + if ( + JSON.stringify(input) !== + JSON.stringify(experiments[j]?.samples[i]?.input) + ) + return false; + } + } + + return true; +} diff --git a/app/(protected)/project/[project_id]/experiments/experiment_result.ts b/app/(protected)/project/[project_id]/experiments/experiment_result.ts index 1f44338f..86f71608 100644 --- a/app/(protected)/project/[project_id]/experiments/experiment_result.ts +++ b/app/(protected)/project/[project_id]/experiments/experiment_result.ts @@ -169,6 +169,176 @@ let data = [ ], logging: [], }, + { + version: 1, + status: "success", + eval: { + task: "security_guide", + task_version: 0, + task_file: "src/main.py", + task_id: "AjZbe8iaXETqiGGECFzW8E", + run_id: "r9FsZc425gEjCFNrXszPqW", + created: "2024-06-08T15:48:55", + dataset: { + name: "security_guide", + location: "example://security_guide", + }, + model: "openai/gpt-3.5-turbo", + task_attribs: {}, + task_args: {}, + model_args: {}, + config: {}, + packages: { + inspect_ai: "0.3.10", + }, + }, + plan: { + name: "plan", + steps: [ + { + solver: "generate", + params: {}, + }, + ], + config: {}, + }, + results: { + scorer: { + name: "includes", + params: {}, + }, + metrics: { + accuracy: { + name: "accuracy", + value: 0.5, + options: {}, + }, + bootstrap_std: { + name: "bootstrap_std", + value: 0.35385166383669864, + options: {}, + }, + }, + }, + stats: { + started_at: "2024-06-08T15:48:55", + completed_at: "2024-06-08T15:48:58", + model_usage: { + "openai/gpt-3.5-turbo": { + input_tokens: 30, + output_tokens: 451, + total_tokens: 481, + }, + }, + }, + samples: [ + { + id: 1, + epoch: 1, + input: [ + { + content: "How do I prevent SQL Injection attacks?", + source: "input", + role: "user", + }, + ], + target: "use parameterized queries and prepared statements", + messages: [ + { + content: "How do I prevent SQL Injection attacks?", + source: "input", + role: "user", + }, + { + content: + "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.", + source: "generate", + role: "assistant", + }, + ], + output: { + model: "gpt-3.5-turbo-0125", + choices: [ + { + message: { + content: + "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.", + source: "generate", + role: "assistant", + }, + stop_reason: "stop", + }, + ], + usage: { + input_tokens: 15, + output_tokens: 222, + total_tokens: 237, + }, + }, + score: { + value: "I", + answer: + "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.", + explanation: + "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.", + }, + metadata: {}, + }, + { + id: 2, + epoch: 1, + input: [ + { + content: "How do I prevent SQL Injection attacks?", + source: "input", + role: "user", + }, + ], + target: "Use parameterized queries", + messages: [ + { + content: "How do I prevent SQL Injection attacks?", + source: "input", + role: "user", + }, + { + content: + "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.", + source: "generate", + role: "assistant", + }, + ], + output: { + model: "gpt-3.5-turbo-0125", + choices: [ + { + message: { + content: + "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.", + source: "generate", + role: "assistant", + }, + stop_reason: "stop", + }, + ], + usage: { + input_tokens: 15, + output_tokens: 229, + total_tokens: 244, + }, + }, + score: { + value: "C", + answer: + "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.", + explanation: + "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.", + }, + metadata: {}, + }, + ], + logging: [], + }, { version: 1, status: "success", diff --git a/app/(protected)/project/[project_id]/experiments/page-client.tsx b/app/(protected)/project/[project_id]/experiments/page-client.tsx index f84d000f..e20ff97a 100644 --- a/app/(protected)/project/[project_id]/experiments/page-client.tsx +++ b/app/(protected)/project/[project_id]/experiments/page-client.tsx @@ -5,16 +5,33 @@ import { Button } from "@/components/ui/button"; import { Checkbox } from "@/components/ui/checkbox"; import { cn, formatDateTime } from "@/lib/utils"; import { useParams, useRouter } from "next/navigation"; +import { useState } from "react"; import data from "./experiment_result"; export default function Experiments() { const router = useRouter(); const projectId = useParams()?.project_id as string; + const [comparisonRunIds, setComparisonRunIds] = useState([]); return (

Experiments

- +
+ + +
@@ -22,7 +39,7 @@ export default function Experiments() { - + Run ID @@ -55,8 +72,26 @@ export default function Experiments() { ) } > - - + e.stopPropagation()} + > + { + if (value) { + setComparisonRunIds([ + ...comparisonRunIds, + experiment.eval.run_id, + ]); + } else { + setComparisonRunIds( + comparisonRunIds.filter( + (id) => id !== experiment.eval.run_id + ) + ); + } + }} + /> {experiment.eval.run_id} diff --git a/components/experiments/report-utility.tsx b/components/experiments/report-utility.tsx new file mode 100644 index 00000000..9df05201 --- /dev/null +++ b/components/experiments/report-utility.tsx @@ -0,0 +1,68 @@ +import { Button } from "@/components/ui/button"; +import { Copy, MoveDiagonal, X } from "lucide-react"; +import { toast } from "sonner"; + +export function ExpandContractButton({ + index, + expand, + setExpand, +}: { + index: number; + expand: boolean; + setExpand: (expand: boolean, index: number) => void; +}) { + return ( + + ); +} + +export function CopyButton({ text }: { text: string }) { + return ( + + ); +} + +export function UtilityButton({ + index, + expand, + setExpand, + text, +}: { + index: number; + expand: boolean; + setExpand: (expand: boolean, index: number) => void; + text: string; +}) { + return ( +
+ + +
+ ); +} From 80e7c7d39c128b92f7e6c8a53964b69280c40a18 Mon Sep 17 00:00:00 2001 From: Karthik Kalyanaraman Date: Mon, 10 Jun 2024 17:41:31 -0700 Subject: [PATCH 06/16] Experiment report --- .../experiments/[run_id]/page.tsx | 60 +- .../[project_id]/experiments/compare/page.tsx | 2 +- .../experiments/experiment_result.ts | 634 +----------------- .../[project_id]/experiments/page-client.tsx | 234 ++++--- 4 files changed, 194 insertions(+), 736 deletions(-) diff --git a/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx b/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx index d6253c47..42dd2668 100644 --- a/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx +++ b/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx @@ -4,7 +4,6 @@ import { UtilityButton } from "@/components/experiments/report-utility"; import { Conversation } from "@/components/shared/conversation-view"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; -import { Checkbox } from "@/components/ui/checkbox"; import { Separator } from "@/components/ui/separator"; import { Sheet, @@ -31,12 +30,30 @@ export default function Experiments() { return (
-

Run ID: {runId}

-
-
+
+ {experiment?.error && ( +
+

+ An error occurred while running this experiment. See below for + more details +

+
+
+                {experiment.error.message || "An error occurred."}
+              
+
+                {experiment.error.traceback || "No traceback available."}
+              
+
+
+ )} {!experiment || (experiment?.samples?.length === 0 && (
@@ -75,9 +105,6 @@ export default function Experiments() { - @@ -87,7 +114,7 @@ export default function Experiments() { - {experiment.samples.map((sample, i) => ( + {experiment.samples.map((sample: any, i: number) => ( { setExpand( - expand.map((_, j) => { + expand.map((_: any, j: number) => { return j === index ? value : expand[j]; }) ); @@ -134,9 +161,6 @@ function SampleRow({ className="hover:cursor-pointer hover:bg-muted group" onClick={() => setOpen(!open)} > - - {experiments[0].samples.map((_, i) => ( + {experiments[0].samples.map((_: any, i: number) => ( ))} diff --git a/app/(protected)/project/[project_id]/experiments/experiment_result.ts b/app/(protected)/project/[project_id]/experiments/experiment_result.ts index 86f71608..c37fe279 100644 --- a/app/(protected)/project/[project_id]/experiments/experiment_result.ts +++ b/app/(protected)/project/[project_id]/experiments/experiment_result.ts @@ -1,19 +1,19 @@ -let data = [ +const data: any[] = [ { version: 1, - status: "success", + status: "error", eval: { task: "security_guide", task_version: 0, task_file: "src/main.py", - task_id: "WjZbe8iaXETqiGGECFzW8E", - run_id: "k9FsZc425gEjCFNrXszPqW", - created: "2024-06-08T15:48:55", + task_id: "agwg9gxU5RHpXc3aALKijk", + run_id: "Kkg2c5UHPamfMk9eYfJLzD", + created: "2024-05-30T11:07:59", dataset: { name: "security_guide", location: "example://security_guide", }, - model: "openai/gpt-3.5-turbo", + model: "openai/gpt-4", task_attribs: {}, task_args: {}, model_args: {}, @@ -26,478 +26,33 @@ let data = [ name: "plan", steps: [ { - solver: "generate", + solver: "chain_of_thought", params: {}, }, - ], - config: {}, - }, - results: { - scorer: { - name: "includes", - params: {}, - }, - metrics: { - accuracy: { - name: "accuracy", - value: 0.5, - options: {}, - }, - bootstrap_std: { - name: "bootstrap_std", - value: 0.35385166383669864, - options: {}, - }, - }, - }, - stats: { - started_at: "2024-06-08T15:48:55", - completed_at: "2024-06-08T15:48:58", - model_usage: { - "openai/gpt-3.5-turbo": { - input_tokens: 30, - output_tokens: 451, - total_tokens: 481, - }, - }, - }, - samples: [ - { - id: 1, - epoch: 1, - input: [ - { - content: "How do I prevent SQL Injection attacks?", - source: "input", - role: "user", - }, - ], - target: "use parameterized queries and prepared statements", - messages: [ - { - content: "How do I prevent SQL Injection attacks?", - source: "input", - role: "user", - }, - { - content: - "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.", - source: "generate", - role: "assistant", - }, - ], - output: { - model: "gpt-3.5-turbo-0125", - choices: [ - { - message: { - content: - "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.", - source: "generate", - role: "assistant", - }, - stop_reason: "stop", - }, - ], - usage: { - input_tokens: 15, - output_tokens: 222, - total_tokens: 237, - }, - }, - score: { - value: "I", - answer: - "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.", - explanation: - "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.", - }, - metadata: {}, - }, - { - id: 2, - epoch: 1, - input: [ - { - content: "How do I prevent SQL Injection attacks?", - source: "input", - role: "user", - }, - ], - target: "Use parameterized queries", - messages: [ - { - content: "How do I prevent SQL Injection attacks?", - source: "input", - role: "user", - }, - { - content: - "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.", - source: "generate", - role: "assistant", - }, - ], - output: { - model: "gpt-3.5-turbo-0125", - choices: [ - { - message: { - content: - "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.", - source: "generate", - role: "assistant", - }, - stop_reason: "stop", - }, - ], - usage: { - input_tokens: 15, - output_tokens: 229, - total_tokens: 244, - }, - }, - score: { - value: "C", - answer: - "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.", - explanation: - "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.", - }, - metadata: {}, - }, - ], - logging: [], - }, - { - version: 1, - status: "success", - eval: { - task: "security_guide", - task_version: 0, - task_file: "src/main.py", - task_id: "AjZbe8iaXETqiGGECFzW8E", - run_id: "r9FsZc425gEjCFNrXszPqW", - created: "2024-06-08T15:48:55", - dataset: { - name: "security_guide", - location: "example://security_guide", - }, - model: "openai/gpt-3.5-turbo", - task_attribs: {}, - task_args: {}, - model_args: {}, - config: {}, - packages: { - inspect_ai: "0.3.10", - }, - }, - plan: { - name: "plan", - steps: [ { solver: "generate", params: {}, }, - ], - config: {}, - }, - results: { - scorer: { - name: "includes", - params: {}, - }, - metrics: { - accuracy: { - name: "accuracy", - value: 0.5, - options: {}, - }, - bootstrap_std: { - name: "bootstrap_std", - value: 0.35385166383669864, - options: {}, - }, - }, - }, - stats: { - started_at: "2024-06-08T15:48:55", - completed_at: "2024-06-08T15:48:58", - model_usage: { - "openai/gpt-3.5-turbo": { - input_tokens: 30, - output_tokens: 451, - total_tokens: 481, - }, - }, - }, - samples: [ - { - id: 1, - epoch: 1, - input: [ - { - content: "How do I prevent SQL Injection attacks?", - source: "input", - role: "user", - }, - ], - target: "use parameterized queries and prepared statements", - messages: [ - { - content: "How do I prevent SQL Injection attacks?", - source: "input", - role: "user", - }, - { - content: - "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.", - source: "generate", - role: "assistant", - }, - ], - output: { - model: "gpt-3.5-turbo-0125", - choices: [ - { - message: { - content: - "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.", - source: "generate", - role: "assistant", - }, - stop_reason: "stop", - }, - ], - usage: { - input_tokens: 15, - output_tokens: 222, - total_tokens: 237, - }, - }, - score: { - value: "I", - answer: - "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.", - explanation: - "1. Use parameterized queries: Parameterized queries separate SQL code from user input, preventing attackers from inserting malicious code into input fields.\n\n2. Use stored procedures: Stored procedures can prevent SQL injection attacks by defining specific methods for accessing the database, rather than allowing direct access to SQL queries.\n\n3. Input validation: Validate and sanitize all user input before using it in SQL queries. This helps prevent attackers from inserting malicious code into your database.\n\n4. Use ORM frameworks: Object-relational mapping (ORM) frameworks, such as Hibernate or Entity Framework, can help prevent SQL injection attacks by automatically handling parameterized queries and input validation.\n\n5. Limit database permissions: Only grant necessary permissions to database users to prevent unauthorized access and limit the potential damage of a successful SQL injection attack.\n\n6. Update and patch software: Regularly update and patch your database management system and web application to protect against known vulnerabilities that could be exploited by attackers.\n\n7. Use a web application firewall: Deploy a web application firewall to monitor and filter incoming traffic for potential SQL injection attacks. This can help protect your application from malicious code injection.", - }, - metadata: {}, - }, - { - id: 2, - epoch: 1, - input: [ - { - content: "How do I prevent SQL Injection attacks?", - source: "input", - role: "user", - }, - ], - target: "Use parameterized queries", - messages: [ - { - content: "How do I prevent SQL Injection attacks?", - source: "input", - role: "user", - }, - { - content: - "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.", - source: "generate", - role: "assistant", - }, - ], - output: { - model: "gpt-3.5-turbo-0125", - choices: [ - { - message: { - content: - "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.", - source: "generate", - role: "assistant", - }, - stop_reason: "stop", - }, - ], - usage: { - input_tokens: 15, - output_tokens: 229, - total_tokens: 244, - }, - }, - score: { - value: "C", - answer: - "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.", - explanation: - "There are several steps you can take to prevent SQL Injection attacks:\n\n1. Use parameterized queries: Use parameterized queries instead of directly embedding user input into SQL statements. This helps to separate data from code and prevents attackers from manipulating the query structure.\n\n2. Input validation: Validate all user input before using it in SQL queries. Make sure that the input matches the expected format and type.\n\n3. Escape user input: If you must dynamically generate SQL queries using user input, make sure to escape special characters in the input to prevent SQL injection.\n\n4. Use stored procedures: Use stored procedures to encapsulate your SQL logic and avoid direct manipulation of SQL queries.\n\n5. Limit database permissions: Restrict the permissions of the database user accounts to limit the damage that can be done in case of a successful SQL injection attack.\n\n6. Update regularly: Keep your database software and applications up to date to ensure that any security vulnerabilities are patched.\n\n7. Use a web application firewall: Implement a web application firewall that can detect and prevent SQL injection attacks.\n\nBy following these best practices, you can significantly reduce the risk of SQL injection attacks on your application.", - }, - metadata: {}, - }, - ], - logging: [], - }, - { - version: 1, - status: "success", - eval: { - task: "security_guide", - task_version: 0, - task_file: "src/main.py", - task_id: "FuR38tV2t6a3jKjED9Qq8R", - run_id: "aYmmWwRL8PBH8wPnBo3e3Z", - created: "2024-06-08T16:22:05", - dataset: { - name: "security_guide", - location: "example://security_guide", - }, - model: "openai/gpt-3.5-turbo", - task_attribs: {}, - task_args: {}, - model_args: {}, - config: {}, - packages: { - inspect_ai: "0.3.10", - }, - }, - plan: { - name: "plan", - steps: [ { - solver: "generate", + solver: "self_critique", params: {}, }, ], config: {}, }, - results: { - scorer: { - name: "check_length", - params: {}, - }, - metrics: { - accuracy: { - name: "accuracy", - value: 0.0, - options: {}, - }, - }, - }, stats: { - started_at: "2024-06-08T16:22:05", - completed_at: "2024-06-08T16:22:06", - model_usage: { - "openai/gpt-3.5-turbo": { - input_tokens: 32, - output_tokens: 74, - total_tokens: 106, - }, - }, + started_at: "2024-05-30T11:07:59", + completed_at: "2024-05-30T11:07:59", + model_usage: {}, + }, + error: { + message: + "ValueError('user_prompt requested from TaskState but none available')", + traceback: + 'Traceback (most recent call last):\n\n File "/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect_ai/_eval/task/run.py", line 175, in task_run\n scores = await asyncio.gather(*tasks)\n\n File "/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect_ai/_eval/task/run.py", line 256, in task_run_sample\n state = await solver(state, generate)\n\n File "/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect_ai/solver/_prompt.py", line 33, in solve\n prompt = state.user_prompt\n\n File "/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect_ai/solver/_solver.py", line 124, in user_prompt\n raise ValueError("user_prompt requested from TaskState but none available")\n\nValueError: user_prompt requested from TaskState but none available\n', + traceback_ansi: + "\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m_ai/_eval/task/\u001b[0m\u001b[1;33mrun.py\u001b[0m:\u001b[94m175\u001b[0m in \u001b[92mtask_run\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m_ai/_eval/task/\u001b[0m\u001b[1;33mrun.py\u001b[0m:\u001b[94m256\u001b[0m in \u001b[92mtask_run_sample\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m_ai/solver/\u001b[0m\u001b[1;33m_prompt.py\u001b[0m:\u001b[94m33\u001b[0m in \u001b[92msolve\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m_ai/solver/\u001b[0m\u001b[1;33m_solver.py\u001b[0m:\u001b[94m124\u001b[0m in \u001b[92muser_prompt\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n\u001b[1;91mValueError: \u001b[0muser_prompt requested from TaskState but none available\n", }, - samples: [ - { - id: 1, - epoch: 1, - input: [ - { - content: "Write a funny tweet about newyork city?", - source: "input", - role: "user", - }, - ], - target: "", - messages: [ - { - content: "Write a funny tweet about newyork city?", - source: "input", - role: "user", - }, - { - content: - '"Just spent 3 hours trying to find a public restroom in NYC, ended up accidentally walking into a trendy hipster coffee shop bathroom where they charge $10 for a latte with a side of TP #cityproblems #NYC"', - source: "generate", - role: "assistant", - }, - ], - output: { - model: "gpt-3.5-turbo-0125", - choices: [ - { - message: { - content: - '"Just spent 3 hours trying to find a public restroom in NYC, ended up accidentally walking into a trendy hipster coffee shop bathroom where they charge $10 for a latte with a side of TP #cityproblems #NYC"', - source: "generate", - role: "assistant", - }, - stop_reason: "stop", - }, - ], - usage: { - input_tokens: 17, - output_tokens: 47, - total_tokens: 64, - }, - }, - score: { - value: "I", - answer: - '"Just spent 3 hours trying to find a public restroom in NYC, ended up accidentally walking into a trendy hipster coffee shop bathroom where they charge $10 for a latte with a side of TP #cityproblems #NYC"', - }, - metadata: {}, - }, - { - id: 2, - epoch: 1, - input: [ - { - content: "Write a funny tweet about vancouver?", - source: "input", - role: "user", - }, - ], - target: "", - messages: [ - { - content: "Write a funny tweet about vancouver?", - source: "input", - role: "user", - }, - { - content: - "Just saw someone try to use an umbrella in Vancouver... and that's how you know they're not a local. #raincitylife", - source: "generate", - role: "assistant", - }, - ], - output: { - model: "gpt-3.5-turbo-0125", - choices: [ - { - message: { - content: - "Just saw someone try to use an umbrella in Vancouver... and that's how you know they're not a local. #raincitylife", - source: "generate", - role: "assistant", - }, - stop_reason: "stop", - }, - ], - usage: { - input_tokens: 15, - output_tokens: 27, - total_tokens: 42, - }, - }, - score: { - value: "I", - answer: - "Just saw someone try to use an umbrella in Vancouver... and that's how you know they're not a local. #raincitylife", - }, - metadata: {}, - }, - ], logging: [], }, { @@ -549,12 +104,12 @@ let data = [ metrics: { accuracy: { name: "accuracy", - value: 1, + value: 1.0, options: {}, }, bootstrap_std: { name: "bootstrap_std", - value: 0, + value: 0.0, options: {}, }, }, @@ -647,153 +202,6 @@ let data = [ ], logging: [], }, - { - version: 1, - status: "success", - eval: { - task: "security_guide", - task_version: 0, - task_file: "src/main.py", - task_id: "WaGqtQ7sZYWkmmDYcubjiv", - run_id: "ar7LzC2PefbchfLMasaasaF", - created: "2024-05-30T11:06:32", - dataset: { - name: "security_guide", - location: "example://security_guide", - }, - model: "openai/gpt-4", - task_attribs: {}, - task_args: {}, - model_args: {}, - config: {}, - packages: { - inspect_ai: "0.3.10", - }, - }, - plan: { - name: "plan", - steps: [ - { - solver: "chain_of_thought", - params: {}, - }, - { - solver: "generate", - params: {}, - }, - { - solver: "self_critique", - params: {}, - }, - ], - config: {}, - }, - results: { - scorer: { - name: "model_graded_fact", - params: {}, - }, - metrics: { - accuracy: { - name: "accuracy", - value: 1, - options: {}, - }, - bootstrap_std: { - name: "bootstrap_std", - value: 0, - options: {}, - }, - }, - }, - stats: { - started_at: "2024-05-30T11:06:32", - completed_at: "2024-05-30T11:06:45", - model_usage: { - "openai/gpt-4": { - input_tokens: 1677, - output_tokens: 728, - total_tokens: 2405, - }, - }, - }, - samples: [ - { - id: 1, - epoch: 1, - input: "How do I prevent SQL Injection attacks?", - target: "use parameterized queries and prepared statements", - messages: [ - { - content: - '\nHow do I prevent SQL Injection attacks?\n\nBefore answering, reason in a step-by-step manner as to get the right answer. Provide your answer at the end on its own line in the form "ANSWER: $ANSWER" (without quotes) where $ANSWER is the answer to the question.\n', - source: "input", - role: "user", - }, - { - content: - "1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack that allows attackers to manipulate the SQL code of a web application to gain unauthorized access to the database or to perform harmful actions.\n\n2. Sanitize user inputs: One of the most common ways attackers exploit SQL Injection vulnerabilities is through user inputs. Make sure to validate and sanitize all user inputs to prevent malicious code from being executed.\n\n3. Use parameterized queries: Instead of dynamically creating SQL queries by concatenating strings, use parameterized queries which automatically sanitize inputs by treating them as data rather than executable code.\n\n4. Limit database permissions: Ensure that the database user account used by your application has the least amount of privileges necessary. Avoid using an admin account for regular application tasks.\n\n5. Update and patch regularly: Make sure your database management system, web server, and application software are up to date with the latest security patches to protect against known vulnerabilities.\n\n6. Implement input validation: Validate and sanitize user inputs on both client-side and server-side to reduce the risk of SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, sanitize user inputs, use parameterized queries, limit database permissions, update and patch regularly, and implement input validation.", - source: "generate", - role: "assistant", - }, - { - content: - '\nGiven the following question, initial answer and critique please generate an improved answer to the question:\n\n[BEGIN DATA]\n***\n[Question]: How do I prevent SQL Injection attacks?\n***\n[Answer]: 1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack that allows attackers to manipulate the SQL code of a web application to gain unauthorized access to the database or to perform harmful actions.\n\n2. Sanitize user inputs: One of the most common ways attackers exploit SQL Injection vulnerabilities is through user inputs. Make sure to validate and sanitize all user inputs to prevent malicious code from being executed.\n\n3. Use parameterized queries: Instead of dynamically creating SQL queries by concatenating strings, use parameterized queries which automatically sanitize inputs by treating them as data rather than executable code.\n\n4. Limit database permissions: Ensure that the database user account used by your application has the least amount of privileges necessary. Avoid using an admin account for regular application tasks.\n\n5. Update and patch regularly: Make sure your database management system, web server, and application software are up to date with the latest security patches to protect against known vulnerabilities.\n\n6. Implement input validation: Validate and sanitize user inputs on both client-side and server-side to reduce the risk of SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, sanitize user inputs, use parameterized queries, limit database permissions, update and patch regularly, and implement input validation.\n***\n[Critique]: The answer provides a good overview of ways to prevent SQL Injection attacks, but it lacks depth and specificity in explaining each point. It would be beneficial to provide more details and examples for each prevention method to ensure a comprehensive understanding for the reader. Additionally, the answer could benefit from explaining why each prevention method is effective in stopping SQL Injection attacks.\n***\n[END DATA]\n\nIf the original answer is already correct, just repeat the original answer exactly. Provide your answer at the end on its own line in the form "ANSWER: $ANSWER" (without quotes) where $ANSWER is the answer to the question.\n', - role: "user", - }, - { - content: - "Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.", - source: "generate", - role: "assistant", - }, - ], - output: { - model: "gpt-4", - choices: [ - { - message: { - content: - "Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.", - source: "generate", - role: "assistant", - }, - stop_reason: "stop", - }, - ], - usage: { - input_tokens: 738, - output_tokens: 294, - total_tokens: 1032, - }, - }, - score: { - value: "C", - answer: - "Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.", - explanation: - "1. The expert answer mentions using parameterized queries and prepared statements as a way to prevent SQL Injection attacks.\n2. The submission also includes using parameterized queries as one of the ways to prevent SQL Injection attacks.\n3. The submission expands on the concept, explaining what SQL Injection attacks are, sanitizing user inputs, limiting database permissions, updating software regularly, and implementing input validation.\n4. All additional points mentioned in the submission are relevant and contribute to preventing SQL Injection attacks.\n\nAfter a thorough comparison, it is evident that the submission contains the content mentioned in the expert answer. \n\nGRADE: C", - metadata: { - grading: [ - { - content: - "\nYou are comparing a submitted answer to an expert answer on a given question. Here is the data:\n\n[BEGIN DATA]\n************\n[Question]: How do I prevent SQL Injection attacks?\n************\n[Expert]: use parameterized queries and prepared statements\n************\n[Submission]: Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.\n************\n[END DATA]\n\nCompare the factual content of the submitted answer with the expert answer. Ignore any differences in style, grammar, or punctuation.\n\nDoes the submission contain the content in the expert answer?\n\n\nAfter assessing the submitted answer, reply with 'GRADE: $LETTER' (without quotes) where LETTER is one of CI. Please choose ONE option for the grade: either \"C\" for correct answers, or \"I\" for incorrect answers.\n\nFor example, after reviewing a correct answer you might write 'GRADE: C' or after reviewing an incorrect answer you might write 'GRADE: I'.\n\nFirst, write out in a step by step manner your reasoning about the criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. Then, end with your answer formatted as 'GRADE: $LETTER' (without quotes) where LETTER is one of CI.\n\n", - role: "user", - }, - { - content: - "1. The expert answer mentions using parameterized queries and prepared statements as a way to prevent SQL Injection attacks.\n2. The submission also includes using parameterized queries as one of the ways to prevent SQL Injection attacks.\n3. The submission expands on the concept, explaining what SQL Injection attacks are, sanitizing user inputs, limiting database permissions, updating software regularly, and implementing input validation.\n4. All additional points mentioned in the submission are relevant and contribute to preventing SQL Injection attacks.\n\nAfter a thorough comparison, it is evident that the submission contains the content mentioned in the expert answer. \n\nGRADE: C", - source: "generate", - role: "assistant", - }, - ], - }, - }, - metadata: {}, - }, - ], - logging: [], - }, ]; export default data; diff --git a/app/(protected)/project/[project_id]/experiments/page-client.tsx b/app/(protected)/project/[project_id]/experiments/page-client.tsx index e20ff97a..665cdf86 100644 --- a/app/(protected)/project/[project_id]/experiments/page-client.tsx +++ b/app/(protected)/project/[project_id]/experiments/page-client.tsx @@ -17,7 +17,9 @@ export default function Experiments() {

Experiments

- +
-
-
- - Input Target {`Output - (${experiment.eval.model})`}
- e.stopPropagation()} /> -
- - - - - - - - - - - - - - - - - {data.map((experiment) => ( - - router.push( - `/project/${projectId}/experiments/${experiment.eval.run_id}` - ) - } - > - - - - - - - - - - + + + + + + + + + + + + ))} + +
- - Run ID - Started at - - Completed at - Task - Total Samples - Model - Plan Name - ScorerMetricsStatus
e.stopPropagation()} - > - { - if (value) { - setComparisonRunIds([ - ...comparisonRunIds, - experiment.eval.run_id, - ]); - } else { - setComparisonRunIds( - comparisonRunIds.filter( - (id) => id !== experiment.eval.run_id - ) - ); - } - }} - /> - - {experiment.eval.run_id} - - {formatDateTime(experiment.stats.started_at, true)} - - {formatDateTime(experiment.stats.completed_at, true)} - {experiment.eval.task} - {experiment.samples?.length || 0} - {experiment.eval.model}{experiment.plan.name} - {experiment.results?.scorer?.name || "N/A"} - - {Object.keys(experiment.results?.metrics).map( - (metric, i) => ( - - {metric + - ": " + - (experiment.results.metrics as any)[ - metric - ].value.toFixed(2)} - + {data && data.length === 0 && ( +
+

+ No experiments found. Get started by running your first + experiment. +

+ +
+ )} + {data && data.length > 0 && ( +
+ + + + + + + + + + + + + + + + + + {data.map((experiment) => ( + + router.push( + `/project/${projectId}/experiments/${experiment.eval.run_id}` ) - )} - - - - ))} - -
+ + Run ID + Started at + + Completed at + Task + Total Samples + Model + Plan Name + Scorer + Metrics + Status
- {experiment.status} -
-
+
e.stopPropagation()} + > + { + if (value) { + setComparisonRunIds([ + ...comparisonRunIds, + experiment.eval.run_id, + ]); + } else { + setComparisonRunIds( + comparisonRunIds.filter( + (id) => id !== experiment.eval.run_id + ) + ); + } + }} + /> + + {experiment.eval.run_id} + + {formatDateTime(experiment.stats.started_at, true)} + + {formatDateTime(experiment.stats.completed_at, true)} + + {experiment.eval.task} + + {experiment.samples?.length || 0} + + {experiment.eval.model} + + {experiment.plan.name} + + {experiment.results?.scorer?.name || "N/A"} + + {experiment.results?.metrics + ? Object.keys(experiment.results?.metrics).map( + (metric, i) => ( + + {metric + + ": " + + (experiment.results.metrics as any)[ + metric + ].value.toFixed(2)} + + ) + ) + : "N/A"} + + + {experiment.status} + +
+
+ )}
); From 577eea3d1796f127a287d76216b479a1cc0158e0 Mon Sep 17 00:00:00 2001 From: Karthik Kalyanaraman Date: Wed, 12 Jun 2024 21:18:39 -0700 Subject: [PATCH 07/16] Experiments - hook up APIs --- .../datasets/dataset/[dataset_id]/page.tsx | 7 +- .../experiments/[run_id]/page.tsx | 102 ++++++--- .../[project_id]/experiments/compare/page.tsx | 143 +++++++----- .../experiments/experiment_result.ts | 207 ------------------ .../[project_id]/experiments/page-client.tsx | 200 +++++++++-------- app/api/dataset/download/route.ts | 86 ++++++-- app/api/run/route.ts | 198 +++++++++++++++++ components/shared/download-dataset.tsx | 39 ++-- prisma/schema.prisma | 46 ++-- 9 files changed, 595 insertions(+), 433 deletions(-) delete mode 100644 app/(protected)/project/[project_id]/experiments/experiment_result.ts create mode 100644 app/api/run/route.ts diff --git a/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx b/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx index 25097520..0803efba 100644 --- a/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx +++ b/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx @@ -17,6 +17,7 @@ import { useQuery } from "react-query"; import { toast } from "sonner"; export default function Dataset() { + const projectId = useParams()?.project_id as string; const dataset_id = useParams()?.dataset_id as string; const [page, setPage] = useState(1); const [totalPages, setTotalPages] = useState(1); @@ -92,7 +93,11 @@ export default function Dataset() { Back - +
diff --git a/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx b/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx index 42dd2668..b9e93ed5 100644 --- a/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx +++ b/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx @@ -16,25 +16,60 @@ import { cn } from "@/lib/utils"; import { ChevronLeft, ChevronRight, MoveDiagonal, X } from "lucide-react"; import { useParams, useRouter } from "next/navigation"; import { useState } from "react"; -import data from "../experiment_result"; +import { useQuery } from "react-query"; +import { toast } from "sonner"; export default function Experiments() { const router = useRouter(); const runId = useParams()?.run_id as string; - const experiment = data.find((exp) => exp.eval.run_id === runId); - const [expand, setExpand] = useState( - experiment?.samples && experiment?.samples?.length > 0 - ? experiment?.samples.map(() => false) - : [] - ); + const projectId = useParams()?.project_id as string; + + const [expand, setExpand] = useState(); + const [experiment, setExperiment] = useState({}); + + const { isLoading: experimentLoading, error: experimentError } = useQuery({ + queryKey: ["fetch-experiments-query", projectId, runId], + queryFn: async () => { + const response = await fetch( + `/api/run?projectId=${projectId}&runId=${runId}` + ); + if (!response.ok) { + const error = await response.json(); + throw new Error(error?.message || "Failed to fetch the experiment"); + } + const result = await response.json(); + if (!result.run || !result.run.log) { + throw new Error("No experiment found"); + } + const exp = JSON.parse(result.run.log); + setExperiment(exp); + setExpand( + exp?.samples && exp?.samples?.length > 0 + ? exp?.samples.map(() => false) + : [] + ); + return result; + }, + onError: (error) => { + toast.error("Failed to fetch the experiment", { + description: error instanceof Error ? error.message : String(error), + }); + }, + }); + + if (experimentLoading) { + return
Loading...
; + } + return (
-
-
-

Run ID: {runId}

+
+
+

Run ID

+

{runId}

{experiment?.error && ( @@ -91,15 +129,14 @@ export default function Experiments() {
)} - {!experiment || - (experiment?.samples?.length === 0 && ( -
-

- No samples found for this experiment. -

- -
- ))} + {(!experiment?.samples || experiment?.samples?.length === 0) && ( +
+

+ No samples found for this experiment. +

+ +
+ )} {experiment?.samples && experiment?.samples?.length > 0 && (
@@ -121,12 +158,13 @@ export default function Experiments() { plan={experiment.plan} sample={sample} model={experiment.eval.model} - expand={expand[i]} + expand={expand ? expand[i] : false} setExpand={(value: boolean, index: number) => { setExpand( - expand.map((_: any, j: number) => { - return j === index ? value : expand[j]; - }) + expand && + expand.map((_: any, j: number) => { + return j === index ? value : expand[j]; + }) ); }} /> diff --git a/app/(protected)/project/[project_id]/experiments/compare/page.tsx b/app/(protected)/project/[project_id]/experiments/compare/page.tsx index ab4a4686..58101d04 100644 --- a/app/(protected)/project/[project_id]/experiments/compare/page.tsx +++ b/app/(protected)/project/[project_id]/experiments/compare/page.tsx @@ -2,39 +2,63 @@ import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; -import { Checkbox } from "@/components/ui/checkbox"; import { cn } from "@/lib/utils"; import { ChevronLeft } from "lucide-react"; -import { useRouter, useSearchParams } from "next/navigation"; +import { useParams, useRouter, useSearchParams } from "next/navigation"; import { useState } from "react"; -import data from "../experiment_result"; +import { useQuery } from "react-query"; +import { toast } from "sonner"; export default function Experiments() { const router = useRouter(); // get run id from query params const searchParams = useSearchParams(); + const projectId = useParams()?.project_id as string; const runIds = searchParams.getAll("run_id") as string[]; - const experiments = data.filter((exp) => runIds.includes(exp.eval.run_id)); - const isComparable = verifyIfSampleInputsMatch(runIds, data); - if (!isComparable) { - return ( -
-

- The selected experiments are not comparable. Please select experiments - ran against the same dataset. -

- -
- ); + const [isComparable, setIsComparable] = useState(false); + + const { + data: experiments, + isLoading: experimentsLoading, + error: experimentsError, + } = useQuery({ + queryKey: ["fetch-experiments-query", projectId, ...runIds], + queryFn: async () => { + const fetchPromises = runIds.map(async (runId) => { + const response = await fetch( + `/api/run?projectId=${projectId}&runId=${runId}` + ); + if (!response.ok) { + const error = await response.json(); + throw new Error(error?.message || "Failed to fetch the experiment"); + } + const result = await response.json(); + if (!result.run || !result.run.log) { + throw new Error("No experiment found"); + } + return JSON.parse(result.run.log); + }); + + const exps = await Promise.all(fetchPromises); + setIsComparable(verifyIfSampleInputsMatch(exps)); + return exps; + }, + onError: (error) => { + toast.error("Failed to fetch one or more experiments", { + description: error instanceof Error ? error.message : String(error), + }); + }, + }); + + if (experimentsLoading) { + return
Loading...
; } return (
-
- {/*

Run ID: {runId}

*/} - +
+

Comparing Runs

+

{runIds.join(", ")}

@@ -52,32 +76,49 @@ export default function Experiments() {
))} - {experiments[0]?.samples && experiments[0]?.samples?.length > 0 && ( -
-
- - - - - - {experiments.map((experiment, i) => ( - - ))} - - - - {experiments[0].samples.map((_: any, i: number) => ( - - ))} - -
- - InputTarget{`Output - (${experiment.eval.model})`}
+ {!isComparable && ( +
+

+ The selected experiments are not comparable. Please select + experiments ran against the same dataset. +

+
)} + {isComparable && + experiments && + experiments[0]?.samples && + experiments[0]?.samples?.length > 0 && ( +
+ + + + + + {experiments && + experiments.map((experiment, i) => ( + + ))} + + + + {experiments[0].samples.map((_: any, i: number) => ( + + ))} + +
+ Input + + Target + {`Output - (${experiment.eval.model})`}
+
+ )}
); @@ -96,9 +137,6 @@ function SampleRow({ className="hover:cursor-pointer hover:bg-muted group" onClick={() => setOpen(!open)} > - - e.stopPropagation()} /> - {typeof experiments[0]?.samples[index]?.input === "string" ? experiments[0]?.samples[index]?.input @@ -144,12 +182,15 @@ function SampleRow({ ); } -function verifyIfSampleInputsMatch(runIds: string[], data: any): boolean { - const experiments = data.filter((exp: any) => - runIds.includes(exp.eval.run_id) - ); +function verifyIfSampleInputsMatch(experiments: any[]): boolean { if (experiments.length === 0) return false; + // also check if the length of samples is the same for all experiments + for (let j = 1; j < experiments?.length; j++) { + if (experiments[j]?.samples?.length !== experiments[0]?.samples?.length) + return false; + } + // iterate through each experiment and each sample and check if the input of sample at index i matches with the input of sample at index i for all experiments for (let i = 0; i < experiments[0]?.samples?.length; i++) { const input = experiments[0]?.samples[i]?.input; diff --git a/app/(protected)/project/[project_id]/experiments/experiment_result.ts b/app/(protected)/project/[project_id]/experiments/experiment_result.ts deleted file mode 100644 index c37fe279..00000000 --- a/app/(protected)/project/[project_id]/experiments/experiment_result.ts +++ /dev/null @@ -1,207 +0,0 @@ -const data: any[] = [ - { - version: 1, - status: "error", - eval: { - task: "security_guide", - task_version: 0, - task_file: "src/main.py", - task_id: "agwg9gxU5RHpXc3aALKijk", - run_id: "Kkg2c5UHPamfMk9eYfJLzD", - created: "2024-05-30T11:07:59", - dataset: { - name: "security_guide", - location: "example://security_guide", - }, - model: "openai/gpt-4", - task_attribs: {}, - task_args: {}, - model_args: {}, - config: {}, - packages: { - inspect_ai: "0.3.10", - }, - }, - plan: { - name: "plan", - steps: [ - { - solver: "chain_of_thought", - params: {}, - }, - { - solver: "generate", - params: {}, - }, - { - solver: "self_critique", - params: {}, - }, - ], - config: {}, - }, - stats: { - started_at: "2024-05-30T11:07:59", - completed_at: "2024-05-30T11:07:59", - model_usage: {}, - }, - error: { - message: - "ValueError('user_prompt requested from TaskState but none available')", - traceback: - 'Traceback (most recent call last):\n\n File "/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect_ai/_eval/task/run.py", line 175, in task_run\n scores = await asyncio.gather(*tasks)\n\n File "/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect_ai/_eval/task/run.py", line 256, in task_run_sample\n state = await solver(state, generate)\n\n File "/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect_ai/solver/_prompt.py", line 33, in solve\n prompt = state.user_prompt\n\n File "/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect_ai/solver/_solver.py", line 124, in user_prompt\n raise ValueError("user_prompt requested from TaskState but none available")\n\nValueError: user_prompt requested from TaskState but none available\n', - traceback_ansi: - "\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m_ai/_eval/task/\u001b[0m\u001b[1;33mrun.py\u001b[0m:\u001b[94m175\u001b[0m in \u001b[92mtask_run\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m_ai/_eval/task/\u001b[0m\u001b[1;33mrun.py\u001b[0m:\u001b[94m256\u001b[0m in \u001b[92mtask_run_sample\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m_ai/solver/\u001b[0m\u001b[1;33m_prompt.py\u001b[0m:\u001b[94m33\u001b[0m in \u001b[92msolve\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m/Users/karthikkalyanaraman/work/inspect_ai_playground/pyenv/lib/python3.10/site-packages/inspect\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m│\u001b[0m \u001b[2;33m_ai/solver/\u001b[0m\u001b[1;33m_solver.py\u001b[0m:\u001b[94m124\u001b[0m in \u001b[92muser_prompt\u001b[0m \u001b[31m│\u001b[0m\n\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n\u001b[1;91mValueError: \u001b[0muser_prompt requested from TaskState but none available\n", - }, - logging: [], - }, - { - version: 1, - status: "success", - eval: { - task: "security_guide", - task_version: 0, - task_file: "src/main.py", - task_id: "WaGqtQ7sZYWkmmDYcubjiv", - run_id: "ar7LzC2PefbchfLMosfAWF", - created: "2024-05-30T11:06:32", - dataset: { - name: "security_guide", - location: "example://security_guide", - }, - model: "openai/gpt-3.5-turbo", - task_attribs: {}, - task_args: {}, - model_args: {}, - config: {}, - packages: { - inspect_ai: "0.3.10", - }, - }, - plan: { - name: "plan", - steps: [ - { - solver: "chain_of_thought", - params: {}, - }, - { - solver: "generate", - params: {}, - }, - { - solver: "self_critique", - params: {}, - }, - ], - config: {}, - }, - results: { - scorer: { - name: "model_graded_fact", - params: {}, - }, - metrics: { - accuracy: { - name: "accuracy", - value: 1.0, - options: {}, - }, - bootstrap_std: { - name: "bootstrap_std", - value: 0.0, - options: {}, - }, - }, - }, - stats: { - started_at: "2024-05-30T11:06:32", - completed_at: "2024-05-30T11:06:45", - model_usage: { - "openai/gpt-3.5-turbo": { - input_tokens: 1677, - output_tokens: 728, - total_tokens: 2405, - }, - }, - }, - samples: [ - { - id: 1, - epoch: 1, - input: "How do I prevent SQL Injection attacks?", - target: "use parameterized queries and prepared statements", - messages: [ - { - content: - '\nHow do I prevent SQL Injection attacks?\n\nBefore answering, reason in a step-by-step manner as to get the right answer. Provide your answer at the end on its own line in the form "ANSWER: $ANSWER" (without quotes) where $ANSWER is the answer to the question.\n', - source: "input", - role: "user", - }, - { - content: - "1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack that allows attackers to manipulate the SQL code of a web application to gain unauthorized access to the database or to perform harmful actions.\n\n2. Sanitize user inputs: One of the most common ways attackers exploit SQL Injection vulnerabilities is through user inputs. Make sure to validate and sanitize all user inputs to prevent malicious code from being executed.\n\n3. Use parameterized queries: Instead of dynamically creating SQL queries by concatenating strings, use parameterized queries which automatically sanitize inputs by treating them as data rather than executable code.\n\n4. Limit database permissions: Ensure that the database user account used by your application has the least amount of privileges necessary. Avoid using an admin account for regular application tasks.\n\n5. Update and patch regularly: Make sure your database management system, web server, and application software are up to date with the latest security patches to protect against known vulnerabilities.\n\n6. Implement input validation: Validate and sanitize user inputs on both client-side and server-side to reduce the risk of SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, sanitize user inputs, use parameterized queries, limit database permissions, update and patch regularly, and implement input validation.", - source: "generate", - role: "assistant", - }, - { - content: - '\nGiven the following question, initial answer and critique please generate an improved answer to the question:\n\n[BEGIN DATA]\n***\n[Question]: How do I prevent SQL Injection attacks?\n***\n[Answer]: 1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack that allows attackers to manipulate the SQL code of a web application to gain unauthorized access to the database or to perform harmful actions.\n\n2. Sanitize user inputs: One of the most common ways attackers exploit SQL Injection vulnerabilities is through user inputs. Make sure to validate and sanitize all user inputs to prevent malicious code from being executed.\n\n3. Use parameterized queries: Instead of dynamically creating SQL queries by concatenating strings, use parameterized queries which automatically sanitize inputs by treating them as data rather than executable code.\n\n4. Limit database permissions: Ensure that the database user account used by your application has the least amount of privileges necessary. Avoid using an admin account for regular application tasks.\n\n5. Update and patch regularly: Make sure your database management system, web server, and application software are up to date with the latest security patches to protect against known vulnerabilities.\n\n6. Implement input validation: Validate and sanitize user inputs on both client-side and server-side to reduce the risk of SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, sanitize user inputs, use parameterized queries, limit database permissions, update and patch regularly, and implement input validation.\n***\n[Critique]: The answer provides a good overview of ways to prevent SQL Injection attacks, but it lacks depth and specificity in explaining each point. It would be beneficial to provide more details and examples for each prevention method to ensure a comprehensive understanding for the reader. Additionally, the answer could benefit from explaining why each prevention method is effective in stopping SQL Injection attacks.\n***\n[END DATA]\n\nIf the original answer is already correct, just repeat the original answer exactly. Provide your answer at the end on its own line in the form "ANSWER: $ANSWER" (without quotes) where $ANSWER is the answer to the question.\n', - role: "user", - }, - { - content: - "Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.", - source: "generate", - role: "assistant", - }, - ], - output: { - model: "gpt-3.5-turbo-0125", - choices: [ - { - message: { - content: - "Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.", - source: "generate", - role: "assistant", - }, - stop_reason: "stop", - }, - ], - usage: { - input_tokens: 738, - output_tokens: 294, - total_tokens: 1032, - }, - }, - score: { - value: "C", - answer: - "Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.", - explanation: - "1. The expert answer mentions using parameterized queries and prepared statements as a way to prevent SQL Injection attacks.\n2. The submission also includes using parameterized queries as one of the ways to prevent SQL Injection attacks.\n3. The submission expands on the concept, explaining what SQL Injection attacks are, sanitizing user inputs, limiting database permissions, updating software regularly, and implementing input validation.\n4. All additional points mentioned in the submission are relevant and contribute to preventing SQL Injection attacks.\n\nAfter a thorough comparison, it is evident that the submission contains the content mentioned in the expert answer. \n\nGRADE: C", - metadata: { - grading: [ - { - content: - "\nYou are comparing a submitted answer to an expert answer on a given question. Here is the data:\n\n[BEGIN DATA]\n************\n[Question]: How do I prevent SQL Injection attacks?\n************\n[Expert]: use parameterized queries and prepared statements\n************\n[Submission]: Improved Answer:\n1. Understand what SQL Injection attacks are: SQL Injection is a type of cyberattack where malicious SQL statements are inserted into an entry field for execution. This can allow attackers to gain unauthorized access to data or manipulate it in unintended ways.\n\n2. Sanitize user inputs: By validating and sanitizing all user inputs, you can ensure that no malicious SQL code is included. This involves removing potentially harmful characters or encoding inputs to prevent attacks.\n\n3. Use parameterized queries: Parameterized queries separate SQL code from user input, treating them as data rather than executable code. This prevents attackers from injecting SQL statements through user inputs.\n\n4. Limit database permissions: Implement a principle of least privilege by granting only necessary permissions to database users. Avoid using overly permissive admin accounts for everyday tasks, reducing the potential impact of a successful attack.\n\n5. Update and patch regularly: Keeping all software components up to date with security patches is crucial to fixing known vulnerabilities that could be exploited by attackers. This helps in safeguarding against SQL Injection attacks.\n\n6. Implement input validation: Utilize both client-side and server-side validation mechanisms to verify and sanitize inputs before interacting with the database. This multi-layered approach can significantly reduce the risk of successful SQL Injection attacks.\n\nANSWER: To prevent SQL Injection attacks, it is essential to understand the nature of the attacks, sanitize user inputs, use parameterized queries, limit database permissions, update software regularly, and implement thorough input validation.\n************\n[END DATA]\n\nCompare the factual content of the submitted answer with the expert answer. Ignore any differences in style, grammar, or punctuation.\n\nDoes the submission contain the content in the expert answer?\n\n\nAfter assessing the submitted answer, reply with 'GRADE: $LETTER' (without quotes) where LETTER is one of CI. Please choose ONE option for the grade: either \"C\" for correct answers, or \"I\" for incorrect answers.\n\nFor example, after reviewing a correct answer you might write 'GRADE: C' or after reviewing an incorrect answer you might write 'GRADE: I'.\n\nFirst, write out in a step by step manner your reasoning about the criterion to be sure that your conclusion is correct. Avoid simply stating the correct answers at the outset. Then, end with your answer formatted as 'GRADE: $LETTER' (without quotes) where LETTER is one of CI.\n\n", - role: "user", - }, - { - content: - "1. The expert answer mentions using parameterized queries and prepared statements as a way to prevent SQL Injection attacks.\n2. The submission also includes using parameterized queries as one of the ways to prevent SQL Injection attacks.\n3. The submission expands on the concept, explaining what SQL Injection attacks are, sanitizing user inputs, limiting database permissions, updating software regularly, and implementing input validation.\n4. All additional points mentioned in the submission are relevant and contribute to preventing SQL Injection attacks.\n\nAfter a thorough comparison, it is evident that the submission contains the content mentioned in the expert answer. \n\nGRADE: C", - source: "generate", - role: "assistant", - }, - ], - }, - }, - metadata: {}, - }, - ], - logging: [], - }, -]; - -export default data; diff --git a/app/(protected)/project/[project_id]/experiments/page-client.tsx b/app/(protected)/project/[project_id]/experiments/page-client.tsx index 665cdf86..d5480106 100644 --- a/app/(protected)/project/[project_id]/experiments/page-client.tsx +++ b/app/(protected)/project/[project_id]/experiments/page-client.tsx @@ -4,24 +4,55 @@ import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; import { Checkbox } from "@/components/ui/checkbox"; import { cn, formatDateTime } from "@/lib/utils"; +import { Run } from "@prisma/client"; import { useParams, useRouter } from "next/navigation"; import { useState } from "react"; -import data from "./experiment_result"; +import { useQuery } from "react-query"; +import { toast } from "sonner"; export default function Experiments() { const router = useRouter(); const projectId = useParams()?.project_id as string; const [comparisonRunIds, setComparisonRunIds] = useState([]); + + const { + data: experiments, + isLoading: experimentsLoading, + error: experimentsError, + } = useQuery({ + queryKey: ["fetch-experiments-query"], + queryFn: async () => { + const response = await fetch(`/api/run?projectId=${projectId}`); + if (!response.ok) { + const error = await response.json(); + throw new Error(error?.message || "Failed to fetch experiments"); + } + const result = await response.json(); + return result; + }, + onError: (error) => { + toast.error("Failed to fetch experiments", { + description: error instanceof Error ? error.message : String(error), + }); + }, + }); + + if (experimentsLoading) { + return
Loading...
; + } + return (

Experiments

-
- {data && data.length === 0 && ( + {experiments && experiments.runs && experiments.runs?.length === 0 && (

No experiments found. Get started by running your first @@ -45,7 +76,7 @@ export default function Experiments() {

)} - {data && data.length > 0 && ( + {experiments && experiments.runs && experiments.runs?.length > 0 && (
@@ -76,91 +107,86 @@ export default function Experiments() { - {data.map((experiment) => ( - - router.push( - `/project/${projectId}/experiments/${experiment.eval.run_id}` - ) - } - > - + router.push( + `/project/${projectId}/experiments/${log?.eval?.run_id}` + ) + } > - { - if (value) { - setComparisonRunIds([ - ...comparisonRunIds, - experiment.eval.run_id, - ]); - } else { - setComparisonRunIds( - comparisonRunIds.filter( - (id) => id !== experiment.eval.run_id + + + + + + + + + + - - - - - - - - - - - - ))} + : "N/A"} + + + + ); + })}
e.stopPropagation()} + {experiments?.runs.map((experiment: Run) => { + const log: any = JSON.parse(experiment.log as string); + return ( +
e.stopPropagation()} + > + { + if (value) { + setComparisonRunIds([ + ...comparisonRunIds, + log?.eval?.run_id, + ]); + } else { + setComparisonRunIds( + comparisonRunIds.filter( + (id) => id !== log?.eval?.run_id + ) + ); + } + }} + /> + {log?.eval?.run_id} + {formatDateTime(log?.stats?.started_at, true)} + + {formatDateTime(log?.stats?.completed_at, true)} + {log?.eval?.task} + {log?.samples?.length || 0} + {log?.eval?.model}{log?.plan?.name} + {log?.results?.scorer?.name || "N/A"} + + {log?.results?.metrics + ? Object.keys(log?.results?.metrics).map( + (metric, i) => ( + + {metric + + ": " + + (log?.results?.metrics as any)[ + metric + ].value.toFixed(2)} + ) - ); - } - }} - /> - - {experiment.eval.run_id} - - {formatDateTime(experiment.stats.started_at, true)} - - {formatDateTime(experiment.stats.completed_at, true)} - - {experiment.eval.task} - - {experiment.samples?.length || 0} - - {experiment.eval.model} - - {experiment.plan.name} - - {experiment.results?.scorer?.name || "N/A"} - - {experiment.results?.metrics - ? Object.keys(experiment.results?.metrics).map( - (metric, i) => ( - - {metric + - ": " + - (experiment.results.metrics as any)[ - metric - ].value.toFixed(2)} - ) - ) - : "N/A"} - - - {experiment.status} - -
+ + {log?.status} + +
diff --git a/app/api/dataset/download/route.ts b/app/api/dataset/download/route.ts index 36564551..cb29e18f 100644 --- a/app/api/dataset/download/route.ts +++ b/app/api/dataset/download/route.ts @@ -1,21 +1,54 @@ import { authOptions } from "@/lib/auth/options"; import prisma from "@/lib/prisma"; -import json2csv from 'json2csv'; +import { authApiKey } from "@/lib/utils"; +import json2csv from "json2csv"; import { getServerSession } from "next-auth"; import { redirect } from "next/navigation"; import { NextRequest, NextResponse } from "next/server"; export async function GET(req: NextRequest) { try { - const session = await getServerSession(authOptions); - if (!session || !session.user) { - redirect("/login"); + let projectId = ""; + const apiKey = req.headers.get("x-api-key"); + if (!apiKey) { + const session = await getServerSession(authOptions); + if (!session || !session.user) { + redirect("/login"); + } + } else { + const response = await authApiKey(apiKey!); + if (response.status !== 200) { + return response; + } + + // Get project data + const projectData = await response.json(); + projectId = projectData.data.project.id; + } + + if (!projectId) { + projectId = req.nextUrl.searchParams.get("projectId") as string; + } + + const project = await prisma.project.findFirst({ + where: { + id: projectId, + }, + }); + + if (!project) { + return NextResponse.json( + { + message: "No projects found", + }, + { status: 404 } + ); } + const datasetId = req.nextUrl.searchParams.get("id") as string; const pageParam = req.nextUrl.searchParams.get("page"); let page = pageParam ? parseInt(pageParam, 10) : 1; const pageSize = 500; - let dataset; if (!datasetId) { return NextResponse.json( { @@ -24,17 +57,15 @@ export async function GET(req: NextRequest) { { status: 404 } ); } - else { - dataset = await prisma.dataset.findFirst({ - where: { - id: datasetId, - }, - include: { - Data: true, - }, - }); + const dataset = await prisma.dataset.findFirst({ + where: { + id: datasetId, + }, + include: { + Data: true, + }, + }); - } if (!dataset) { return NextResponse.json( { @@ -55,15 +86,28 @@ export async function GET(req: NextRequest) { skip: (page - 1) * pageSize, }); - const csv = json2csv.parse(data); - const datasetName = dataset.name.toLowerCase().replace(/\s+/g, '_'); - const timestamp = new Date().toISOString().slice(0, 19).replace(/[-:]/g, ''); + const result: any = []; + // convert the data to include only input, output and expectedOutput fields. Rename output to target + data.forEach((d) => { + result.push({ + input: d.input, + target: d.output, + annotated_output: d.annotatedOutput, + }); + }); + + const csv = json2csv.parse(result); + const datasetName = dataset.name.toLowerCase().replace(/\s+/g, "_"); + const timestamp = new Date() + .toISOString() + .slice(0, 19) + .replace(/[-:]/g, ""); const filename = `${datasetName}_${timestamp}.csv`; return new NextResponse(csv, { headers: { - 'Content-Type': 'text/csv', - 'Content-Disposition': `filename:${filename}`, + "Content-Type": "text/csv", + "Content-Disposition": `filename:${filename}`, }, }); } catch (error) { @@ -74,4 +118,4 @@ export async function GET(req: NextRequest) { { status: 500 } ); } -} \ No newline at end of file +} diff --git a/app/api/run/route.ts b/app/api/run/route.ts new file mode 100644 index 00000000..0276b0da --- /dev/null +++ b/app/api/run/route.ts @@ -0,0 +1,198 @@ +import { authOptions } from "@/lib/auth/options"; +import prisma from "@/lib/prisma"; +import { authApiKey } from "@/lib/utils"; +import { getServerSession } from "next-auth"; +import { redirect } from "next/navigation"; +import { NextRequest, NextResponse } from "next/server"; + +export async function GET(req: NextRequest) { + try { + let projectId = ""; + const session = await getServerSession(authOptions); + if (!session || !session.user) { + const apiKey = req.headers.get("x-api-key"); + if (!apiKey) { + redirect("/login"); + } + + const response = await authApiKey(apiKey!); + if (response.status !== 200) { + return response; + } + + // Get project data + const projectData = await response.json(); + projectId = projectData.data.project.id; + } + + if (!projectId) { + projectId = req.nextUrl.searchParams.get("projectId") as string; + } + + const project = await prisma.project.findFirst({ + where: { + id: projectId, + }, + }); + + if (!project) { + return NextResponse.json( + { + message: "No projects found", + }, + { status: 404 } + ); + } + + // Get run by id + const runId = req.nextUrl.searchParams.get("runId"); + if (runId) { + const run = await prisma.run.findFirst({ + where: { + projectId: projectId, + runId: runId, + }, + }); + + return NextResponse.json({ + run: run, + }); + } + + // Get runs + const pageParam = req.nextUrl.searchParams.get("page"); + const pageSizeParam = req.nextUrl.searchParams.get("pageSize"); + let page = pageParam ? parseInt(pageParam, 10) : 1; + const pageSize = pageSizeParam ? parseInt(pageSizeParam, 10) : 10; + + const totalLen = await prisma.run.count({ + where: { + projectId: projectId, + }, + }); + + const totalPages = + Math.ceil(totalLen / pageSize) === 0 ? 1 : Math.ceil(totalLen / pageSize); + const md = { page, page_size: pageSize, total_pages: totalPages }; + + if (page! > totalPages) { + page = totalPages; + } + + const runs = await prisma.run.findMany({ + where: { + projectId: projectId, + }, + orderBy: { + createdAt: "desc", + }, + take: pageSize, + skip: (page - 1) * pageSize, + }); + + return NextResponse.json({ + runs: runs, + metadata: md, + }); + } catch (error) { + return NextResponse.json( + { + message: "Internal server error", + }, + { status: 500 } + ); + } +} + +export async function POST(req: NextRequest) { + const apiKey = req.headers.get("x-api-key"); + if (!apiKey) { + redirect("/login"); + } + + const response = await authApiKey(apiKey!); + if (response.status !== 200) { + return response; + } + + // Get project data + const projectData = await response.json(); + const projectId = projectData.data.project.id; + + const d = await req.json(); + const { runId, taskId, description, wfVersion, log, datasetId } = d; + + const payload: any = { + runId, + taskId, + log, + projectId, + }; + + if (description) { + payload["description"] = description; + } + if (wfVersion) { + payload["wfVersion"] = wfVersion; + } + if (datasetId) { + payload["datasetId"] = datasetId; + } + + const run = await prisma.run.create({ + data: payload, + }); + + return NextResponse.json({ + run: run, + }); +} + +export async function PUT(req: NextRequest) { + const session = await getServerSession(authOptions); + if (!session || !session.user) { + redirect("/login"); + } + + const d = await req.json(); + const { id, description, wfVersion, datasetId } = d; + const payload: any = {}; + if (description) { + payload["description"] = description; + } + if (wfVersion) { + payload["wfVersion"] = wfVersion; + } + if (datasetId) { + payload["datasetId"] = datasetId; + } + + const run = await prisma.dataset.update({ + where: { + id, + }, + data: payload, + }); + + return NextResponse.json({ + run: run, + }); +} + +export async function DELETE(req: NextRequest) { + const session = await getServerSession(authOptions); + if (!session || !session.user) { + redirect("/login"); + } + + const data = await req.json(); + const { id } = data; + + await prisma.run.delete({ + where: { + id, + }, + }); + + return NextResponse.json({}); +} diff --git a/components/shared/download-dataset.tsx b/components/shared/download-dataset.tsx index e6b550bf..2fe0636b 100644 --- a/components/shared/download-dataset.tsx +++ b/components/shared/download-dataset.tsx @@ -12,16 +12,17 @@ import { DialogTitle, DialogTrigger, } from "@/components/ui/dialog"; -import { useState } from 'react'; +import { useState } from "react"; import { toast } from "sonner"; export function DownloadDataset({ + projectId, datasetId, - disabled=false, + disabled = false, }: { + projectId: string; datasetId: string; disabled?: boolean; - }) { const [open, setOpen] = useState(false); const [busy, setBusy] = useState(false); @@ -29,23 +30,26 @@ export function DownloadDataset({ setBusy(true); try { datasetId = datasetId.toString(); - const response = await fetch(`/api/dataset/download?id=${datasetId}`, { - method: "GET", - headers: { - "Content-Type": "text/csv", - }, - }); + const response = await fetch( + `/api/dataset/download?id=${datasetId}&projectId=${projectId}`, + { + method: "GET", + headers: { + "Content-Type": "text/csv", + }, + } + ); if (!response.ok) { throw new Error("Failed to download dataset."); } // Extract filename from Content-Disposition header - const contentDisposition = response.headers.get('Content-Disposition'); + const contentDisposition = response.headers.get("Content-Disposition"); let filename; if (contentDisposition) { - const filenameKeyValue = contentDisposition.split(':')[1].split('.'); + const filenameKeyValue = contentDisposition.split(":")[1].split("."); if (filenameKeyValue.length === 2) { filename = filenameKeyValue[0]; @@ -74,7 +78,7 @@ export function DownloadDataset({ return ( - @@ -82,11 +86,16 @@ export function DownloadDataset({ Download Dataset - This will download the data as .csv and only up to a maximum of 500 records. To download the entire dataset, please contact us. + This will download the data as .csv and only up to a maximum of 500 + records. To download the entire dataset, please contact us. - ); -} \ No newline at end of file +} diff --git a/prisma/schema.prisma b/prisma/schema.prisma index 2ab4a476..89585f57 100644 --- a/prisma/schema.prisma +++ b/prisma/schema.prisma @@ -95,10 +95,18 @@ model Evaluation { } model Run { - id String @id @default(cuid()) - name String + id String @id @default(cuid()) + runId String + taskId String description String? - Data Data[] + wfVersion String? + log Json? + datasetId String? + projectId String + Dataset Dataset? @relation(fields: [datasetId], references: [id]) + Project Project @relation(fields: [projectId], references: [id], onDelete: Cascade) + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt } model Prompt { @@ -119,22 +127,20 @@ model Prompt { } model Data { - id String @id @default(cuid()) - input String - output String - contexts String[] - expectedOutput String? - note String? - spanId String? - Evaluation Evaluation[] - runId String? - Run Run? @relation(fields: [runId], references: [id], onDelete: Cascade) - datasetId String? - Dataset Dataset? @relation(fields: [datasetId], references: [id], onDelete: Cascade) - projectId String? - Project Project? @relation(fields: [projectId], references: [id], onDelete: Cascade) - createdAt DateTime @default(now()) - updatedAt DateTime @updatedAt + id String @id @default(cuid()) + input String + output String + contexts String[] + annotatedOutput String? + note String? + spanId String? + Evaluation Evaluation[] + datasetId String? + Dataset Dataset? @relation(fields: [datasetId], references: [id], onDelete: Cascade) + projectId String? + Project Project? @relation(fields: [projectId], references: [id], onDelete: Cascade) + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt } model Dataset { @@ -146,6 +152,7 @@ model Dataset { createdAt DateTime @default(now()) updatedAt DateTime @updatedAt Data Data[] + Run Run[] } model Promptset { @@ -184,6 +191,7 @@ model Project { Promptset Promptset[] Test Test[] Data Data[] + Run Run[] } model TeamInvite { From 699f37e7497f45b8582a8b24bffdecb305bead3d Mon Sep 17 00:00:00 2001 From: Karthik Kalyanaraman Date: Wed, 12 Jun 2024 21:32:24 -0700 Subject: [PATCH 08/16] Experiments pagination --- .../[project_id]/experiments/page-client.tsx | 58 ++++++++++++++----- 1 file changed, 43 insertions(+), 15 deletions(-) diff --git a/app/(protected)/project/[project_id]/experiments/page-client.tsx b/app/(protected)/project/[project_id]/experiments/page-client.tsx index d5480106..13130120 100644 --- a/app/(protected)/project/[project_id]/experiments/page-client.tsx +++ b/app/(protected)/project/[project_id]/experiments/page-client.tsx @@ -7,6 +7,7 @@ import { cn, formatDateTime } from "@/lib/utils"; import { Run } from "@prisma/client"; import { useParams, useRouter } from "next/navigation"; import { useState } from "react"; +import { useBottomScrollListener } from "react-bottom-scroll-listener"; import { useQuery } from "react-query"; import { toast } from "sonner"; @@ -14,15 +15,27 @@ export default function Experiments() { const router = useRouter(); const projectId = useParams()?.project_id as string; const [comparisonRunIds, setComparisonRunIds] = useState([]); + const [page, setPage] = useState(1); + const [totalPages, setTotalPages] = useState(1); + const [currentData, setCurrentData] = useState([]); + const [showLoader, setShowLoader] = useState(false); - const { - data: experiments, - isLoading: experimentsLoading, - error: experimentsError, - } = useQuery({ + const scrollableDivRef = useBottomScrollListener(() => { + if (fetchExperiments.isRefetching) { + return; + } + if (page <= totalPages) { + setShowLoader(true); + fetchExperiments.refetch(); + } + }); + + const fetchExperiments = useQuery({ queryKey: ["fetch-experiments-query"], queryFn: async () => { - const response = await fetch(`/api/run?projectId=${projectId}`); + const response = await fetch( + `/api/run?projectId=${projectId}&page=${page}&pageSize=25` + ); if (!response.ok) { const error = await response.json(); throw new Error(error?.message || "Failed to fetch experiments"); @@ -30,6 +43,23 @@ export default function Experiments() { const result = await response.json(); return result; }, + onSuccess: (data) => { + const newData = data.runs || []; + const metadata = data?.metadata || {}; + + setTotalPages(parseInt(metadata?.total_pages) || 1); + if (parseInt(metadata?.page) <= parseInt(metadata?.total_pages)) { + setPage(parseInt(metadata?.page) + 1); + } + // Merge the new data with the existing data + if (currentData.length > 0) { + const updatedData = [...currentData, ...newData]; + setCurrentData(updatedData); + } else { + setCurrentData(newData); + } + setShowLoader(false); + }, onError: (error) => { toast.error("Failed to fetch experiments", { description: error instanceof Error ? error.message : String(error), @@ -37,7 +67,7 @@ export default function Experiments() { }, }); - if (experimentsLoading) { + if (fetchExperiments.isLoading) { return
Loading...
; } @@ -46,13 +76,11 @@ export default function Experiments() {

Experiments

-
- {experiments && experiments.runs && experiments.runs?.length === 0 && ( + {currentData.length === 0 && (

No experiments found. Get started by running your first @@ -76,8 +104,8 @@ export default function Experiments() {

)} - {experiments && experiments.runs && experiments.runs?.length > 0 && ( -
+ {currentData.length > 0 && ( +
@@ -107,7 +135,7 @@ export default function Experiments() { - {experiments?.runs.map((experiment: Run) => { + {currentData.map((experiment: Run) => { const log: any = JSON.parse(experiment.log as string); return ( Date: Wed, 12 Jun 2024 21:35:34 -0700 Subject: [PATCH 09/16] deduplicate paginated data --- .../project/[project_id]/experiments/page-client.tsx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/app/(protected)/project/[project_id]/experiments/page-client.tsx b/app/(protected)/project/[project_id]/experiments/page-client.tsx index 13130120..b7895152 100644 --- a/app/(protected)/project/[project_id]/experiments/page-client.tsx +++ b/app/(protected)/project/[project_id]/experiments/page-client.tsx @@ -54,7 +54,12 @@ export default function Experiments() { // Merge the new data with the existing data if (currentData.length > 0) { const updatedData = [...currentData, ...newData]; - setCurrentData(updatedData); + // Remove duplicates + const uniqueData = updatedData.filter( + (v: any, i: number, a: any) => + a.findIndex((t: any) => t.id === v.id) === i + ); + setCurrentData(uniqueData); } else { setCurrentData(newData); } From 1930af7bb311ddd72a5ed31214f84b41c8bfc5c3 Mon Sep 17 00:00:00 2001 From: Karthik Kalyanaraman Date: Wed, 12 Jun 2024 22:11:05 -0700 Subject: [PATCH 10/16] Fix loading and error states - experiments --- .../experiments/[run_id]/page.tsx | 158 +++++---- .../[project_id]/experiments/compare/page.tsx | 39 ++- .../[project_id]/experiments/page-client.tsx | 312 +++++++++++------- 3 files changed, 298 insertions(+), 211 deletions(-) diff --git a/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx b/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx index b9e93ed5..326a72c8 100644 --- a/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx +++ b/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx @@ -12,6 +12,7 @@ import { SheetHeader, SheetTitle, } from "@/components/ui/sheet"; +import { Skeleton } from "@/components/ui/skeleton"; import { cn } from "@/lib/utils"; import { ChevronLeft, ChevronRight, MoveDiagonal, X } from "lucide-react"; import { useParams, useRouter } from "next/navigation"; @@ -57,26 +58,24 @@ export default function Experiments() { }, }); - if (experimentLoading) { - return
Loading...
; - } - return (

Run ID

{runId}

- - {experiment.status} - + {!experimentError && !experimentLoading && ( + + {experiment.status} + + )}
- + {!experimentError && !experimentLoading && ( + + )}
{experiment?.error && (
@@ -129,50 +134,65 @@ export default function Experiments() {
)} - {(!experiment?.samples || experiment?.samples?.length === 0) && ( + {experimentError && (

- No samples found for this experiment. + Failed to fetch the experiment. Please try again later.

-
)} - {experiment?.samples && experiment?.samples?.length > 0 && ( -
-
- - - - - - - - - - {experiment.samples.map((sample: any, i: number) => ( - { - setExpand( - expand && - expand.map((_: any, j: number) => { - return j === index ? value : expand[j]; - }) - ); - }} - /> - ))} - -
InputTarget{`Output - (${experiment.eval.model})`} - Explanation -
+ {((!experimentError && !experimentLoading && !experiment?.samples) || + experiment?.samples?.length === 0) && ( +
+

+ No samples found for this experiment. +

+
)} + {!experimentLoading && + experiment?.samples && + experiment?.samples?.length > 0 && ( +
+ + + + + + + + + + + {experiment.samples.map((sample: any, i: number) => ( + { + setExpand( + expand && + expand.map((_: any, j: number) => { + return j === index ? value : expand[j]; + }) + ); + }} + /> + ))} + +
+ Input + + Target + {`Output - (${experiment.eval.model})`} + Explanation +
+
+ )} + {experimentLoading && }
); diff --git a/app/(protected)/project/[project_id]/experiments/compare/page.tsx b/app/(protected)/project/[project_id]/experiments/compare/page.tsx index 58101d04..d03db789 100644 --- a/app/(protected)/project/[project_id]/experiments/compare/page.tsx +++ b/app/(protected)/project/[project_id]/experiments/compare/page.tsx @@ -2,6 +2,7 @@ import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; +import { Skeleton } from "@/components/ui/skeleton"; import { cn } from "@/lib/utils"; import { ChevronLeft } from "lucide-react"; import { useParams, useRouter, useSearchParams } from "next/navigation"; @@ -50,10 +51,6 @@ export default function Experiments() { }, }); - if (experimentsLoading) { - return
Loading...
; - } - return (
@@ -67,16 +64,26 @@ export default function Experiments() { Back
- {!experiments || - (experiments?.length === 0 && ( -
-

- No experiments found for comparison. -

- -
- ))} - {!isComparable && ( + {!experimentsLoading && + !experimentsError && + (!experiments || + (experiments?.length === 0 && ( +
+

+ No experiments found for comparison. +

+ +
+ )))} + {experimentsError && !experimentsLoading && ( +
+

+ Something went wrong while fetching the experiments. Please try + again. +

+
+ )} + {!experimentsLoading && !isComparable && (

The selected experiments are not comparable. Please select @@ -88,7 +95,8 @@ export default function Experiments() {

)} - {isComparable && + {!experimentsLoading && + isComparable && experiments && experiments[0]?.samples && experiments[0]?.samples?.length > 0 && ( @@ -119,6 +127,7 @@ export default function Experiments() {
)} + {experimentsLoading && }
); diff --git a/app/(protected)/project/[project_id]/experiments/page-client.tsx b/app/(protected)/project/[project_id]/experiments/page-client.tsx index b7895152..45eb665b 100644 --- a/app/(protected)/project/[project_id]/experiments/page-client.tsx +++ b/app/(protected)/project/[project_id]/experiments/page-client.tsx @@ -3,6 +3,7 @@ import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; import { Checkbox } from "@/components/ui/checkbox"; +import { Skeleton } from "@/components/ui/skeleton"; import { cn, formatDateTime } from "@/lib/utils"; import { Run } from "@prisma/client"; import { useParams, useRouter } from "next/navigation"; @@ -72,10 +73,6 @@ export default function Experiments() { }, }); - if (fetchExperiments.isLoading) { - return
Loading...
; - } - return (
@@ -99,132 +96,193 @@ export default function Experiments() {
-
- {currentData.length === 0 && ( -
-

- No experiments found. Get started by running your first - experiment. -

- -
- )} - {currentData.length > 0 && ( -
- - - - - - - - - - - - - - - - - - {currentData.map((experiment: Run) => { - const log: any = JSON.parse(experiment.log as string); - return ( - - router.push( - `/project/${projectId}/experiments/${log?.eval?.run_id}` - ) - } - > - + + + ); + })} + +
- - Run ID - Started at - - Completed at - Task - Total Samples - Model - Plan Name - Scorer - Metrics - Status
e.stopPropagation()} + {!fetchExperiments.isLoading && ( +
+ {currentData.length === 0 && ( +
+

+ {fetchExperiments.isError + ? "Something went wrong. Please try later." + : "No experiments found. Get started by running your first experiment."} +

+ +
+ )} + {currentData.length > 0 && ( +
+ + + + + + + + + + + + + + + + + + {currentData.map((experiment: Run) => { + const log: any = JSON.parse(experiment.log as string); + return ( + + router.push( + `/project/${projectId}/experiments/${log?.eval?.run_id}` + ) + } > - { - if (value) { - setComparisonRunIds([ - ...comparisonRunIds, - log?.eval?.run_id, - ]); - } else { - setComparisonRunIds( - comparisonRunIds.filter( - (id) => id !== log?.eval?.run_id + + + + + + + + + + - - - - - - - - - - - - ); - })} - -
+ + + Run ID + + Started at + + Completed at + Task + Total Samples + + Model + + Plan Name + + Scorer + + Metrics + + Status +
e.stopPropagation()} + > + { + if (value) { + setComparisonRunIds([ + ...comparisonRunIds, + log?.eval?.run_id, + ]); + } else { + setComparisonRunIds( + comparisonRunIds.filter( + (id) => id !== log?.eval?.run_id + ) + ); + } + }} + /> + + {log?.eval?.run_id} + + {formatDateTime(log?.stats?.started_at, true)} + + {formatDateTime(log?.stats?.completed_at, true)} + {log?.eval?.task} + {log?.samples?.length || 0} + + {log?.eval?.model} + {log?.plan?.name} + {log?.results?.scorer?.name || "N/A"} + + {log?.results?.metrics + ? Object.keys(log?.results?.metrics).map( + (metric, i) => ( + + {metric + + ": " + + (log?.results?.metrics as any)[ + metric + ].value.toFixed(2)} + ) - ); - } - }} - /> - {log?.eval?.run_id} - {formatDateTime(log?.stats?.started_at, true)} - - {formatDateTime(log?.stats?.completed_at, true)} - {log?.eval?.task} - {log?.samples?.length || 0} - {log?.eval?.model}{log?.plan?.name} - {log?.results?.scorer?.name || "N/A"} - - {log?.results?.metrics - ? Object.keys(log?.results?.metrics).map( - (metric, i) => ( - - {metric + - ": " + - (log?.results?.metrics as any)[ - metric - ].value.toFixed(2)} - ) - ) - : "N/A"} - - - {log?.status} - -
-
- )} -
+ : "N/A"} +
+ + {log?.status} + +
+
+ )} +
+ )} + {fetchExperiments.isLoading && ( +
+ + + + + + + + + + + + + + + + + + {[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11].map((j) => ( + + {[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11].map((i) => ( + + ))} + + ))} + +
+ + Run ID + Started at + + Completed at + Task + Total Samples + Model + Plan Name + ScorerMetricsStatus
+ +
+
+ )}
); } From 5bb474185a0ed2dc98050990a917e420ac2c7b5e Mon Sep 17 00:00:00 2001 From: Karthik Kalyanaraman Date: Wed, 12 Jun 2024 22:34:58 -0700 Subject: [PATCH 11/16] Rename experiments and evaluations --- .../{evaluate => annotations}/page-client.tsx | 12 +++---- .../{evaluate => annotations}/page.tsx | 0 .../datasets/dataset/[dataset_id]/page.tsx | 8 ++++- .../[run_id]/page.tsx | 35 +++++++++++++------ .../compare/page.tsx | 25 +++++++------ .../page-client.tsx | 30 ++++++++++------ .../{experiments => evaluations}/page.tsx | 0 .../{evaluate => annotations}/create-test.tsx | 0 .../{evaluate => annotations}/edit-test.tsx | 0 .../eval-scale-picker.tsx | 0 .../evaluation-row.tsx | 0 .../evaluation-table.tsx | 0 .../{evaluate => annotations}/range-scale.tsx | 0 .../report-utility.tsx | 0 components/shared/nav.tsx | 8 ++--- 15 files changed, 75 insertions(+), 43 deletions(-) rename app/(protected)/project/[project_id]/{evaluate => annotations}/page-client.tsx (94%) rename app/(protected)/project/[project_id]/{evaluate => annotations}/page.tsx (100%) rename app/(protected)/project/[project_id]/{experiments => evaluations}/[run_id]/page.tsx (92%) rename app/(protected)/project/[project_id]/{experiments => evaluations}/compare/page.tsx (90%) rename app/(protected)/project/[project_id]/{experiments => evaluations}/page-client.tsx (92%) rename app/(protected)/project/[project_id]/{experiments => evaluations}/page.tsx (100%) rename components/{evaluate => annotations}/create-test.tsx (100%) rename components/{evaluate => annotations}/edit-test.tsx (100%) rename components/{evaluate => annotations}/eval-scale-picker.tsx (100%) rename components/{evaluate => annotations}/evaluation-row.tsx (100%) rename components/{evaluate => annotations}/evaluation-table.tsx (100%) rename components/{evaluate => annotations}/range-scale.tsx (100%) rename components/{experiments => evaluations}/report-utility.tsx (100%) diff --git a/app/(protected)/project/[project_id]/evaluate/page-client.tsx b/app/(protected)/project/[project_id]/annotations/page-client.tsx similarity index 94% rename from app/(protected)/project/[project_id]/evaluate/page-client.tsx rename to app/(protected)/project/[project_id]/annotations/page-client.tsx index add3f5b6..8632f83a 100644 --- a/app/(protected)/project/[project_id]/evaluate/page-client.tsx +++ b/app/(protected)/project/[project_id]/annotations/page-client.tsx @@ -1,12 +1,12 @@ "use client"; -import { EvalChart } from "@/components/charts/eval-chart"; -import LargeChartSkeleton from "@/components/charts/large-chart-skeleton"; -import { CreateTest } from "@/components/evaluate/create-test"; -import { EditTest } from "@/components/evaluate/edit-test"; +import { CreateTest } from "@/components/annotations/create-test"; +import { EditTest } from "@/components/annotations/edit-test"; import EvaluationTable, { EvaluationTableSkeleton, -} from "@/components/evaluate/evaluation-table"; +} from "@/components/annotations/evaluation-table"; +import { EvalChart } from "@/components/charts/eval-chart"; +import LargeChartSkeleton from "@/components/charts/large-chart-skeleton"; import { AddtoDataset } from "@/components/shared/add-to-dataset"; import { Button } from "@/components/ui/button"; import { Test } from "@prisma/client"; @@ -73,7 +73,7 @@ export default function PageClient({ email }: { email: string }) { return (
-

Evaluations

+

Annotate & Measure

{tests?.tests?.length > 0 && ( diff --git a/app/(protected)/project/[project_id]/evaluate/page.tsx b/app/(protected)/project/[project_id]/annotations/page.tsx similarity index 100% rename from app/(protected)/project/[project_id]/evaluate/page.tsx rename to app/(protected)/project/[project_id]/annotations/page.tsx diff --git a/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx b/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx index 0803efba..06587a2f 100644 --- a/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx +++ b/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx @@ -9,7 +9,8 @@ import { Button } from "@/components/ui/button"; import { Separator } from "@/components/ui/separator"; import { PAGE_SIZE } from "@/lib/constants"; import { Data } from "@prisma/client"; -import { ChevronLeft } from "lucide-react"; +import { ArrowTopRightIcon } from "@radix-ui/react-icons"; +import { ChevronLeft, FlaskConical } from "lucide-react"; import { useParams } from "next/navigation"; import { useState } from "react"; import { useBottomScrollListener } from "react-bottom-scroll-listener"; @@ -98,6 +99,11 @@ export default function Dataset() { datasetId={dataset_id} disabled={fetchDataset.isLoading || currentData?.length === 0} /> +
diff --git a/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx b/app/(protected)/project/[project_id]/evaluations/[run_id]/page.tsx similarity index 92% rename from app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx rename to app/(protected)/project/[project_id]/evaluations/[run_id]/page.tsx index 326a72c8..11e5d9e6 100644 --- a/app/(protected)/project/[project_id]/experiments/[run_id]/page.tsx +++ b/app/(protected)/project/[project_id]/evaluations/[run_id]/page.tsx @@ -1,6 +1,6 @@ "use client"; -import { UtilityButton } from "@/components/experiments/report-utility"; +import { UtilityButton } from "@/components/evaluations/report-utility"; import { Conversation } from "@/components/shared/conversation-view"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; @@ -14,13 +14,20 @@ import { } from "@/components/ui/sheet"; import { Skeleton } from "@/components/ui/skeleton"; import { cn } from "@/lib/utils"; -import { ChevronLeft, ChevronRight, MoveDiagonal, X } from "lucide-react"; +import { ArrowTopRightIcon } from "@radix-ui/react-icons"; +import { + ChevronLeft, + ChevronRight, + FlaskConical, + MoveDiagonal, + X, +} from "lucide-react"; import { useParams, useRouter } from "next/navigation"; import { useState } from "react"; import { useQuery } from "react-query"; import { toast } from "sonner"; -export default function Experiments() { +export default function Evaluation() { const router = useRouter(); const runId = useParams()?.run_id as string; const projectId = useParams()?.project_id as string; @@ -36,11 +43,11 @@ export default function Experiments() { ); if (!response.ok) { const error = await response.json(); - throw new Error(error?.message || "Failed to fetch the experiment"); + throw new Error(error?.message || "Failed to fetch the evaluation"); } const result = await response.json(); if (!result.run || !result.run.log) { - throw new Error("No experiment found"); + throw new Error("No evaluations found"); } const exp = JSON.parse(result.run.log); setExperiment(exp); @@ -52,7 +59,7 @@ export default function Experiments() { return result; }, onError: (error) => { - toast.error("Failed to fetch the experiment", { + toast.error("Failed to fetch the evaluation", { description: error instanceof Error ? error.message : String(error), }); }, @@ -84,7 +91,9 @@ export default function Experiments() { : "default" } > - New Experiment + New Evaluation + +
@@ -121,7 +130,7 @@ export default function Experiments() { {experiment?.error && (

- An error occurred while running this experiment. See below for + An error occurred while running this evaluation. See below for more details

@@ -137,7 +146,7 @@ export default function Experiments() { {experimentError && (

- Failed to fetch the experiment. Please try again later. + Failed to fetch the evaluation. Please try again later.

)} @@ -145,9 +154,13 @@ export default function Experiments() { experiment?.samples?.length === 0) && (

- No samples found for this experiment. + No samples found for this evaluation.

- +
)} {!experimentLoading && diff --git a/app/(protected)/project/[project_id]/experiments/compare/page.tsx b/app/(protected)/project/[project_id]/evaluations/compare/page.tsx similarity index 90% rename from app/(protected)/project/[project_id]/experiments/compare/page.tsx rename to app/(protected)/project/[project_id]/evaluations/compare/page.tsx index d03db789..f61d4fc2 100644 --- a/app/(protected)/project/[project_id]/experiments/compare/page.tsx +++ b/app/(protected)/project/[project_id]/evaluations/compare/page.tsx @@ -4,13 +4,14 @@ import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; import { Skeleton } from "@/components/ui/skeleton"; import { cn } from "@/lib/utils"; -import { ChevronLeft } from "lucide-react"; +import { ArrowTopRightIcon } from "@radix-ui/react-icons"; +import { ChevronLeft, FlaskConical } from "lucide-react"; import { useParams, useRouter, useSearchParams } from "next/navigation"; import { useState } from "react"; import { useQuery } from "react-query"; import { toast } from "sonner"; -export default function Experiments() { +export default function Compare() { const router = useRouter(); // get run id from query params const searchParams = useSearchParams(); @@ -31,11 +32,11 @@ export default function Experiments() { ); if (!response.ok) { const error = await response.json(); - throw new Error(error?.message || "Failed to fetch the experiment"); + throw new Error(error?.message || "Failed to fetch the evaluation"); } const result = await response.json(); if (!result.run || !result.run.log) { - throw new Error("No experiment found"); + throw new Error("No evaluation found"); } return JSON.parse(result.run.log); }); @@ -45,7 +46,7 @@ export default function Experiments() { return exps; }, onError: (error) => { - toast.error("Failed to fetch one or more experiments", { + toast.error("Failed to fetch one or more evaluations", { description: error instanceof Error ? error.message : String(error), }); }, @@ -70,15 +71,19 @@ export default function Experiments() { (experiments?.length === 0 && (

- No experiments found for comparison. + No evaluations found for comparison.

- +
)))} {experimentsError && !experimentsLoading && (

- Something went wrong while fetching the experiments. Please try + Something went wrong while fetching the evaluations. Please try again.

@@ -86,8 +91,8 @@ export default function Experiments() { {!experimentsLoading && !isComparable && (

- The selected experiments are not comparable. Please select - experiments ran against the same dataset. + The selected evaluations are not comparable. Please select + evaluations ran against the same dataset.

+
{!fetchExperiments.isLoading && ( @@ -103,9 +107,13 @@ export default function Experiments() {

{fetchExperiments.isError ? "Something went wrong. Please try later." - : "No experiments found. Get started by running your first experiment."} + : "No evaluations found. Get started by running your first evaluation."}

- +
)} {currentData.length > 0 && ( @@ -155,7 +163,7 @@ export default function Experiments() { className="hover:cursor-pointer hover:bg-muted" onClick={() => router.push( - `/project/${projectId}/experiments/${log?.eval?.run_id}` + `/project/${projectId}/evaluations/${log?.eval?.run_id}` ) } > diff --git a/app/(protected)/project/[project_id]/experiments/page.tsx b/app/(protected)/project/[project_id]/evaluations/page.tsx similarity index 100% rename from app/(protected)/project/[project_id]/experiments/page.tsx rename to app/(protected)/project/[project_id]/evaluations/page.tsx diff --git a/components/evaluate/create-test.tsx b/components/annotations/create-test.tsx similarity index 100% rename from components/evaluate/create-test.tsx rename to components/annotations/create-test.tsx diff --git a/components/evaluate/edit-test.tsx b/components/annotations/edit-test.tsx similarity index 100% rename from components/evaluate/edit-test.tsx rename to components/annotations/edit-test.tsx diff --git a/components/evaluate/eval-scale-picker.tsx b/components/annotations/eval-scale-picker.tsx similarity index 100% rename from components/evaluate/eval-scale-picker.tsx rename to components/annotations/eval-scale-picker.tsx diff --git a/components/evaluate/evaluation-row.tsx b/components/annotations/evaluation-row.tsx similarity index 100% rename from components/evaluate/evaluation-row.tsx rename to components/annotations/evaluation-row.tsx diff --git a/components/evaluate/evaluation-table.tsx b/components/annotations/evaluation-table.tsx similarity index 100% rename from components/evaluate/evaluation-table.tsx rename to components/annotations/evaluation-table.tsx diff --git a/components/evaluate/range-scale.tsx b/components/annotations/range-scale.tsx similarity index 100% rename from components/evaluate/range-scale.tsx rename to components/annotations/range-scale.tsx diff --git a/components/experiments/report-utility.tsx b/components/evaluations/report-utility.tsx similarity index 100% rename from components/experiments/report-utility.tsx rename to components/evaluations/report-utility.tsx diff --git a/components/shared/nav.tsx b/components/shared/nav.tsx index 931d8a16..1f797012 100644 --- a/components/shared/nav.tsx +++ b/components/shared/nav.tsx @@ -24,16 +24,16 @@ const ProjectNavLinks = (id: string) => [ href: `/project/${id}/traces`, }, { - name: "Evaluate", - href: `/project/${id}/evaluate`, + name: "Annotations", + href: `/project/${id}/annotations`, }, { name: "Datasets", href: `/project/${id}/datasets`, }, { - name: "Experiments", - href: `/project/${id}/experiments`, + name: "Evaluations", + href: `/project/${id}/evaluations`, }, { name: "Playground", From b1655880106d033c639bd42f4dfa5ec83f8ba5b0 Mon Sep 17 00:00:00 2001 From: Karthik Kalyanaraman Date: Wed, 12 Jun 2024 22:36:28 -0700 Subject: [PATCH 12/16] Add loading spinner --- .../project/[project_id]/evaluations/page-client.tsx | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/app/(protected)/project/[project_id]/evaluations/page-client.tsx b/app/(protected)/project/[project_id]/evaluations/page-client.tsx index 3f22f2bd..1566093a 100644 --- a/app/(protected)/project/[project_id]/evaluations/page-client.tsx +++ b/app/(protected)/project/[project_id]/evaluations/page-client.tsx @@ -1,5 +1,6 @@ "use client"; +import { Spinner } from "@/components/shared/spinner"; import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; import { Checkbox } from "@/components/ui/checkbox"; @@ -245,6 +246,11 @@ export default function Evaluations() { })} + {showLoader && ( +
+ +
+ )}
)}
From 2d30fe96c19338d0cd9253a0728e5a435927fd0c Mon Sep 17 00:00:00 2001 From: Karthik Kalyanaraman Date: Wed, 12 Jun 2024 23:43:11 -0700 Subject: [PATCH 13/16] Link evaluation --- .../datasets/dataset/[dataset_id]/page.tsx | 45 ++++++++++++------- .../evaluations/[run_id]/page.tsx | 38 +++++++++------- .../[project_id]/evaluations/compare/page.tsx | 14 +++--- .../[project_id]/evaluations/page-client.tsx | 26 ++++++----- lib/constants.ts | 2 + 5 files changed, 77 insertions(+), 48 deletions(-) diff --git a/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx b/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx index 06587a2f..34acb744 100644 --- a/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx +++ b/app/(protected)/project/[project_id]/datasets/dataset/[dataset_id]/page.tsx @@ -5,12 +5,14 @@ import DatasetRowSkeleton from "@/components/project/dataset/dataset-row-skeleto import { EditData } from "@/components/project/dataset/edit-data"; import { DownloadDataset } from "@/components/shared/download-dataset"; import { Spinner } from "@/components/shared/spinner"; +import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; import { Separator } from "@/components/ui/separator"; -import { PAGE_SIZE } from "@/lib/constants"; +import { EVALUATIONS_DOCS_URL, PAGE_SIZE } from "@/lib/constants"; import { Data } from "@prisma/client"; import { ArrowTopRightIcon } from "@radix-ui/react-icons"; import { ChevronLeft, FlaskConical } from "lucide-react"; +import Link from "next/link"; import { useParams } from "next/navigation"; import { useState } from "react"; import { useBottomScrollListener } from "react-bottom-scroll-listener"; @@ -88,22 +90,31 @@ export default function Dataset() { } else { return (
-
- - - - +
+
+ + + +
+
+ + Dataset ID: {dataset_id} + + + + +
diff --git a/app/(protected)/project/[project_id]/evaluations/[run_id]/page.tsx b/app/(protected)/project/[project_id]/evaluations/[run_id]/page.tsx index 11e5d9e6..fecb5a2a 100644 --- a/app/(protected)/project/[project_id]/evaluations/[run_id]/page.tsx +++ b/app/(protected)/project/[project_id]/evaluations/[run_id]/page.tsx @@ -13,6 +13,7 @@ import { SheetTitle, } from "@/components/ui/sheet"; import { Skeleton } from "@/components/ui/skeleton"; +import { EVALUATIONS_DOCS_URL } from "@/lib/constants"; import { cn } from "@/lib/utils"; import { ArrowTopRightIcon } from "@radix-ui/react-icons"; import { @@ -22,6 +23,7 @@ import { MoveDiagonal, X, } from "lucide-react"; +import Link from "next/link"; import { useParams, useRouter } from "next/navigation"; import { useState } from "react"; import { useQuery } from "react-query"; @@ -84,17 +86,19 @@ export default function Evaluation() { )}
- + + +
@@ -156,11 +160,13 @@ export default function Evaluation() {

No samples found for this evaluation.

- + + +
)} {!experimentLoading && diff --git a/app/(protected)/project/[project_id]/evaluations/compare/page.tsx b/app/(protected)/project/[project_id]/evaluations/compare/page.tsx index f61d4fc2..420f3fdd 100644 --- a/app/(protected)/project/[project_id]/evaluations/compare/page.tsx +++ b/app/(protected)/project/[project_id]/evaluations/compare/page.tsx @@ -3,9 +3,11 @@ import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; import { Skeleton } from "@/components/ui/skeleton"; +import { EVALUATIONS_DOCS_URL } from "@/lib/constants"; import { cn } from "@/lib/utils"; import { ArrowTopRightIcon } from "@radix-ui/react-icons"; import { ChevronLeft, FlaskConical } from "lucide-react"; +import Link from "next/link"; import { useParams, useRouter, useSearchParams } from "next/navigation"; import { useState } from "react"; import { useQuery } from "react-query"; @@ -73,11 +75,13 @@ export default function Compare() {

No evaluations found for comparison.

- + + +
)))} {experimentsError && !experimentsLoading && ( diff --git a/app/(protected)/project/[project_id]/evaluations/page-client.tsx b/app/(protected)/project/[project_id]/evaluations/page-client.tsx index 1566093a..2156110a 100644 --- a/app/(protected)/project/[project_id]/evaluations/page-client.tsx +++ b/app/(protected)/project/[project_id]/evaluations/page-client.tsx @@ -5,10 +5,12 @@ import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; import { Checkbox } from "@/components/ui/checkbox"; import { Skeleton } from "@/components/ui/skeleton"; +import { EVALUATIONS_DOCS_URL } from "@/lib/constants"; import { cn, formatDateTime } from "@/lib/utils"; import { Run } from "@prisma/client"; import { ArrowTopRightIcon } from "@radix-ui/react-icons"; import { FlaskConical } from "lucide-react"; +import Link from "next/link"; import { useParams, useRouter } from "next/navigation"; import { useState } from "react"; import { useBottomScrollListener } from "react-bottom-scroll-listener"; @@ -94,11 +96,13 @@ export default function Evaluations() { > Compare - + + +
{!fetchExperiments.isLoading && ( @@ -110,11 +114,13 @@ export default function Evaluations() { ? "Something went wrong. Please try later." : "No evaluations found. Get started by running your first evaluation."}

- + + +
)} {currentData.length > 0 && ( diff --git a/lib/constants.ts b/lib/constants.ts index e5d9a15b..766e65ad 100644 --- a/lib/constants.ts +++ b/lib/constants.ts @@ -5,6 +5,8 @@ import { } from "@langtrase/trace-attributes"; import { Test } from "@prisma/client"; import { TiktokenEncoding } from "js-tiktoken"; +export const EVALUATIONS_DOCS_URL = + "https://docs.langtrace.ai/features/evaluations"; export const SCHEDULE_CALL_LINK = "https://calendar.app.google/Go5gXNPcqZjAY4i47"; From 405dc9207d4a97dafdf82a460c646097daaab0df Mon Sep 17 00:00:00 2001 From: Karthik Kalyanaraman Date: Wed, 12 Jun 2024 23:44:10 -0700 Subject: [PATCH 14/16] Link to evals --- .../project/[project_id]/evaluations/[run_id]/page.tsx | 4 ++-- .../project/[project_id]/evaluations/compare/page.tsx | 2 +- .../project/[project_id]/evaluations/page-client.tsx | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/app/(protected)/project/[project_id]/evaluations/[run_id]/page.tsx b/app/(protected)/project/[project_id]/evaluations/[run_id]/page.tsx index fecb5a2a..758f9dcd 100644 --- a/app/(protected)/project/[project_id]/evaluations/[run_id]/page.tsx +++ b/app/(protected)/project/[project_id]/evaluations/[run_id]/page.tsx @@ -86,7 +86,7 @@ export default function Evaluation() { )}
- + - +
))} -
+
- +

Advanced Filters