Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
1 change: 1 addition & 0 deletions apps/web-docs/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
TODO
1 change: 1 addition & 0 deletions apps/web-evals/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
DATABASE_URL=postgres://postgres:password@localhost:5432/evals_development
8 changes: 8 additions & 0 deletions apps/web-evals/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# .env
!.env

# next.js
.next

# typescript
tsconfig.tsbuildinfo
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { nextJsConfig } from "@evals/eslint-config/next-js"
import { nextJsConfig } from "@roo-code/config-eslint/next-js"

/** @type {import("eslint").Linter.Config} */
export default [
Expand Down
5 changes: 5 additions & 0 deletions apps/web-evals/next-env.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/// <reference types="next" />
/// <reference types="next/image-types/global" />

// NOTE: This file should not be edited
// see https://nextjs.org/docs/app/api-reference/config/typescript for more information.
File renamed without changes.
30 changes: 15 additions & 15 deletions evals/apps/web/package.json → apps/web-evals/package.json
Original file line number Diff line number Diff line change
@@ -1,18 +1,15 @@
{
"name": "@evals/web",
"name": "@roo-code/web-evals",
"private": true,
"scripts": {
"lint": "next lint",
"check-types": "tsc -b",
"dev": "dotenvx run -f ../../.env -- next dev --turbopack",
"dev": "next dev --turbopack",
"format": "prettier --write src",
"build": "next build",
"start": "next start"
},
"dependencies": {
"@evals/db": "workspace:^",
"@evals/ipc": "workspace:^",
"@evals/types": "workspace:^",
"@hookform/resolvers": "^4.1.3",
"@radix-ui/react-alert-dialog": "^1.1.7",
"@radix-ui/react-dialog": "^1.1.6",
Expand All @@ -26,33 +23,36 @@
"@radix-ui/react-slot": "^1.1.2",
"@radix-ui/react-tabs": "^1.1.3",
"@radix-ui/react-tooltip": "^1.1.8",
"@roo-code/evals": "workspace:^",
"@roo-code/ipc": "workspace:^",
"@roo-code/types": "workspace:^",
"@tanstack/react-query": "^5.69.0",
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"cmdk": "^1.1.0",
"fuzzysort": "^3.1.0",
"lucide-react": "^0.511.0",
"next": "15.3.3",
"next": "^15.2.5",
"next-themes": "^0.4.6",
"p-map": "^7.0.3",
"ps-tree": "^1.2.0",
"react": "^19.0.0",
"react-dom": "^19.0.0",
"react-hook-form": "^7.54.2",
"react": "^18.3.1",
"react-dom": "^18.3.1",
"react-hook-form": "^7.57.0",
"react-use": "^17.6.0",
"sonner": "^2.0.2",
"tailwind-merge": "^3.0.2",
"sonner": "^2.0.5",
"tailwind-merge": "^3.3.0",
"tailwindcss-animate": "^1.0.7",
"vaul": "^1.1.2",
"zod": "^3.24.2"
},
"devDependencies": {
"@evals/eslint-config": "workspace:^",
"@evals/typescript-config": "workspace:^",
"@roo-code/config-eslint": "workspace:^",
"@roo-code/config-typescript": "workspace:^",
"@tailwindcss/postcss": "^4",
"@types/ps-tree": "^1.1.6",
"@types/react": "^19",
"@types/react-dom": "^19",
"@types/react": "^18.3.18",
"@types/react-dom": "^18.3.5",
"tailwindcss": "^4"
}
}
File renamed without changes.
Empty file.
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import type { NextRequest } from "next/server"

import { findRun } from "@evals/db"
import { IpcMessageType } from "@evals/types"
import { IpcClient } from "@evals/ipc"
import { findRun } from "@roo-code/evals"
import { IpcClient } from "@roo-code/ipc"
import { IpcMessageType } from "@roo-code/types"

import { SSEStream } from "@/lib/server/sse-stream"

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { NextResponse } from "next/server"

import { createRun } from "@evals/db"
import { createRun } from "@roo-code/evals"

export async function POST(request: Request) {
try {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { NextResponse } from "next/server"

import { createTask } from "@evals/db"
import { createTask } from "@roo-code/evals"

export async function POST(request: Request) {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { useRouter } from "next/navigation"
import Link from "next/link"
import { Ellipsis, Rocket } from "lucide-react"

import type { Run, TaskMetrics } from "@evals/db"
import type { Run, TaskMetrics } from "@roo-code/evals"

import { deleteRun } from "@/lib/server/runs"
import { formatCurrency, formatDuration, formatTokens, formatToolUsageSuccessRate } from "@/lib/formatters"
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { getRuns } from "@evals/db"
import { getRuns } from "@roo-code/evals"

import { Home } from "./home"

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { findRun } from "@evals/db"
import { findRun } from "@roo-code/evals"

import { Run } from "./run"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import { useMemo } from "react"
import { LoaderCircle } from "lucide-react"

import * as db from "@evals/db"
import type { Run, TaskMetrics as _TaskMetrics } from "@roo-code/evals"

import { formatCurrency, formatDuration, formatTokens } from "@/lib/formatters"
import { useRunStatus } from "@/hooks/use-run-status"
Expand All @@ -12,9 +12,9 @@ import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@
import { TaskStatus } from "./task-status"
import { ConnectionStatus } from "./connection-status"

type TaskMetrics = Pick<db.TaskMetrics, "tokensIn" | "tokensOut" | "tokensContext" | "duration" | "cost">
type TaskMetrics = Pick<_TaskMetrics, "tokensIn" | "tokensOut" | "tokensContext" | "duration" | "cost">

export function Run({ run }: { run: db.Run }) {
export function Run({ run }: { run: Run }) {
const { tasks, status, tokenUsage, usageUpdatedAt } = useRunStatus(run)

const taskMetrics: Record<number, TaskMetrics> = useMemo(() => {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { CircleCheck, CircleDashed, CircleSlash, LoaderCircle } from "lucide-react"

import { type Task } from "@evals/db"
import type { Task } from "@roo-code/evals"

type TaskStatusProps = {
task: Task
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { RooCodeSettings } from "./roo-code.js"
import { RooCodeSettings } from "@roo-code/types"

export const rooCodeDefaults: RooCodeSettings = {
apiProvider: "openrouter",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import fuzzysort from "fuzzysort"
import { toast } from "sonner"
import { X, Rocket, Check, ChevronsUpDown, SlidersHorizontal, Book, CircleCheck } from "lucide-react"

import { globalSettingsSchema, providerSettingsSchema, rooCodeDefaults } from "@evals/types"
import { globalSettingsSchema, providerSettingsSchema } from "@roo-code/types"

import { createRun } from "@/lib/server/runs"
import {
Expand Down Expand Up @@ -51,6 +51,7 @@ import {
DialogFooter,
} from "@/components/ui"

import { rooCodeDefaults } from "./defaults"
import { SettingsDiff } from "./settings-diff"

export function NewRun() {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import { Fragment, HTMLAttributes } from "react"

import { RooCodeSettings, ROO_CODE_SETTINGS_KEYS } from "@evals/types"
import { type Keys, type RooCodeSettings, GLOBAL_SETTINGS_KEYS, PROVIDER_SETTINGS_KEYS } from "@roo-code/types"

import { cn } from "@/lib/utils"

export const ROO_CODE_SETTINGS_KEYS = [...GLOBAL_SETTINGS_KEYS, ...PROVIDER_SETTINGS_KEYS] as Keys<RooCodeSettings>[]

type SettingsDiffProps = HTMLAttributes<HTMLDivElement> & {
defaultSettings: RooCodeSettings
customSettings: RooCodeSettings
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import * as ScrollAreaPrimitive from "@radix-ui/react-scroll-area"
import { cn } from "@/lib/utils"

type ScrollAreaProps = React.ComponentProps<typeof ScrollAreaPrimitive.Root> & {
viewportRef?: React.RefObject<HTMLDivElement | null>
viewportRef?: React.RefObject<HTMLDivElement>
}

function ScrollArea({ className, children, viewportRef, ...props }: ScrollAreaProps) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ import { useQuery } from "@tanstack/react-query"

import { getExercises } from "@/lib/server/exercises"

export const useExercises = () => useQuery({ queryKey: ["exercises"], queryFn: getExercises })
export const useExercises = () => useQuery({ queryKey: ["exercises"], queryFn: () => getExercises() })
32 changes: 32 additions & 0 deletions apps/web-evals/src/hooks/use-open-router-models.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import { z } from "zod"
import { useQuery } from "@tanstack/react-query"

/**
 * Validation schema for one model entry: a string `id` and a string `name`.
 */
export const openRouterModelSchema = z.object({ id: z.string(), name: z.string() })

/** Static type derived from {@link openRouterModelSchema}. */
export type OpenRouterModel = z.infer<typeof openRouterModelSchema>

/**
 * Fetches the model list from the OpenRouter models endpoint and returns it
 * sorted by `name`.
 *
 * Two failure modes are reported with `console.error` and collapse to an empty
 * list: a non-OK HTTP response, and a body that does not match the expected
 * `{ data: [...] }` envelope. A rejected `fetch` or a body that is not valid
 * JSON is not caught here and propagates to the caller.
 *
 * @returns The validated models ordered with `localeCompare` on `name`, or `[]`
 * when the request or validation fails.
 */
export const getOpenRouterModels = async (): Promise<OpenRouterModel[]> => {
  const response = await fetch("https://openrouter.ai/api/v1/models")

  if (!response.ok) {
    // Log the HTTP failure so an empty list can be told apart from "no models",
    // matching the logging done for validation failures below.
    console.error(`Failed to fetch OpenRouter models: ${response.status} ${response.statusText}`)
    return []
  }

  const result = z.object({ data: z.array(openRouterModelSchema) }).safeParse(await response.json())

  if (!result.success) {
    console.error(result.error)
    return []
  }

  // Sort a copy so the parsed result object is left untouched.
  return [...result.data.data].sort((a, b) => a.name.localeCompare(b.name))
}

/**
 * React Query hook wrapping {@link getOpenRouterModels}; results are cached
 * under the `["getOpenRouterModels"]` query key.
 */
export const useOpenRouterModels = () => {
  return useQuery({ queryKey: ["getOpenRouterModels"], queryFn: getOpenRouterModels })
}
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import { useState, useCallback, useRef } from "react"
import { useQuery, keepPreviousData } from "@tanstack/react-query"

import { TokenUsage, taskEventSchema, RooCodeEventName, EvalEventName } from "@evals/types"
import { Run } from "@evals/db"
import { type TokenUsage, RooCodeEventName, taskEventSchema } from "@roo-code/types"
import type { Run } from "@roo-code/evals"

import { getTasks } from "@/lib/server/tasks"
import { useEventSource } from "@/hooks/use-event-source"
Expand Down Expand Up @@ -58,8 +58,8 @@ export const useRunStatus = (run: Run) => {
setUsageUpdatedAt(Date.now())
break
}
case EvalEventName.Pass:
case EvalEventName.Fail:
case RooCodeEventName.EvalPass:
case RooCodeEventName.EvalFail:
setTasksUpdatedAt(Date.now())
break
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { z } from "zod"

import { rooCodeSettingsSchema } from "@evals/types"
import { rooCodeSettingsSchema } from "@roo-code/types"

/**
* CreateRun
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import * as fs from "fs/promises"
import * as path from "path"
import { fileURLToPath } from "url"

import { ExerciseLanguage, exerciseLanguages } from "@evals/types"
import { type ExerciseLanguage, exerciseLanguages } from "@roo-code/evals"

const __dirname = path.dirname(fileURLToPath(import.meta.url))

Expand All @@ -20,7 +20,7 @@ export const listDirectories = async (relativePath: string) => {
}

// __dirname = <repo>/apps/web-evals/src/lib/server
const EXERCISES_BASE_PATH = path.resolve(__dirname, "../../../../../../../evals")
const EXERCISES_BASE_PATH = path.resolve(__dirname, "../../../../../../evals")

export const getExercises = async () => {
const result = await Promise.all(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,21 @@ import fs from "fs"
import { revalidatePath } from "next/cache"
import pMap from "p-map"

import { ExerciseLanguage, exerciseLanguages } from "@evals/types"
import * as db from "@evals/db"
import {
type ExerciseLanguage,
exerciseLanguages,
createRun as _createRun,
updateRun as _updateRun,
deleteRun as _deleteRun,
createTask,
} from "@roo-code/evals"

import { CreateRun } from "@/lib/schemas"

import { getExercisesForLanguage } from "./exercises"

export async function createRun({ suite, exercises = [], systemPrompt, ...values }: CreateRun) {
const run = await db.createRun({
const run = await _createRun({
...values,
socketPath: path.join(os.tmpdir(), `roo-code-evals-${crypto.randomUUID()}.sock`),
})
Expand All @@ -28,13 +35,13 @@ export async function createRun({ suite, exercises = [], systemPrompt, ...values
throw new Error("Invalid exercise path: " + path)
}

await db.createTask({ ...values, runId: run.id, language: language as ExerciseLanguage, exercise })
await createTask({ ...values, runId: run.id, language: language as ExerciseLanguage, exercise })
}
} else {
for (const language of exerciseLanguages) {
const exercises = await getExercisesForLanguage(language)

await pMap(exercises, (exercise) => db.createTask({ ...values, runId: run.id, language, exercise }), {
await pMap(exercises, (exercise) => createTask({ ...values, runId: run.id, language, exercise }), {
concurrency: 10,
})
}
Expand All @@ -49,18 +56,14 @@ export async function createRun({ suite, exercises = [], systemPrompt, ...values
? { ...process.env, FOOTGUN_SYSTEM_PROMPT: systemPrompt }
: process.env

const childProcess = spawn(
"pnpm",
["--filter", "@evals/cli", "dev", "run", "all", "--runId", run.id.toString()],
{
detached: true,
stdio: ["ignore", logFile, logFile],
env,
},
)
const childProcess = spawn("pnpm", ["--filter", "@roo-code/evals", "cli", run.id.toString()], {
detached: true,
stdio: ["ignore", logFile, logFile],
env,
})

childProcess.unref()
await db.updateRun(run.id, { pid: childProcess.pid })
await _updateRun(run.id, { pid: childProcess.pid })
} catch (error) {
console.error(error)
}
Expand All @@ -69,6 +72,6 @@ export async function createRun({ suite, exercises = [], systemPrompt, ...values
}

export async function deleteRun(runId: number) {
await db.deleteRun(runId)
await _deleteRun(runId)
revalidatePath("/runs")
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

import { revalidatePath } from "next/cache"

import * as db from "@evals/db"
import { getTasks as _getTasks } from "@roo-code/evals"

export async function getTasks(runId: number) {
const tasks = await db.getTasks(runId)
const tasks = await _getTasks(runId)
revalidatePath(`/runs/${runId}`)
return tasks
}
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"extends": "@evals/typescript-config/nextjs.json",
"extends": "@roo-code/config-typescript/nextjs.json",
"compilerOptions": {
"plugins": [{ "name": "next" }],
"paths": { "@/*": ["./src/*"] }
Expand Down
Empty file added apps/web-roo-code/.gitkeep
Empty file.
1 change: 1 addition & 0 deletions apps/web-roo-code/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
TODO
Loading